<a href="https://colab.research.google.com/github/rubythalib33/learn-nlp/blob/main/01Basic_Chatbot/Python_Chatbot_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# source: https://www.projectpro.io/article/python-chatbot-project-learn-to-build-a-chatbot-from-scratch/429#toc-6

In [2]:
# Directory containing intents.json, relative to the working directory
# (presumably a mounted Google Drive in Colab — confirm for your environment).
# The loading cell appends "/intents.json", so with this trailing slash the
# final path contains a double slash.
data_root = "drive/MyDrive/chatbot/"

In [13]:
import json
import string
import random

import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
nltk.download("punkt")
nltk.download("wordnet")
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [7]:
# Load the intents definition. The context manager closes the file handle
# deterministically (a bare open(...).read() leaves it open until garbage
# collection), and the explicit encoding keeps non-ASCII patterns/responses
# intact regardless of the platform default.
with open(data_root+"/intents.json", encoding="utf-8") as intents_file:
  data_file = intents_file.read()
# Parsed intents: a dict whose "intents" entry is iterated by the later cells.
data = json.loads(data_file)

In [10]:
# Step-4: Identifying Feature and Target for the NLP Model
# words   -> every token seen in any pattern (duplicates kept at this stage)
# classes -> distinct intent tags, in first-seen order
# data_X  -> raw pattern sentences
# data_Y  -> the tag belonging to the pattern at the same index in data_X
words, classes, data_X, data_Y = [], [], [], []

for entry in data["intents"]:
  label = entry["tag"]
  for utterance in entry["patterns"]:
    words += nltk.word_tokenize(utterance)
    data_X.append(utterance)
    data_Y.append(label)

  # A tag is registered even when its intent has no patterns.
  if label not in classes:
    classes.append(label)

In [11]:
# Single WordNet lemmatizer instance shared by the vocabulary-building,
# training-data and user-input preprocessing cells below.
lemmatizer = WordNetLemmatizer()

In [14]:
# Normalise the vocabulary: drop punctuation tokens, lower-case and lemmatize
# the rest, then de-duplicate and sort so each word has a stable index.
words = sorted({
  lemmatizer.lemmatize(token.lower())
  for token in words
  if token not in string.punctuation
})
# Same treatment for the labels: unique tags in sorted order.
classes = sorted(set(classes))

In [16]:
#Step-5: Making the data Machine-friendly
# Each pattern becomes a (bag-of-words vector, one-hot tag vector) pair.
training = []
out_empty = [0]*len(classes)
for idx, doc in enumerate(data_X):
  # Tokenize first, then lower-case and lemmatize each token. The lemmatizer
  # treats its argument as a single word, so lemmatizing the whole sentence
  # did nothing useful, and testing `word in <sentence string>` is a substring
  # match (a vocabulary word like "a" would match any sentence containing that
  # letter). Token-level membership matches how bag_of_words encodes user
  # input at prediction time.
  doc_tokens = {lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)}
  bow = [1 if word in doc_tokens else 0 for word in words]

  # One-hot encode the tag at its index in the sorted class list.
  output_row = list(out_empty)
  output_row[classes.index(data_Y[idx])] = 1

  training.append([bow, output_row])

# Shuffle whole rows so features and labels stay aligned.
random.shuffle(training)
training = np.array(training, dtype=object)
# Column 0 holds the feature vectors, column 1 the one-hot labels.
train_x = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))

In [19]:
#6 The Neural Network model
# Feed-forward classifier: a bag-of-words vector goes in, one softmax score per
# intent tag comes out. Dropout follows each hidden layer.
model = Sequential([
  Dense(128, input_shape=(len(train_x[0]),), activation="relu"),
  Dropout(0.5),
  Dense(64, activation="relu"),
  Dropout(0.5),
  Dense(len(classes), activation="softmax"),
])

# Labels are one-hot vectors, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# emitted a stray "None" line after the table.
model.summary()
model.fit(x=train_x, y=train_y, epochs=150, verbose=1)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_5 (Dense)             (None, 128)               30592     
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense_6 (Dense)             (None, 64)                8256      
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_7 (Dense)             (None, 44)                2860      
                                                                 
Total params: 41,708
Trainable params: 41,708
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/1

<keras.callbacks.History at 0x7f0529dc84d0>

In [22]:
# Step-7: Pre-processing the User’s Input
def clean_text(text):
  """Tokenize `text` and return its lower-cased, lemmatized tokens.

  Args:
    text: raw sentence typed by the user.

  Returns:
    A list of token strings, one per token found by nltk.word_tokenize.
  """
  tokens = nltk.word_tokenize(text)
  # Lower-case before lemmatizing: the vocabulary is built from lower-cased
  # tokens, so "Hello" would otherwise never match the entry "hello".
  tokens = [lemmatizer.lemmatize(token.lower()) for token in tokens]
  return tokens

def bag_of_words(text, vocab):
  """Encode `text` as a 0/1 vector aligned with `vocab`.

  Args:
    text: raw sentence; it is preprocessed with clean_text.
    vocab: sequence of vocabulary words; position i of the result refers
      to vocab[i].

  Returns:
    A numpy array with a 1 wherever the vocabulary word equals one of the
    cleaned tokens, and 0 elsewhere.
  """
  present = set(clean_text(text))
  return np.array([1 if entry in present else 0 for entry in vocab])

def pred_labels(text, vocab, labels, threshold=0.5):
  """Predict the intent labels for `text` using the module-level `model`.

  Args:
    text: raw sentence typed by the user.
    vocab: vocabulary used to build the bag-of-words input vector.
    labels: label names; labels[i] corresponds to output unit i of the model.
    threshold: minimum score (exclusive) a label needs to be returned.
      Defaults to 0.5, the value that was previously hard-coded.

  Returns:
    A list of label names whose score exceeds `threshold`, ordered from the
    highest score to the lowest. Empty when no score clears the threshold.
  """
  bow = bag_of_words(text, vocab)
  # predict() expects a batch; wrap the single vector and unwrap the result.
  scores = model.predict(np.array([bow]))[0]
  y_pred = [[indx, pred] for indx, pred in enumerate(scores) if pred > threshold]
  y_pred.sort(key=lambda pair: pair[1], reverse=True)
  return [labels[indx] for indx, _ in y_pred]

def get_response(intents_list, intents_json):
  """Pick a reply for the top predicted intent.

  Args:
    intents_list: predicted tags, best first; may be empty.
    intents_json: parsed intents data; its "intents" entry is a list of
      dicts with a "tag" and a list of "responses".

  Returns:
    A randomly chosen response of the first intent whose tag equals
    intents_list[0]. The fallback sentence is returned when there is no
    prediction, when the tag is not present in `intents_json`, or when the
    matching intent has no responses.
  """
  fallback = "Sorry I don't Understand"
  if not intents_list:
    return fallback

  tag = intents_list[0]
  for intent in intents_json["intents"]:
    if intent["tag"] == tag:
      responses = intent["responses"]
      # random.choice raises IndexError on an empty sequence.
      return random.choice(responses) if responses else fallback

  # Tag not found: previously `result` was never assigned here, which raised
  # UnboundLocalError at the return statement.
  return fallback

In [23]:
# Step 8 interacting with the chatbot
# Read lines until the user types "0"; every other line is classified and
# answered with a response for the predicted intent.
print("press 0 if you want to exit")
message = input()
while message != "0":
  intents = pred_labels(message, words, classes)
  result = get_response(intents, data)
  print(result)
  message = input()
print("program exit")

press 0 if you want to exit
hello
Hi there, how can I help?
how are you?
Sorry I don't Understand
good morning
Hi there, how can I help?
0
program exit
