<a href="https://colab.research.google.com/github/swatiaggrawal/Chatbot-by-nltk/blob/main/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#mounting google drive
# Colab-only step: mounts the user's Google Drive at /content/drive so later
# cells can read files stored there.
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): data_root is not referenced by any other cell visible in this
# notebook, and the dataset cell reads from '/content/drive/MyDrive/ChatBot/...'
# (different spelling/case) - confirm which path is the intended one.
data_root='/content/drive/My Drive/Chatbot'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
#importing required libraries
import json
import string
import random
import nltk
import numpy as np
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Dropout
# Fetch the NLTK resources this notebook relies on:
#   - "punkt"     : tokenizer models used by nltk.word_tokenize
#   - "wordnet"   : corpus used by WordNetLemmatizer
#   - "punkt_tab" : name under which newer NLTK releases look up the tokenizer
#   - "omw-1.4"   : multilingual WordNet data some NLTK releases load with WordNet
# Requesting a resource the installed NLTK does not need is harmless; each call
# is a no-op when the package is already up to date.
for resource in ("punkt","punkt_tab","wordnet","omw-1.4"):
  nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [5]:
#Load dataset(intents.json)
# Read the raw JSON text inside a context manager so the file handle is closed
# deterministically (the previous open(...).read() left it to the garbage collector).
with open('/content/drive/MyDrive/ChatBot/intents.json',encoding='utf-8') as intents_file:
  data_file=intents_file.read()
# data is a dict with an "intents" list; each entry carries "tag", "patterns" and "responses".
data = json.loads(data_file)
data

{'intents': [{'context': [''],
   'patterns': ['Hi there',
    'How are you',
    'Is anyone there?',
    'Hey',
    'Hola',
    'Hello',
    'Good day'],
   'responses': ['Hello, thanks for asking',
    'Good to see you again',
    'Hi there, how can I help?'],
   'tag': 'greeting'},
  {'context': [''],
   'patterns': ['Bye',
    'See you later',
    'Goodbye',
    'Nice chatting to you, bye',
    'Till next time'],
   'responses': ['See you!', 'Have a nice day', 'Bye! Come back again soon.'],
   'tag': 'goodbye'},
  {'context': [''],
   'patterns': ['Thanks',
    'Thank you',
    "That's helpful",
    'Awesome, thanks',
    'Thanks for helping me'],
   'responses': ['Happy to help!', 'Any time!', 'My pleasure'],
   'tag': 'thanks'},
  {'context': [''],
   'patterns': [],
   'responses': ["Sorry, can't understand you",
    'Please give me more info',
    'Not sure I understand'],
   'tag': 'noanswer'},
  {'context': [''],
   'patterns': ['How you could help me?',
    'What help you pr

In [6]:
# Build the training corpus from the intents file:
#   data_X  - every pattern sentence, in file order
#   data_Y  - the tag of the intent each pattern belongs to (parallel to data_X)
#   words   - sorted, de-duplicated vocabulary of lower-cased lemmas
#   classes - sorted, de-duplicated list of intent tags
lemmatizer = WordNetLemmatizer()

data_X=[]
data_Y=[]
raw_tokens=[] # every token of every pattern, before normalisation
for intent in data["intents"]:
  tag=intent["tag"]
  for pattern in intent["patterns"]:
    data_X.append(pattern)
    data_Y.append(tag)
    raw_tokens+=nltk.word_tokenize(pattern)

# Drop tokens that occur inside string.punctuation, then lower-case and
# lemmatize what remains; the set removes duplicates before sorting.
words=sorted({lemmatizer.lemmatize(token.lower()) for token in raw_tokens if token not in string.punctuation})

# Every intent contributes its tag, including intents that have no patterns.
classes=sorted({intent["tag"] for intent in data["intents"]})


In [9]:
#converting text to numbers
# Every pattern becomes a pair [bow, output_row]:
#   bow        - binary vector over `words` (1 when the lemma occurs in the pattern)
#   output_row - one-hot vector over `classes` marking the pattern's tag
training =[]
out_empty=[0]*len(classes)
#creating BoW model
for idx,doc in enumerate(data_X):
  # Tokenize first and normalise each token exactly like the vocabulary was
  # built (lower-case + lemmatize). Lemmatizing the whole sentence as a single
  # string and using a substring test would make short words such as "hi"
  # fire inside unrelated words ("this"), and would not match the exact-token
  # lookup performed at prediction time.
  doc_lemmas={lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)}
  bow=[1 if word in doc_lemmas else 0 for word in words]
  #mark index of class associated with current pattern
  output_row=list(out_empty)
  output_row[classes.index(data_Y[idx])]=1
  #add the encoded bow and its one-hot class to training
  training.append([bow,output_row])
#shuffle data and convert to array
random.shuffle(training)
# dtype=object keeps the (bow, output_row) pairs as two ragged columns
training=np.array(training,dtype=object)
#split features and target labels
train_X=np.array(list(training[:,0]))
train_Y=np.array(list(training[:,1]))

In [10]:
#Neural Network Model
# Feed-forward classifier: a bag-of-words vector goes in, a softmax
# distribution over the intent classes comes out.
model=Sequential()
# input width = vocabulary size (length of one BoW row)
model.add(Dense(128,input_shape=(len(train_X[0]),),activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64,activation="relu"))
model.add(Dropout(0.5))
# output width = number of intent classes (length of one one-hot row)
model.add(Dense(len(train_Y[0]),activation='softmax'))
# NOTE(review): newer Keras optimizer implementations may not accept the
# `decay` argument - confirm against the installed TensorFlow version.
adam=tf.keras.optimizers.Adam(learning_rate=0.01,decay=1e-6)
model.compile(loss='categorical_crossentropy',optimizer=adam,metrics=["accuracy"])
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that emitted a stray "None" line after the table).
model.summary()
model.fit(x=train_X,y=train_Y,epochs=150,verbose=1)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               30592     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 44)                2860      
                                                                 
Total params: 41,708
Trainable params: 41,708
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150

<keras.callbacks.History at 0x7fdce02d27d0>

In [14]:
#preprocessing the input

#receive text, tokenize it and lemmatize each token
def clean_text(text):
  """Tokenize `text` and normalise every token to a lower-cased lemma.

  The vocabulary is built from lower-cased lemmas, so user input has to be
  lower-cased as well; otherwise capitalised words ("Hello") can never match
  a vocabulary entry.

  Args:
    text: raw user message.

  Returns:
    list of lower-cased, lemmatized tokens (punctuation tokens are kept).
  """
  tokens=nltk.word_tokenize(text)
  tokens=[lemmatizer.lemmatize(word.lower()) for word in tokens]
  return tokens

#convert text into array using BoW model
def bag_of_words(text,vocab):
  """Encode `text` as a binary bag-of-words vector aligned with `vocab`.

  Args:
    text: raw user message; it is passed through clean_text first.
    vocab: sequence of vocabulary words.

  Returns:
    numpy array with one entry per vocabulary word: 1 when that word is among
    the cleaned tokens of `text`, 0 otherwise.
  """
  seen=set(clean_text(text))
  return np.array([1 if entry in seen else 0 for entry in vocab])

#return the tags whose predicted probability exceeds the threshold, most probable first
def pred_class(text,vocab,labels):
  """Predict the intent tags for `text` using the global `model`.

  Args:
    text: raw user message.
    vocab: vocabulary used to build the bag-of-words input.
    labels: class names, indexed like the model's output units.

  Returns:
    list of labels whose predicted probability is above 0.5, ordered from
    most to least probable; empty when no class clears the threshold.
  """
  thresh=0.5
  features=bag_of_words(text,vocab)
  probabilities=model.predict(np.array([features]))[0] # single-row batch -> first row
  candidates=[[position,score] for position,score in enumerate(probabilities) if score>thresh]
  candidates.sort(key=lambda pair: pair[1],reverse=True)
  return [labels[position] for position,_ in candidates]

#take the tag list from pred_class and pick a response for the top tag
def get_response(intents_list,intents_json):
  """Pick a reply for the most probable predicted intent.

  Args:
    intents_list: predicted tags, most probable first (may be empty).
    intents_json: dict with an "intents" list whose items carry "tag" and
      "responses".

  Returns:
    A randomly chosen response of the intent matching intents_list[0]. The
    fallback message is returned when no tag was predicted, when the tag is
    not present in intents_json, or when the matching intent has no responses
    (the last two cases previously raised UnboundLocalError / IndexError).
  """
  fallback="Sorry!!!I don't get it...."
  if not intents_list:
    return fallback
  tag=intents_list[0]
  for intent in intents_json["intents"]:
    if intent["tag"] == tag:
      responses=intent["responses"]
      # random.choice raises IndexError on an empty sequence
      return random.choice(responses) if responses else fallback
  # predicted tag is unknown to the intents file
  return fallback


In [15]:
#interacting
# Simple REPL: read a message, classify it, print a reply; typing "end" stops.
print("enter end if you do not want to chat further.")
message=input("")
while message!="end":
  intents=pred_class(message,words,classes) # predicted tags, best first
  result=get_response(intents,data)
  print(result)
  message=input("") # read the next message before re-checking the exit word

enter end if you do not want to chat further.
hi
please elaborate your question
how are you?
Thanks For Asking!How can I help you?
what help you provide?
I can provide support related to following problems technical query,management related query,order related query,tracking related query,procurement query,outsourcing problem,manufacturing delay,
track order 562B78
Delayed
track order 56
On the Way!
track order 34901
On the Way!
end
