# Chatbot in Python

A Naan Mudhalvan project to develop a chatbot for websites and apps that provides instant customer service.

## Importing required modules

In [None]:
import nltk
# Fetch the 'punkt' tokenizer data; presumably this is what the nltk.word_tokenize
# calls later in the notebook rely on (confirm against the installed NLTK version).
nltk.download('punkt')#Sentence tokenizer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle
import warnings
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
import random

## Pre-Processing NLP

In [None]:
# Accumulators filled by the tokenization loop in the next cell.
words = []       # every token seen across all patterns
classes = []     # distinct intent tags
documents = []   # (token list, tag) pairs, one per pattern
ignore_words = ['?', '!']  # tokens excluded when the vocabulary is built

# Read the intents definition through a context manager so the file handle
# is closed as soon as the text has been read.
with open('intents.json') as intents_file:
    data_file = intents_file.read()  # raw JSON text
intents = json.loads(data_file)  # parsed intents structure

In [None]:
# Walk every pattern of every intent, collecting tokens, (tokens, tag) pairs and tags.
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # split the pattern sentence into word tokens
        tokens = nltk.word_tokenize(pattern)
        # grow the flat vocabulary list with this pattern's tokens
        words.extend(tokens)
        # remember which tag this tokenized pattern belongs to
        documents.append((tokens, tag))
        # register the tag the first time one of its patterns is seen
        if tag not in classes:
            classes.append(tag)

In [None]:
# Fetch the WordNet corpus; presumably required by the WordNetLemmatizer instance
# created in the import cell before its lemmatize() calls below (confirm).
nltk.download('wordnet') #lexical database for the English language

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Fetch the 'omw-1.4' package; presumably a companion resource that WordNet lookups
# need on recent NLTK releases (TODO confirm it is still required).
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [None]:
# Build the vocabulary: drop ignored tokens, lower-case and lemmatize the rest,
# then de-duplicate and sort so every word has a stable index.
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))
# sort classes so every tag has a stable index
classes = sorted(set(classes))
# documents = combination between patterns and intents
print (len(documents), "documents\n", documents, "\n")
# classes = intents[tag]
print (len(classes), "classes\n", classes, "\n")
# words = all words, vocabulary
print (len(words), "unique lemmatized words\n", words, "\n")
# Persist vocabulary and class list; the context managers guarantee the
# pickle files are flushed and closed instead of leaking open handles.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

In [None]:
# Create the training data: one [bag-of-words, one-hot tag] pair per document.
training = []
# template for the one-hot output row (one slot per class)
output_empty = [0] * len(classes)
for doc in documents:
    # doc is (token list, tag); lower-case + lemmatize the tokens the same way
    # the vocabulary was built. A set makes the membership test below O(1).
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}
    # bag of words: 1 where the vocabulary word occurs in this pattern, else 0
    bag = [1 if w in pattern_words else 0 for w in words]

    # one-hot row: 1 at the index of this document's tag, 0 elsewhere
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])
# Shuffle the pairs together so features and labels stay aligned.
random.shuffle(training)
# Split features and labels directly from the list. Wrapping the pairs in
# np.array() would build a ragged array (bag and output_row have different
# lengths), which recent NumPy versions reject with a ValueError.
# X - patterns(words), Y - intents(tags)
train_x = [pair[0] for pair in training]
train_y = [pair[1] for pair in training]
print("Training data created")

Training data created


## Model Creation

In [None]:
# Reset TensorFlow's default graph before the model is built.
# Uses the public compat alias (via the `tf` import at the top of the notebook)
# rather than importing from the private `tensorflow.python` package.
tf.compat.v1.reset_default_graph()

In [None]:
# Feed-forward classifier: bag-of-words vector in, softmax over intent classes out.
input_dim = len(train_x[0])     # one input per vocabulary word
num_classes = len(train_y[0])   # one output per intent tag
model = Sequential([
    Dense(128, input_shape=(input_dim,), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])
# Show the (randomly initialised) kernel of the first Dense layer.
first_layer_kernel = model.layers[0].get_weights()[0]
print("First layer:", first_layer_kernel)

First layer: [[ 0.08650128 -0.0634183  -0.07228476 ...  0.0830598   0.06828941
   0.0835707 ]
 [-0.03220329 -0.07752044  0.00102633 ...  0.02529063 -0.04513907
  -0.06462342]
 [-0.01536799  0.10085208  0.09434902 ...  0.0604412   0.02147218
   0.09148347]
 ...
 [ 0.10917494 -0.02004863 -0.05833514 ... -0.03475115 -0.06402607
   0.01667366]
 [-0.11102267 -0.09436521  0.03962915 ...  0.12295949  0.02094749
   0.00144091]
 [-0.05854262  0.11776628 -0.02101282 ...  0.04151178  0.11692443
  -0.08398442]]


In [None]:
# Configure training: categorical cross-entropy loss against the one-hot tag rows,
# tracking accuracy. NOTE(review): the optimizer used here is 'adam'; the SGD class
# imported at the top of the notebook is not used in this cell.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

## Training Model

In [None]:
# Train on the full bag-of-words matrix; `hist` keeps the History object returned by fit().
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
# Save only the model. The second positional parameter of Model.save is
# `overwrite`, so passing `hist` there did not store the training history.
model.save('chatbot_model.h5')

print("model created")

## Preparing to test our model

In [None]:
def clean_up_sentence(sentence):
  """Tokenize `sentence` and return its tokens lower-cased and lemmatized.

  Uses nltk.word_tokenize for splitting and the notebook-level `lemmatizer`
  for normalisation; the result is a list of strings in sentence order.
  """
  return [
      lemmatizer.lemmatize(token.lower())
      for token in nltk.word_tokenize(sentence)
  ]

In [None]:
def bow(sentence, words, show_details=True):
  """Encode `sentence` as a bag-of-words vector over the vocabulary `words`.

  Args:
    sentence: raw user text; normalised with clean_up_sentence().
    words: vocabulary sequence; position i of the result corresponds to words[i].
    show_details: when true, print a line for every vocabulary match.

  Returns:
    A NumPy array with 1 at each vocabulary position whose word occurs in
    the sentence and 0 elsewhere.
  """
  tokens = clean_up_sentence(sentence)
  # start from an all-zero vector, one slot per vocabulary word
  bag = [0] * len(words)

  for token in tokens:
      for position, vocab_word in enumerate(words):
          if vocab_word != token:
              continue
          # mark the vocabulary slot that matches this sentence token
          bag[position] = 1
          if show_details:
              print ("found in bag: %s" % vocab_word)
  return np.array(bag)

In [None]:
def predict_class(sentence, model):
  """Return the intents `model` predicts for `sentence`, strongest first.

  The sentence is encoded with bow() against the notebook-level `words`
  vocabulary and passed to model.predict. Only classes whose score is above
  ERROR_THRESHOLD are kept.

  Returns:
    A list of {"intent": <tag from `classes`>, "probability": <score as str>}
    dicts sorted by descending score; empty when no score passes the threshold.
  """
  ERROR_THRESHOLD = 0.25

  features = bow(sentence, words, show_details=False)
  # predict on a batch of one and take that single row of class scores
  scores = model.predict(np.array([features]))[0]

  # keep (class index, score) pairs above the threshold, highest score first
  ranked = sorted(
      ([index, score] for index, score in enumerate(scores) if score > ERROR_THRESHOLD),
      key=lambda pair: pair[1],
      reverse=True,
  )

  return [
      {"intent": classes[index], "probability": str(score)}
      for index, score in ranked
  ]

In [None]:
def getResponse(ints, intents_json):
  """Pick a random response for the top predicted intent.

  Args:
    ints: list of prediction dicts (each with an 'intent' key), best first;
      may be empty when no prediction was produced.
    intents_json: parsed intents structure with an 'intents' list whose
      entries carry 'tag' and 'responses'.

  Returns:
    One of the matching intent's responses chosen at random, or a fallback
    message when `ints` is empty or its top tag matches no intent.
  """
  fallback = 'You may need to rephrase your question.'

  # No prediction at all: previously this raised IndexError on ints[0].
  if not ints:
      return fallback

  tag = ints[0]['intent']
  for intent in intents_json['intents']:
      if intent['tag'] == tag:
          return random.choice(intent['responses'])

  # Tag not present in the intents file: previously `result` was left unbound.
  return fallback

In [None]:
def chatbot_response(text):
  """Return the chatbot's reply to `text`.

  Classifies the text with predict_class() using the notebook-level `model`,
  then selects a response from the notebook-level `intents` via getResponse().
  """
  predicted_intents = predict_class(text, model)
  return getResponse(predicted_intents, intents)


## Testing model

In [None]:
# Interactive loop: read a message, print the bot's reply, stop on quit/exit/bye.
while True:
  query = input('Enter Message:')
  if query in ['quit','exit','bye']:
      break
  try:
      res = chatbot_response(query)
      print(res)
  except Exception:
      # Catch ordinary failures only; a bare `except:` would also swallow
      # KeyboardInterrupt/SystemExit and make the loop hard to stop.
      print('You may need to rephrase your question.')

Enter Message:Who are you?
I am your chatbot assist.
Enter Message:what can you do?
I can answer to low-intermediate questions regarding college
Enter Message:Where is your college?
VV College of Engineering, V.V.Nagar, Tisaiyanvilai(Via), Tirunelveli(Dist.)
Enter Message:Who is Principal?
Dr. K. S. Saji  is college principal and if you need any help then call your branch hod first.That is more appropriate
Enter Message:Who is computer science HOD?
Muthulakshmi
Enter Message:When college opens?
College is open 9:30 AM - 4:30 PM Monday-Friday!
Enter Message:What the hell!!!!!
please use appropriate language
Enter Message:Who created you?
Kim Poobi


KeyboardInterrupt: ignored