In [None]:
# https://towardsdatascience.com/a-simple-chatbot-in-python-with-deep-learning-3e8669997758


In [None]:
# In-memory stand-in for an intents.json file: every intent carries a tag,
# example user patterns, and the candidate replies for that tag.
data = {
  'intents': [
    {
      'tag': 'greeting',
      'patterns': ['Hello', 'How are you?', 'Hi there', 'Hi'],
      'responses': ['Hello', 'How are you doing?', 'Greetings!', ' How do you do?'],
    },
    {
      'tag': 'age',
      'patterns': ['How old are you?', 'When is your birthday?', 'When were you born'],
      'responses': ['I am 25 years old', 'I was born in 1995', 'My birthday is 28th March 1995', '28/3/1995'],
    },
    {
      'tag': 'date',
      'patterns': [
        'what are you doing this weekend?',
        'do you want to hang out some time?',
        'what are your plans for this week',
      ],
      'responses': ['I am available all week', "I don't have any plans", 'I am not busy'],
    },
    {
      'tag': 'name',
      'patterns': ["what's your name?", 'what are you called?', 'who are you?'],
      'responses': ['My name is Kippi', "I'm Kippi", 'Kippi'],
    },
    {
      'tag': 'goodbye',
      'patterns': ['bye', 'g2g', 'see ya', 'adios', 'cya'],
      'responses': ['It was nice speaking to you', 'See you later', 'Speak soon!'],
    },
  ]
}

In [None]:
import string

import nltk
from nltk.stem import WordNetLemmatizer

# Resources needed by word_tokenize and WordNetLemmatizer.
nltk.download('punkt')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

words = []    # every token seen in any pattern (deduplicated at the end)
classes = []  # intent tags, collected in first-seen order
doc_x = []    # raw pattern strings
doc_y = []    # tag of the pattern stored at the same index in doc_x

for intent in data['intents']:
  tag = intent['tag']
  for pattern in intent['patterns']:
    # word_tokenize keeps punctuation marks as separate tokens
    words += nltk.word_tokenize(pattern)
    doc_x.append(pattern)
    doc_y.append(tag)

    if tag not in classes:
      classes.append(tag)

# Drop punctuation tokens, then lowercase and lemmatize what remains.
kept_tokens = (token for token in words if token not in string.punctuation)
words = [lemmatizer.lemmatize(token.lower()) for token in kept_tokens]

print(words)
print(classes)

# Final vocabulary and label list: unique entries in sorted order.
words = sorted(set(words))
classes = sorted(set(classes))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
['hello', 'how', 'are', 'you', 'hi', 'there', 'hi', 'how', 'old', 'are', 'you', 'when', 'is', 'your', 'birthday', 'when', 'were', 'you', 'born', 'what', 'are', 'you', 'doing', 'this', 'weekend', 'do', 'you', 'want', 'to', 'hang', 'out', 'some', 'time', 'what', 'are', 'your', 'plan', 'for', 'this', 'week', 'what', "'s", 'your', 'name', 'what', 'are', 'you', 'called', 'who', 'are', 'you', 'bye', 'g2g', 'see', 'ya', 'adios', 'cya']
['greeting', 'age', 'date', 'name', 'goodbye']


In [None]:
# Display the punctuation characters that are filtered out of the token list
# when the vocabulary is built.
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [None]:
# Show the raw patterns on one line and their intent tags on the next;
# the two lists are aligned by index.
print(doc_x, doc_y, sep = '\n')

['Hello', 'How are you?', 'Hi there', 'Hi', 'How old are you?', 'When is your birthday?', 'When were you born', 'what are you doing this weekend?', 'do you want to hang out some time?', 'what are your plans for this week', "what's your name?", 'what are you called?', 'who are you?', 'bye', 'g2g', 'see ya', 'adios', 'cya']
['greeting', 'greeting', 'greeting', 'greeting', 'age', 'age', 'age', 'date', 'date', 'date', 'name', 'name', 'name', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye']


In [None]:
import random

import numpy as np

# Template for a one-hot label row; copied for each document.
out_empty = [0] * len(classes)
training = []

for idx, doc in enumerate(doc_x):
  # Tokenize and lemmatize each document the same way the vocabulary was
  # built, then test membership on whole tokens. A substring test against
  # the raw sentence would wrongly mark e.g. 'hi' as present in 'this' or
  # 'you' as present in 'your'.
  doc_tokens = {lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)}
  bow = [1 if word in doc_tokens else 0 for word in words]

  # One-hot encode the intent tag of this document.
  output_row = list(out_empty)
  output_row[classes.index(doc_y[idx])] = 1

  training.append([bow, output_row])

# Shuffle so the training order is not grouped by intent.
random.shuffle(training)
training = np.array(training, dtype = object)

# Split the (features, label) pairs into two dense arrays.
train_x = np.array(list(training[:,0]))
train_y = np.array(list(training[:,1]))

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
import tensorflow as tf

# Network dimensions are derived from the training arrays.
input_shape = (len(train_x[0]),)
output_shape = len(train_y[0])
epochs = 200

# Feed-forward classifier: two hidden ReLU layers with dropout, softmax output.
model = Sequential()
model.add(Dense(128, input_shape = input_shape, activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.3))
model.add(Dense(output_shape, activation = 'softmax'))

# NOTE(review): the `decay` argument may be rejected by newer Keras optimizer
# implementations - confirm against the installed TensorFlow/Keras version.
adam = tf.keras.optimizers.Adam(learning_rate = 0.01, decay = 1e-6)

model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()
# Use the `epochs` setting defined above instead of repeating the literal.
model.fit(train_x, train_y, epochs = epochs, verbose = 1)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 128)               4864      
                                                                 
 dropout_8 (Dropout)         (None, 128)               0         
                                                                 
 dense_13 (Dense)            (None, 64)                8256      
                                                                 
 dropout_9 (Dropout)         (None, 64)                0         
                                                                 
 dense_14 (Dense)            (None, 5)                 325       
                                                                 
Total params: 13,445
Trainable params: 13,445
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/2

<keras.callbacks.History at 0x7fc03931ad90>

In [None]:
def clean_text(text):
  """Tokenize `text` and return its lowercased, lemmatized tokens.

  Tokens are lowercased before lemmatization so they can match the
  vocabulary, whose entries were lowercased when it was built; without
  this, capitalised input such as "Hello" never matches any vocabulary word.

  Args:
    text: the raw user message.

  Returns:
    A list of token strings (punctuation tokens are kept).
  """
  tokens = nltk.word_tokenize(text)
  return [lemmatizer.lemmatize(word.lower()) for word in tokens]

def bag_of_words(text, vocab):
  """Encode `text` as a binary presence vector over `vocab`.

  Args:
    text: the raw message; it is tokenized by `clean_text`.
    vocab: sequence of vocabulary words defining the vector positions.

  Returns:
    A NumPy array with one entry per vocabulary word: 1 when that word
    equals one of the message tokens, 0 otherwise.
  """
  present = set(clean_text(text))
  return np.array([1 if word in present else 0 for word in vocab])

def pred_class(text, vocab, labels):
  """Return the labels predicted for `text`, highest score first.

  The message is encoded with `bag_of_words`, scored by the module-level
  `model`, and only classes whose score exceeds 0.2 are kept.

  Args:
    text: the raw message.
    vocab: vocabulary passed through to `bag_of_words`.
    labels: sequence mapping a class index to its label.

  Returns:
    A list of labels ordered by descending score; it is empty when no
    score exceeds the threshold.
  """
  thresh = 0.2
  features = bag_of_words(text, vocab)
  # predict() works on a batch, so wrap the single vector and take row 0.
  scores = model.predict(np.array([features]))[0]

  candidates = [[idx, score] for idx, score in enumerate(scores) if score > thresh]
  ranked = sorted(candidates, key = lambda pair: pair[1], reverse = True)
  return [labels[idx] for idx, _ in ranked]

def get_response(intents_list, intents_json, fallback = "Sorry, I didn't understand that."):
  """Pick a random response for the top-ranked intent.

  Args:
    intents_list: predicted tags, best first; may be empty.
    intents_json: dict with an 'intents' list whose items have 'tag' and
      'responses' keys.
    fallback: reply returned when there is no predicted tag or the tag is
      not present in `intents_json`.

  Returns:
    One of the matching intent's responses, or `fallback`.
  """
  # No prediction cleared the threshold: nothing to look up.
  if not intents_list:
    return fallback

  tag = intents_list[0]
  for intent in intents_json['intents']:
    if intent['tag'] == tag:
      return random.choice(intent['responses'])

  # The predicted tag is unknown to the intents definition.
  return fallback

In [None]:
# Interactive chat loop. Type 'quit' or 'exit' (or interrupt the cell) to stop.
while True:
  try:
    message = input('')
  except (KeyboardInterrupt, EOFError):
    # Interrupting the cell or closing the input ends the chat cleanly
    # instead of leaving a traceback in the output.
    break

  if message.strip().lower() in ('quit', 'exit'):
    break

  intents = pred_class(message, words, classes)
  result = get_response(intents, data)
  print('Bot -> '+ result)

hi
Chatbot -> How are you doing?


KeyboardInterrupt: ignored