<a href="https://colab.research.google.com/github/teddius/TensorFlow_Chatbot_for_20171128_Talk_BotsHub_Meetup_Vienna/blob/master/python_chatbot_example_with_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
##########################################################################################
# "How to make a simple contextualized chatbot with tensorflow, keras, nltk and sklearn"
# 
#  by Andreas S. Rath <andreas.rath@ondewo.com> 
#  Github name: teddius
#  Github source code: http://bit.ly/tfcb17ondewo
#
#  Inspired by chatbotsmagazine article which was based on "tflearn" and is available at 
#  https://chatbotsmagazine.com/contextual-chat-bots-with-tensorflow-4391749d0077
##########################################################################################

In [14]:

######################################################################
# basic things we need for python processing and google colab
######################################################################

import random
import base64
import requests
import numpy as np
import json
from google.colab import files

In [15]:
######################################################################
# Things we need for NLP
######################################################################
import nltk
# Fetch the 'punkt' tokenizer data needed by the nltk.word_tokenize calls below.
# NOTE(review): newer NLTK releases may look for a 'punkt_tab' resource instead —
# confirm against the installed NLTK version.
nltk.download('punkt')

from nltk.stem.lancaster import LancasterStemmer
# One shared stemmer instance: it is used both when building the training
# vocabulary and when preprocessing sentences at prediction time.
stemmer = LancasterStemmer() # english stemmer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [18]:
######################################################################
# things we need for Tensorflow
######################################################################
import tensorflow as tf
print(tf.__version__)

from keras import metrics, optimizers
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from keras.layers import Dense, Flatten, Conv1D, Embedding, MaxPooling1D, Dropout
from keras.models import Sequential
from sklearn.model_selection import train_test_split

2.3.0


In [19]:
# Download the raw intents definition (JSON text) from the talk's GitHub repository.
data_url = "https://raw.githubusercontent.com/teddius/TensorFlow_Chatbot_for_20171128_Talk_BotsHub_Meetup_Vienna/master/intents.json"
# A timeout keeps the cell from hanging forever on a stalled connection.
req = requests.get(data_url, timeout=30)
# Fail here on 4xx/5xx instead of handing an error page to json.loads() later.
req.raise_for_status()
intent_json = req.text
print(intent_json)

{
  "intents": [
        {
         "tag": "greeting",
         "patterns": ["Hi", "Hey", "Ho", "What's up", "How are you", "Is anyone there?", "Hello", "Good day"],
         "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"],
         "context_set": ""
        },
        {
         "tag": "goodbye",
         "patterns": ["Bye", "Bye bye", "Ciao","See you later", "Goodbye"],
         "responses": ["Happy to talk to you later!","Thanks for showing up!", "Bye! Looking forward to seeing you again soon!"]
        },
        {
         "tag": "thanks",
         "patterns": ["Thanks", "Thank you", "That's helpful", "Perfect, thank you so much"],
         "responses": ["Happy to help!", "Any time!", "My pleasure"]
        },
        {
         "tag": "hours",
         "patterns": ["What hours are you open?", "What are your hours?", "When are you open?", "Opening times?" ],
         "responses": ["We're open every day 9am-9pm", "Our hours are 9am-

In [20]:
######################################################################
# Import our chat-bot intents file
######################################################################

# Parse the downloaded JSON text into a dict; the code below reads the list
# stored under intents['intents'], where every entry carries at least a 'tag'
# and a list of example 'patterns'.
intents = json.loads(intent_json)
print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'Hey', 'Ho', "What's up", 'How are you', 'Is anyone there?', 'Hello', 'Good day'], 'responses': ['Hello, thanks for visiting', 'Good to see you again', 'Hi there, how can I help?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['Bye', 'Bye bye', 'Ciao', 'See you later', 'Goodbye'], 'responses': ['Happy to talk to you later!', 'Thanks for showing up!', 'Bye! Looking forward to seeing you again soon!']}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful", 'Perfect, thank you so much'], 'responses': ['Happy to help!', 'Any time!', 'My pleasure']}, {'tag': 'hours', 'patterns': ['What hours are you open?', 'What are your hours?', 'When are you open?', 'Opening times?'], 'responses': ["We're open every day 9am-9pm", 'Our hours are 9am-9pm every day']}, {'tag': 'mopeds', 'patterns': ['Which mopeds do you have?', 'What kinds of mopeds are there?', 'What do you rent?'], 'responses': ['We rent Yamaha, Piaggio and Vespa

In [21]:
######################################################################
# Let's start to build our training data
######################################################################
# Tokens listed here are dropped from the vocabulary.
ignore_words = ['?']
words = []       # every token of every pattern (stemmed and de-duplicated below)
classes = []     # intent tags, in order of first appearance
documents = []   # (token list, intent tag) pairs, one per pattern

# Walk through every example sentence of every intent.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words += tokens
        documents.append((tokens, intent['tag']))
        # A tag is registered the first time one of its patterns is seen.
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Lower-case + stem the kept tokens; the set removes duplicates, sorted() fixes the order.
words = sorted({stemmer.stem(token.lower()) for token in words if token not in ignore_words})

# Same de-duplication and ordering for the class labels.
classes = sorted(set(classes))

rule = '------------------------------------------------------'
print(rule)
print('-------------  Summary -------------------------------')
print(rule)
print('')
summary_sections = [
    (classes, "classes\n"),
    (words, "words\n"),
    (documents, "documents\n"),
    (words, "unique stemmed words\n"),
]
for position, (items, label) in enumerate(summary_sections):
    # Every section except the first is preceded by a separator and a blank line.
    if position:
        print(rule)
        print('')
    print(len(items), label, items)
    print('')

------------------------------------------------------
-------------  Summary -------------------------------
------------------------------------------------------

9 classes
 ['goodbye', 'greeting', 'hours', 'mopeds', 'opentoday', 'payments', 'rental', 'thanks', 'today']

------------------------------------------------------

59 words
 ["'d", "'s", ',', 'a', 'acceiv', 'anyon', 'ar', 'at', 'bye', 'can', 'card', 'cash', 'ciao', 'credit', 'day', 'do', 'doe', 'good', 'goodby', 'hav', 'hello', 'help', 'hey', 'hi', 'ho', 'hour', 'how', 'i', 'is', 'kind', 'lat', 'lik', 'mastercard', 'mop', 'much', 'of', 'on', 'op', 'perfect', 'rent', 'see', 'so', 'tak', 'thank', 'that', 'ther', 'thi', 'tim', 'to', 'today', 'up', 'want', 'we', 'what', 'when', 'which', 'work', 'yo', 'you']

------------------------------------------------------

38 documents
 [(['Hi'], 'greeting'), (['Hey'], 'greeting'), (['Ho'], 'greeting'), (['What', "'s", 'up'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'a

In [22]:
# Create our training data: one [bag_of_words, one_hot_label] pair per document.
training = []
output = []
# Template for the one-hot label row (one slot per class).
output_empty = [0] * len(classes)

for pattern_tokens, tag in documents:
    # Stem the pattern tokens the same way the vocabulary in `words` was built.
    pattern_stems = {stemmer.stem(token.lower()) for token in pattern_tokens}
    # 1 where the vocabulary word occurs in the pattern, 0 elsewhere.
    bag = [1 if vocab_word in pattern_stems else 0 for vocab_word in words]

    # Output is a '0' for each tag and '1' for the current tag.
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

# print('training: ' + str(training))

# Shuffle the (features, label) pairs together so they stay aligned.
random.shuffle(training)

# Split the pairs into features and labels directly from the Python list.
# Packing them into np.array(training) first is unsafe: the two members of a
# pair have different lengths (len(words) vs. len(classes)), which recent NumPy
# versions reject as a ragged array, and which would silently turn into a 3-D
# array if both lengths ever happened to be equal.
# X and y stay lists of plain lists because later cells call y_train[0].index(1).
X = [bag for bag, _ in training]
y = [output_row for _, output_row in training]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=32, shuffle=True)

print('------------------------------------------------------')
print('-------------  Summary -------------------------------')
print('------------------------------------------------------')
print('')
print('Total elements in X: ' + str(len(X)) + ' consisting of')
print('elements in X_train: ' + str(len(X_train)) + ' and X_test: ' + str(len(X_test)))
print('')
print('Total elements in y (labels): ' + str(len(y)) + ' consisting of')
print('elements in y_train: ' + str(len(y_train)) + ' and y_test: ' + str(len(y_test)))



------------------------------------------------------
-------------  Summary -------------------------------
------------------------------------------------------

Total elements in X: 38 consisting of
elements in X_train: 28 and X_test: 10

Total elements in y (labels): 38 consisting of
elements in y_train: 28 and y_test: 10


In [23]:
# Show the first training sample: its bag-of-words vector, its one-hot label
# and the class name that the label decodes to.
separator = '------------------------------------------------------'
print(separator)
print('----- Let us look a specific training example --------')
print(separator)
print('X_train[0] (bag of word references):', X_train[0])
print(separator)
print('y_train[0] (class label):', y_train[0])
print(separator)
print('All class labels:', classes)
print(separator)
# The position of the single 1 in the one-hot row is the class index.
example_class_index = y_train[0].index(1)
print('Our training example class label at index classes[' + str(example_class_index) + ']=',
      classes[example_class_index])

------------------------------------------------------
----- Let us look a specific training example --------
------------------------------------------------------
X_train[0] (bag of word references): [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]
------------------------------------------------------
y_train[0] (class label): [0, 0, 1, 0, 0, 0, 0, 0, 0]
------------------------------------------------------
All class labels: ['goodbye', 'greeting', 'hours', 'mopeds', 'opentoday', 'payments', 'rental', 'thanks', 'today']
------------------------------------------------------
Our training example class label at index classes[2]= hours


In [25]:
####################################################################################
# Build a very simple neural network
####################################################################################
model = Sequential()
# Input width = length of one bag-of-words vector.
model.add(Dense(100, activation="relu", input_dim=np.array(X_train).shape[1]))
model.add(Dropout(0.2))
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.2))
# One softmax output per intent class.
model.add(Dense(len(classes), activation='softmax'))

# Optimizer, loss and reported metric.
# NOTE(review): learning_rate=0.1 is far above Adam's usual default and `decay`
# is a legacy optimizer argument — confirm both before upgrading TensorFlow/Keras.
adam = tf.keras.optimizers.Adam(learning_rate=0.1, decay=0.005)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

####################################################################################
# OPTIONAL for playing around you could add the following layers (watch out to 
# transform to correct shape)
# 
# model.add(Embedding(len(words), embedding_vector_length))
# model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
# model.add(MaxPooling1D(pool_size=2))
####################################################################################

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               6000      
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 50)                5050      
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 9)                 459       
Total params: 11,509
Trainable params: 11,509
Non-trainable params: 0
_________________________________________________________________
None


In [26]:
# Callbacks for the evaluation of the model
# Stop once the validation loss has not improved for 10 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto')
tensorboard_callback = TensorBoard(log_dir='./logs/')
# Checkpointing is configured but switched off by default; swap the two
# callbacks_list lines below to enable it.
checkpoint = ModelCheckpoint('./weights-improvement-{epoch:02d}-{loss:.4f}.hdf5',
                             monitor='loss', verbose=1, save_best_only=True, mode='min')
#callbacks_list = [checkpoint, tensorboard_callback, early_stop]
callbacks_list = [tensorboard_callback, early_stop]

nr_of_epoches=100
batch_size=32
# Hand Keras explicit 2-D arrays, consistent with every other model call in this
# notebook. A plain list of lists is ambiguous (it can be read as a list of
# separate inputs) and is not accepted by every Keras version.
history = model.fit(np.array(X_train),
                    np.array(y_train),
                    epochs=nr_of_epoches,
                    batch_size=batch_size,
                    validation_data=(np.array(X_test), np.array(y_test)),
                    callbacks=callbacks_list)

Epoch 1/100
Epoch 2/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


In [27]:
#############################################################
# Let's have a look at a single test example from X_test 
#############################################################
# Print the first held-out sample, decode its true label and show the model's
# raw per-class output for it.
divider = '------------------------------------------------------'
print('')
print('X_test[0] in total looks like:\n\n', X_test[0])
print('')
print(divider)
print('')
# The position of the single 1 in the one-hot row is the true class index.
true_class_index = y_test[0].index(1)
print('X_test[0] has the class index stored at y_test[0] with the label: classes[' + str(true_class_index) + ']=',
      classes[true_class_index])
print('')
print(divider)
print('')
# predict() expects a batch, hence the extra list around the single sample.
prediction = model.predict(np.array([X_test[0]]))
print('Total "raw" prediction for all classes looks like:\n\n', prediction)



X_test[0] in total looks like:

 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

------------------------------------------------------

X_test[0] has the class index stored at y_test[0] with the label: classes[0]= goodbye

------------------------------------------------------

Total "raw" prediction for all classes looks like:

 [[9.9984229e-01 9.8767174e-05 2.7909548e-06 5.5856111e-05 3.7807500e-11
  1.6927972e-07 5.0907464e-12 4.0597737e-08 6.2568839e-10]]


In [28]:
# Classes whose predicted probability is not above this value are dropped.
ERROR_THRESHOLD = 1e-11
# Keep [class index, probability] pairs for everything above the threshold.
results = []
for class_index, probability in enumerate(prediction[0]):
    if probability > ERROR_THRESHOLD:
        results.append([class_index, probability])
print('Our prediction translated to classes and probabilities:\n')
for class_index, probability in results:
    print(classes[class_index], round(probability, 8))

Our prediction translated to classes and probabilities:

goodbye 0.99984235
greeting 9.877e-05
hours 2.79e-06
mopeds 5.586e-05
opentoday 0.0
payments 1.7e-07
thanks 4e-08
today 0.0


In [29]:
###################################################################################
# Let's define two handy functions to do the natural language preprocessing for us
# and build the bag of words (bow) for us from a sentence of words
###################################################################################
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its lower-cased, stemmed tokens as a list."""
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words, show_details=False):
    """Encode `sentence` as a bag-of-words vector over the vocabulary `words`.

    Position i of the returned numpy array is 1 when words[i] equals one of the
    stemmed tokens of `sentence`, and 0 otherwise. With show_details=True every
    match is printed (once per occurrence in the sentence).
    """
    stems = clean_up_sentence(sentence)
    bag = [0 for _ in words]
    for stem in stems:
        for position, vocab_word in enumerate(words):
            if vocab_word != stem:
                continue
            bag[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)

    return np.array(bag)

In [30]:
# Encode an example question and list which vocabulary words it hits.
example_sentence = "Can you please tell me if you are open today?"
print('These are all the words for our classification:\n\n', words)
print('')
print('Our sentence is represented by the following bag of words (bow):\n')
# `p` is reused by the prediction cell that follows.
p = bow(example_sentence, words, show_details=True)

These are all the words for our classification:

 ["'d", "'s", ',', 'a', 'acceiv', 'anyon', 'ar', 'at', 'bye', 'can', 'card', 'cash', 'ciao', 'credit', 'day', 'do', 'doe', 'good', 'goodby', 'hav', 'hello', 'help', 'hey', 'hi', 'ho', 'hour', 'how', 'i', 'is', 'kind', 'lat', 'lik', 'mastercard', 'mop', 'much', 'of', 'on', 'op', 'perfect', 'rent', 'see', 'so', 'tak', 'thank', 'that', 'ther', 'thi', 'tim', 'to', 'today', 'up', 'want', 'we', 'what', 'when', 'which', 'work', 'yo', 'you']

Our sentence is represented by the following bag of words (bow):

found in bag: can
found in bag: you
found in bag: you
found in bag: ar
found in bag: op
found in bag: today


In [31]:
print('Kindly reminder: our classes we want to predict are:\n\n', classes)

# This module-level threshold is also the one classify() reads at call time.
ERROR_THRESHOLD = 1e-4
# Run the bag-of-words vector `p` through the model as a batch of one.
prediction = model.predict(np.array([p]))
# Keep [class index, probability] pairs above the threshold.
results = []
for class_index, probability in enumerate(prediction[0]):
    if probability > ERROR_THRESHOLD:
        results.append([class_index, probability])

print('\nOur prediction is:\n')
for class_index, probability in results:
    print('=> ', classes[class_index], round(probability, 4))

Kindly reminder: our classes we want to predict are:

 ['goodbye', 'greeting', 'hours', 'mopeds', 'opentoday', 'payments', 'rental', 'thanks', 'today']

Our prediction is:

=>  opentoday 1.0


In [32]:
########################################################################
# Let's create the handy pieces we need to
# (1) hold and track the user context
# (2) classify our sentence into a class
# (3) generate a contextualized response for a specific user
#        based on 3 elements
#       (a) class with highest prediction probability
#       (b) a specific user id
#       (c) context set
########################################################################

# (1) hold and track the user context
# Maps a user_id to the most recent 'context_set' value of an intent that
# response() handled for that user.
context = {}

# (2) classifies our sentence to a class
def classify(sentence):
    """Return the likely intents for `sentence`, strongest first.

    The sentence is encoded with bow() over the module-level `words`, run
    through `model`, and every class whose probability is above the
    module-level ERROR_THRESHOLD is kept.

    Returns a list of (class name, probability) tuples sorted by descending
    probability; the list is empty when nothing passes the threshold.
    """
    probabilities = model.predict(np.array([bow(sentence, words)]))[0]
    # [class index, probability] pairs above the threshold
    candidates = [[index, probability]
                  for index, probability in enumerate(probabilities)
                  if probability > ERROR_THRESHOLD]
    # strongest prediction first
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    return [(classes[index], probability) for index, probability in ranked]

# (3) generates a contextualized response for a specific user
def response(sentence, user_id='123', show_details=False):
    """Print a context-aware reply to `sentence` for the given user.

    The candidate intents from classify() are tried strongest first. An intent
    is used when it has no 'context_filter', or when its 'context_filter'
    equals the context currently stored for `user_id`. For the intent that is
    used, its 'context_set' value (if present) is stored in the module-level
    `context` dict and one of its 'responses' is picked at random and printed.

    Args:
        sentence: the user's message.
        user_id: key under which the conversation context is tracked.
        show_details: when True, also print the context being set and the
            tag of the matched intent.

    Returns:
        None. The reply is printed; nothing is printed when no intent applies.
    """
    for tag, _probability in classify(sentence):
        for intent in intents['intents']:
            if intent['tag'] != tag:
                continue

            # Skip contextual intents that do not apply to this user's conversation.
            if 'context_filter' in intent and (
                    user_id not in context
                    or intent['context_filter'] != context[user_id]):
                continue

            # Update the context only now that the intent is really being used.
            # Doing it before the filter check let an intent that was about to
            # be rejected overwrite the user's context anyway.
            if 'context_set' in intent:
                if show_details: print ('context:', intent['context_set'])
                context[user_id] = intent['context_set']

            if show_details: print ('tag:', intent['tag'])
            # a random response from the intent
            print(random.choice(intent['responses']))
            return None

In [33]:
# Smoke test with the default user_id ('123'): prints one reply of the best matching intent.
response('Hey')

Hi there, how can I help?


In [34]:
# Inspect the raw classification: (class name, probability) pairs above ERROR_THRESHOLD, strongest first.
classify('What are you opening times today?')

[('opentoday', 1.0)]

In [35]:
# Full round trip for a differently phrased question; the reply is chosen at random from the matched intent's responses.
response('is your shop open today?')

Our hours are 9am-9pm every day


In [36]:
# So how does this context thing work?
# Start from a clean slate: rebinding `context` to an empty dict drops whatever
# the earlier response() calls stored per user.
context = {}

In [37]:
# First turn of a conversation for user 'Andreas'; show_details=True also prints
# the context value being stored and the tag of the matched intent.
response('Can we rent a moped?', user_id='Andreas', show_details=True)

context: rentalday
tag: rental
Are you looking to rent today or later this week?


In [38]:
# Follow-up turn for the same user_id, so intents with a 'context_filter' are
# checked against the context stored by the previous call.
response('today', user_id='Andreas',show_details=True)

tag: today
For rentals today please call 1-800-MYMOPED


In [39]:
# Back to the default user_id ('123'); response() only reads and writes the
# context entry of the user_id it is called with.
response('Bye bye')

Bye! Looking forward to seeing you again soon!
