In [24]:

import json

# Read the intents file. JSON text is UTF-8 by specification, so the encoding
# is given explicitly instead of relying on the platform default (a legacy
# code page on Windows), which could mis-decode non-ASCII patterns/responses.
with open("data.json", encoding="utf-8") as json_data:
    data = json.load(json_data)
print(data)

{'intents': [{'tag': 'greeting', 'patterns': ['Hello', 'Hi', 'I need help', 'Hey'], 'responses': ['Hi there! How can I help?', 'Hello, and welcome to this chatbot'], 'context_set': ''}, {'tag': 'bye', 'patterns': ['Thank you for the help', 'Bye', 'Great thanks'], 'responses': ['Do you have any further questions?', 'Thanks for asking a question']}, {'tag': 'courses', 'patterns': ['What are the courses available?', 'Do you have courses?'], 'responses': ['We have courses on creative design, programming and machine learning', 'We have over 300 courses available']}, {'tag': 'coding', 'patterns': ['What coding courses do you have?', 'I want to learn programming'], 'responses': ['We have many courses, including Hello Coding and Python for Automation', 'Check out our site listing for a complete list of courses']}, {'tag': 'machinelearning', 'patterns': ['What machine learning courses do you teach?', 'Do you teach AI?', 'I want to learn artificial intelligence'], 'responses': ['We have Complete

In [25]:
import nltk
# Fetch the Punkt tokenizer data; the call reports "already up-to-date" when
# the package is present. Presumably this is what nltk.word_tokenize relies on
# in the tokenising cell -- NOTE(review): newer NLTK releases may ask for
# 'punkt_tab' instead; confirm against the installed version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [26]:
# Accumulators filled while walking every pattern of every intent.
words = []                   # all tokens from all patterns, duplicates kept
pattern_words_with_tag = []  # one (token list, intent tag) pair per pattern
classes = []                 # distinct intent tags, in first-seen order

for intent in data["intents"]:
    for pattern in intent["patterns"]:
        tokens = nltk.word_tokenize(pattern)
        tag = intent["tag"]

        words += tokens
        pattern_words_with_tag.append((tokens, tag))

        # Registered inside the pattern loop, so an intent that has no
        # patterns never becomes a class.
        if tag not in classes:
            classes.append(tag)

In [27]:
# Show the flat token list gathered from every pattern; duplicates and
# punctuation tokens such as '?' are still present at this stage.
print(words)

['Hello', 'Hi', 'I', 'need', 'help', 'Hey', 'Thank', 'you', 'for', 'the', 'help', 'Bye', 'Great', 'thanks', 'What', 'are', 'the', 'courses', 'available', '?', 'Do', 'you', 'have', 'courses', '?', 'What', 'coding', 'courses', 'do', 'you', 'have', '?', 'I', 'want', 'to', 'learn', 'programming', 'What', 'machine', 'learning', 'courses', 'do', 'you', 'teach', '?', 'Do', 'you', 'teach', 'AI', '?', 'I', 'want', 'to', 'learn', 'artificial', 'intelligence', 'Do', 'you', 'teach', 'creative', 'courses', 'Do', 'you', 'have', 'non', 'coding', 'courses', 'I', 'want', 'to', 'learn', 'something', 'else']


In [28]:
# A list of (token list, intent tag) tuples, one entry per training pattern.
print(pattern_words_with_tag)

[(['Hello'], 'greeting'), (['Hi'], 'greeting'), (['I', 'need', 'help'], 'greeting'), (['Hey'], 'greeting'), (['Thank', 'you', 'for', 'the', 'help'], 'bye'), (['Bye'], 'bye'), (['Great', 'thanks'], 'bye'), (['What', 'are', 'the', 'courses', 'available', '?'], 'courses'), (['Do', 'you', 'have', 'courses', '?'], 'courses'), (['What', 'coding', 'courses', 'do', 'you', 'have', '?'], 'coding'), (['I', 'want', 'to', 'learn', 'programming'], 'coding'), (['What', 'machine', 'learning', 'courses', 'do', 'you', 'teach', '?'], 'machinelearning'), (['Do', 'you', 'teach', 'AI', '?'], 'machinelearning'), (['I', 'want', 'to', 'learn', 'artificial', 'intelligence'], 'machinelearning'), (['Do', 'you', 'teach', 'creative', 'courses'], 'creative'), (['Do', 'you', 'have', 'non', 'coding', 'courses'], 'creative'), (['I', 'want', 'to', 'learn', 'something', 'else'], 'creative')]


In [29]:
# Show the distinct intent tags in the order they were first encountered;
# this order defines the positions of the one-hot output rows built later.
print(classes)

['greeting', 'bye', 'courses', 'coding', 'machinelearning', 'creative']


In [30]:
#  Clean chat data for machine learning
from nltk.stem.lancaster import LancasterStemmer
# One shared stemmer instance, reused by the later encoding cells.
stemmer = LancasterStemmer()

# Lower-case first, then stem, so that e.g. "Do" and "do" end up as one entry.
words_lowercase = [stemmer.stem(lowered) for lowered in map(str.lower, words)]

# A set drops the duplicates; sorting gives the vocabulary a stable order,
# which fixes the column order of every bag-of-words vector.
unique_words = sorted(set(words_lowercase))
print(unique_words)

['?', 'ai', 'ar', 'art', 'avail', 'bye', 'cod', 'cours', 'cre', 'do', 'els', 'for', 'gre', 'hav', 'hello', 'help', 'hey', 'hi', 'i', 'intellig', 'learn', 'machin', 'nee', 'non', 'program', 'someth', 'teach', 'thank', 'the', 'to', 'want', 'what', 'you']


In [31]:
# Template label row: one zero per intent class.
empty_output = [0 for _ in classes]
print(empty_output)

# Work on a copy so that setting a position never alters the template.
output_row = empty_output.copy()
print(output_row)


[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]


In [32]:
#  Build bag of words for ML model
#
# Each training example is a two-element list [bag_of_words, output_row]:
#   * bag_of_words -- one 0/1 flag per entry of unique_words, set to 1 when
#     that stem occurs in the pattern;
#   * output_row   -- one-hot row over classes marking the pattern's intent.
# https://en.wikipedia.org/wiki/Bag-of-words_model

empty_output = [0] * len(classes)

training_data = []

# Unpack each (token list, tag) pair directly; the loop variable must not be
# called `tuple`, because that would shadow the builtin for every later cell.
for pattern_tokens, tag in pattern_words_with_tag:
    # Normalise the same way the vocabulary was built (lower-case, then stem).
    # A set makes the membership test below a constant-time lookup.
    pattern_stems = {stemmer.stem(word.lower()) for word in pattern_tokens}

    bag_of_words = [1 if unique_word in pattern_stems else 0
                    for unique_word in unique_words]

    # Copy the all-zero template, then flag the position of this intent.
    output_row = list(empty_output)
    output_row[classes.index(tag)] = 1
    training_data.append([bag_of_words, output_row])

print(training_data)



[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1], [0, 1, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]], [[1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0], [0, 0, 1, 0, 0, 0]], [[1, 0, 0, 0, 0

In [33]:
print(len(training_data))

17


In [34]:
#  Split data for machine learning

import random
import numpy

# Randomise the example order in place. No seed is set here, so each run
# produces a different order.
random.shuffle(training_data)

# Every row is a [bag_of_words, output_row] pair; with the two parts having
# different lengths the rows are stored as Python objects (dtype=object).
training_numpy = numpy.array(training_data, dtype=object)

# Column 0 carries the inputs, column 1 the one-hot targets.
FEATURE_COLUMN, LABEL_COLUMN = 0, 1

train_X = [features for features in training_numpy[:, FEATURE_COLUMN]]
print(train_X)

train_Y = [labels for labels in training_numpy[:, LABEL_COLUMN]]
print(train_Y)

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [35]:
#  Build a TensorFlow machine learning model for chat

import tflearn 
# Layer widths are taken from the data: the input matches one bag-of-words
# vector, the output matches one one-hot label row.
input_width = len(train_X[0])
output_width = len(train_Y[0])

neural_network = tflearn.input_data(shape=[None, input_width])
print(neural_network)

# Two hidden layers of 8 units each, created with tflearn's default activation.
for hidden_units in (8, 8):
    neural_network = tflearn.fully_connected(neural_network, hidden_units)
    print(neural_network)

# Softmax output layer: one score per intent class.
neural_network = tflearn.fully_connected(neural_network, output_width, activation="softmax")
print(neural_network)

# Attach the training op using tflearn's default regression settings.
neural_network = tflearn.regression(neural_network)
print(neural_network)

model = tflearn.DNN(neural_network)
print(model)

model.fit(train_X, train_Y, n_epoch=2000, batch_size=8, show_metric=True)

Training Step: 5999  | total loss: [1m[32m0.00046[0m[0m | time: 0.010s
| Adam | epoch: 2000 | loss: 0.00046 - acc: 1.0000 -- iter: 16/17
Training Step: 6000  | total loss: [1m[32m0.00047[0m[0m | time: 0.016s
| Adam | epoch: 2000 | loss: 0.00047 - acc: 1.0000 -- iter: 17/17
--


In [36]:
# Persist the trained model under the "chatbot_dnn.tflearn" checkpoint name so
# that it can be restored later with model.load().
model.save("chatbot_dnn.tflearn")


INFO:tensorflow:C:\Users\sin\G\Personal\python-sandbox\ChatBot\chatbot_dnn.tflearn is not in all_model_checkpoint_paths. Manually adding it.


['Hello', 'Hi', 'I', 'need', 'help', 'Hey', 'Thank', 'you', 'for', 'the', 'help', 'Bye', 'Great', 'thanks', 'What', 'are', 'the', 'courses', 'available', '?', 'Do', 'you', 'have', 'courses', '?', 'What', 'coding', 'courses', 'do', 'you', 'have', '?', 'I', 'want', 'to', 'learn', 'programming', 'What', 'machine', 'learning', 'courses', 'do', 'you', 'teach', '?', 'Do', 'you', 'teach', 'AI', '?', 'I', 'want', 'to', 'learn', 'artificial', 'intelligence', 'Do', 'you', 'teach', 'creative', 'courses', 'Do', 'you', 'have', 'non', 'coding', 'courses', 'I', 'want', 'to', 'learn', 'something', 'else']


In [52]:
# Restore the parameters from the "chatbot_dnn.tflearn" checkpoint into the
# existing model object.
model.load("chatbot_dnn.tflearn")

print(model)

# Sample user question used to exercise the trained model.
question = "Do you sell any coding course?"

def process_question(question):
    """Encode a free-text question as a bag-of-words vector over ``unique_words``.

    The question is tokenized with ``nltk.word_tokenize`` and each token is
    lower-cased and stemmed with the notebook's ``stemmer``, i.e. the same
    normalisation that produced ``unique_words``.

    Args:
        question: Raw question text.

    Returns:
        A 1-D numpy array with one entry per word in ``unique_words``: 1 when
        that stem occurs in the question, 0 otherwise. Stems that are not in
        the vocabulary are ignored.
    """
    # A set gives constant-time membership tests, so the vocabulary is scanned
    # once instead of once per question token.
    question_stems = {stemmer.stem(token.lower())
                      for token in nltk.word_tokenize(question)}

    return numpy.array([1 if word in question_stems else 0 for word in unique_words])

# Encode the sample question; the vector has one entry per vocabulary word,
# so the printed length equals len(unique_words).
processed_question = process_question(question)
print(len(processed_question))

INFO:tensorflow:Restoring parameters from C:\Users\sin\G\Personal\python-sandbox\ChatBot\chatbot_dnn.tflearn
<tflearn.models.dnn.DNN object at 0x00000137CB883CA0>
33


In [56]:
# The encoded question is wrapped in a list so that predict receives a batch
# of one. The printed row has one softmax score per intent; the positions
# follow the order of `classes`, because the one-hot training labels were
# built with classes.index(...).
prediction = model.predict([processed_question])
print(prediction)
#print(classes)

[[2.6559168e-09 8.0910176e-08 3.6639106e-04 8.4420657e-01 3.4384280e-05
  1.5539253e-01]]
