<a href="https://colab.research.google.com/github/robert-brosh23/DemoChatbot/blob/main/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Data

In [1]:
import json

with open("data.json") as json_data:
  data = json.load(json_data)

print(data)

{'intents': [{'tag': 'greeting', 'patterns': ['Hello', 'Hi', 'I need help', 'Hey'], 'responses': ['Hi there! How can I help?', 'Hello, and welcome to this chatbot'], 'context_set': ''}, {'tag': 'bye', 'patterns': ['Thank you for the help', 'Bye', 'Great thanks'], 'responses': ['Do you have any further questions?', 'Thanks for asking a question']}, {'tag': 'courses', 'patterns': ['What are the courses available?', 'Do you have coding courses?'], 'responses': ['We have courses on creative design, programming and machine learning', 'We have over 300 courses available']}, {'tag': 'coding', 'patterns': ['What coding courses do you have?', 'I want to learn programming'], 'responses': ['We have many courses, including Hello Coding and Python for Automation', 'Check out our site listing for a complete list of courses']}, {'tag': 'machinelearning', 'patterns': ['What machine learning courses do you teach?', 'Do you teach AI?', 'I want to learn artificial intelligence'], 'responses': ['We have C

Tokenize data

In [2]:
import nltk
nltk.download('punkt')

words = []
documents = []
classes = []

for intent in data["intents"]:
  for pattern in intent["patterns"]:
    word = nltk.word_tokenize(pattern)

    words.extend(word)
    documents.append((word, intent["tag"]))

    if intent["tag"] not in classes:
      classes.append(intent["tag"])

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
print(words)

['Hello', 'Hi', 'I', 'need', 'help', 'Hey', 'Thank', 'you', 'for', 'the', 'help', 'Bye', 'Great', 'thanks', 'What', 'are', 'the', 'courses', 'available', '?', 'Do', 'you', 'have', 'coding', 'courses', '?', 'What', 'coding', 'courses', 'do', 'you', 'have', '?', 'I', 'want', 'to', 'learn', 'programming', 'What', 'machine', 'learning', 'courses', 'do', 'you', 'teach', '?', 'Do', 'you', 'teach', 'AI', '?', 'I', 'want', 'to', 'learn', 'artificial', 'intelligence', 'Do', 'you', 'teach', 'creative', 'courses', 'Do', 'you', 'have', 'non', 'coding', 'courses', 'I', 'want', 'to', 'learn', 'something', 'else']


In [4]:
print(classes)

['greeting', 'bye', 'courses', 'coding', 'machinelearning', 'creative']


In [5]:
print(documents)

[(['Hello'], 'greeting'), (['Hi'], 'greeting'), (['I', 'need', 'help'], 'greeting'), (['Hey'], 'greeting'), (['Thank', 'you', 'for', 'the', 'help'], 'bye'), (['Bye'], 'bye'), (['Great', 'thanks'], 'bye'), (['What', 'are', 'the', 'courses', 'available', '?'], 'courses'), (['Do', 'you', 'have', 'coding', 'courses', '?'], 'courses'), (['What', 'coding', 'courses', 'do', 'you', 'have', '?'], 'coding'), (['I', 'want', 'to', 'learn', 'programming'], 'coding'), (['What', 'machine', 'learning', 'courses', 'do', 'you', 'teach', '?'], 'machinelearning'), (['Do', 'you', 'teach', 'AI', '?'], 'machinelearning'), (['I', 'want', 'to', 'learn', 'artificial', 'intelligence'], 'machinelearning'), (['Do', 'you', 'teach', 'creative', 'courses'], 'creative'), (['Do', 'you', 'have', 'non', 'coding', 'courses'], 'creative'), (['I', 'want', 'to', 'learn', 'something', 'else'], 'creative')]


Clean Data

In [6]:
from nltk.stem.lancaster import LancasterStemmer

stemmer = LancasterStemmer()

words = [stemmer.stem(word.lower()) for word in words]

print(words)

['hello', 'hi', 'i', 'nee', 'help', 'hey', 'thank', 'you', 'for', 'the', 'help', 'bye', 'gre', 'thank', 'what', 'ar', 'the', 'cours', 'avail', '?', 'do', 'you', 'hav', 'cod', 'cours', '?', 'what', 'cod', 'cours', 'do', 'you', 'hav', '?', 'i', 'want', 'to', 'learn', 'program', 'what', 'machin', 'learn', 'cours', 'do', 'you', 'teach', '?', 'do', 'you', 'teach', 'ai', '?', 'i', 'want', 'to', 'learn', 'art', 'intellig', 'do', 'you', 'teach', 'cre', 'cours', 'do', 'you', 'hav', 'non', 'cod', 'cours', 'i', 'want', 'to', 'learn', 'someth', 'els']


In [7]:
words = sorted(list(set(words)))

print(words)

['?', 'ai', 'ar', 'art', 'avail', 'bye', 'cod', 'cours', 'cre', 'do', 'els', 'for', 'gre', 'hav', 'hello', 'help', 'hey', 'hi', 'i', 'intellig', 'learn', 'machin', 'nee', 'non', 'program', 'someth', 'teach', 'thank', 'the', 'to', 'want', 'what', 'you']


Build bag of words for ML model

In [8]:
print(documents)

[(['Hello'], 'greeting'), (['Hi'], 'greeting'), (['I', 'need', 'help'], 'greeting'), (['Hey'], 'greeting'), (['Thank', 'you', 'for', 'the', 'help'], 'bye'), (['Bye'], 'bye'), (['Great', 'thanks'], 'bye'), (['What', 'are', 'the', 'courses', 'available', '?'], 'courses'), (['Do', 'you', 'have', 'coding', 'courses', '?'], 'courses'), (['What', 'coding', 'courses', 'do', 'you', 'have', '?'], 'coding'), (['I', 'want', 'to', 'learn', 'programming'], 'coding'), (['What', 'machine', 'learning', 'courses', 'do', 'you', 'teach', '?'], 'machinelearning'), (['Do', 'you', 'teach', 'AI', '?'], 'machinelearning'), (['I', 'want', 'to', 'learn', 'artificial', 'intelligence'], 'machinelearning'), (['Do', 'you', 'teach', 'creative', 'courses'], 'creative'), (['Do', 'you', 'have', 'non', 'coding', 'courses'], 'creative'), (['I', 'want', 'to', 'learn', 'something', 'else'], 'creative')]


In [9]:
empty_output = [0] *len(classes)

print(empty_output)

[0, 0, 0, 0, 0, 0]


In [10]:
training_data = []

for document in documents:
  bag_of_words = []

  pattern_words = document[0]
  pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]

  for word in words:
    bag_of_words.append(1) if word in pattern_words else bag_of_words.append(0)

  output_row = list(empty_output)
  output_row[classes.index(document[1])] = 1
  training_data.append([bag_of_words, output_row])

In [11]:
print(training_data)

[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1], [0, 1, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]], [[1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0], [0, 0, 1, 0, 0, 0]], [[1, 0, 0, 0, 0

Split data for machine learning

In [12]:
import random

random.shuffle(training_data)

print(training_data)

[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1], [0, 1, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 0, 1]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], [[1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], [0, 0, 0, 1, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 1]], [[1, 1, 0, 0, 0

In [13]:
import numpy

training_numpy = numpy.array(training_data)

print(training_numpy)

[[list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1])
  list([0, 1, 0, 0, 0, 0])]
 [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])
  list([0, 1, 0, 0, 0, 0])]
 [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
  list([1, 0, 0, 0, 0, 0])]
 [list([1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
  list([0, 0, 1, 0, 0, 0])]
 [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0])
  list([0, 0, 0, 0, 0, 1])]
 [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
  list([1, 0, 0, 0, 0, 0])]
 [list([1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])
  list([0, 0, 0, 1, 0, 0])]
 [list([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0

  training_numpy = numpy.array(training_data)


In [14]:
train_X = list(training_numpy[:,0])

print(train_X)

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0,

In [15]:
train_y = list(training_numpy[:,1])

print(train_y)

[[0, 1, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0], [1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]]


Build a TensorFlow machine learning model for chat

In [16]:
!pip install tflearn



In [17]:
import tflearn

neural_network = tflearn.input_data(shape = [None, len(train_X[0])])

print(neural_network)

Instructions for updating:
non-resource variables are not supported in the long term


Tensor("InputData/X:0", shape=(?, 33), dtype=float32)


In [18]:
neural_network = tflearn.fully_connected(neural_network, 8)
neural_network = tflearn.fully_connected(neural_network, 8)

print(neural_network)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Tensor("FullyConnected_1/BiasAdd:0", shape=(?, 8), dtype=float32)


In [19]:
neural_network = tflearn.fully_connected(neural_network, len(train_y[0]), activation="softmax")

print(neural_network)

Tensor("FullyConnected_2/Softmax:0", shape=(?, 6), dtype=float32)


In [20]:
neural_network = tflearn.regression(neural_network)

print(neural_network)

Tensor("FullyConnected_2/Softmax:0", shape=(?, 6), dtype=float32)


In [21]:
model = tflearn.DNN(neural_network)

print(model)

<tflearn.models.dnn.DNN object at 0x7d794851c8b0>


In [22]:
model.fit(train_X, train_y, n_epoch = 2000, batch_size = 8, show_metric = True)

Training Step: 5999  | total loss: [1m[32m0.00162[0m[0m | time: 0.011s
| Adam | epoch: 2000 | loss: 0.00162 - acc: 1.0000 -- iter: 16/17
Training Step: 6000  | total loss: [1m[32m0.00165[0m[0m | time: 0.017s
| Adam | epoch: 2000 | loss: 0.00165 - acc: 1.0000 -- iter: 17/17
--


In [23]:
model.save("chatbot_dnn.tflearn")

In [24]:
model.load("chatbot_dnn.tflearn")

print(model)

<tflearn.models.dnn.DNN object at 0x7d794851c8b0>


Process input/output to test ChatBot

In [25]:
question = "Do you sell any coding course?"

def process_question(question):
  question_tokenized = nltk.word_tokenize(question)

  question_stemmed = [stemmer.stem(word.lower()) for word in question_tokenized]

  bag = [0] * len(words)

  for stem in question_stemmed:
    for index, word in enumerate(words):
      if word == stem:
        bag[index] = 1

  return(numpy.array(bag))

prediction = model.predict([process_question(question)])[0]

In [26]:
print(classes)

['greeting', 'bye', 'courses', 'coding', 'machinelearning', 'creative']


In [27]:
print(prediction)

[1.5320127e-14 2.3716882e-06 9.6613944e-01 3.3857435e-02 4.0300892e-12
 7.0112321e-07]


In [28]:
def categorize(prediction):

  prediction_top = [[index,result] for index,result in enumerate(prediction) if result > 0.5]

  prediction_top.sort(key=lambda x: x[1], reverse = True)

  result = []
  for prediction_value in prediction_top:
    result.append((classes[prediction_value[0]], prediction_value[1]))

  return result


In [52]:
categorize(prediction)

[('courses', 0.96613944)]

In [64]:
def chatbot(question):
  prediction = model.predict([process_question(question)])
  return categorize(prediction[0])

In [62]:
print(chatbot("Do you have non-coding content?"))

[('courses', 0.9475104)]


In [32]:
chatbot("Hello")

[('greeting', 0.99774253)]


In [34]:
user_input = input("Do you have a question for me?")

Do you have a question for me?yes


In [35]:
print(user_input)

yes


In [46]:
def respond_to_input(user_input):
  question_category = chatbot(user_input)
  if question_category:
    while question_category:
      for intent in data["intents"]:
        if intent["tag"] == question_category[0][0]:
          return random.choice(intent["responses"])

In [40]:
respond_to_input(user_input)

[('courses', 0.99967396)]


In [65]:
while True:
  user_input = input("Do you have a question for me?\n")
  response = respond_to_input(user_input)
  print(response)

Do you have a question for me?
hello
Hi there! How can I help?
Do you have a question for me?
What is available?
We have over 300 courses available
Do you have a question for me?
I would like to learn some programming
Check out our site listing for a complete list of courses
Do you have a question for me?
are there any coding courses?
We have over 300 courses available
Do you have a question for me?
can you teach programming?
We have many courses, including Hello Coding and Python for Automation
Do you have a question for me?
can you teach programming?
Check out our site listing for a complete list of courses
Do you have a question for me?
coding
We have courses on creative design, programming and machine learning
Do you have a question for me?
coding
We have over 300 courses available
Do you have a question for me?
I'd like to learn some machine learning
We have Complete Machine Learning, ChatGPT Bundle and much more


KeyboardInterrupt: ignored