# Install Dependencies

In [1]:
!pip install tflearn
!pip install tensorflow
!pip install nltk
!pip install numpy

Collecting tflearn
  Downloading tflearn-0.5.0.tar.gz (107 kB)
     ------------------------------------ 107.3/107.3 kB 890.1 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting Pillow
  Using cached Pillow-9.1.1-cp39-cp39-win_amd64.whl (3.3 MB)
Using legacy 'setup.py install' for tflearn, since package 'wheel' is not installed.
Installing collected packages: Pillow, tflearn
  Running setup.py install for tflearn: started
  Running setup.py install for tflearn: finished with status 'done'
Successfully installed Pillow-9.1.1 tflearn-0.5.0
Collecting tensorflow
  Downloading tensorflow-2.9.1-cp39-cp39-win_amd64.whl (444.0 MB)
     ---------------------------------------- 444.0/444.0 MB ? eta 0:00:00
Collecting h5py>=2.9.0
  Using cached h5py-3.7.0-cp39-cp39-win_amd64.whl (2.6 MB)
Collecting libclang>=13.0.0
  Using cached libclang-14.0.1-py2.py3-none-win_amd64.whl (14.2 MB)
Collecting wrapt>=1.11.0
  Downloadin

# Import dependencies

In [36]:
# Standard library
import random

# Third-party: NLP, numerics and the deep-learning stack
import nltk
import numpy
import tensorflow
import tflearn
from nltk.stem.lancaster import LancasterStemmer
from tensorflow.python.framework import ops

# Reset TensorFlow's default graph before any layers are defined below.
ops.reset_default_graph()

# Shared Lancaster stemmer used by the pre-processing cells.
stemmer = LancasterStemmer()

# Download the 'punkt' tokenizer data; nltk.word_tokenize needs it to work.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Kambal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Import Dataset

In [28]:
import os
import json

# Load the intents definition (tags, patterns, responses) from dataset/intents.json.
# The encoding is passed explicitly so that reading the file does not depend on
# the platform's default text encoding (e.g. cp1252 on Windows).
with open(os.path.join("dataset", "intents.json"), encoding="utf-8") as file:
    data = json.load(file)

# Print json file
print(data['intents'])

[{'tag': 'greeting', 'patterns': ['Hi', 'How are you', 'Is anyone there?', 'Hello', 'Good day', 'Whats up'], 'responses': ['Hello!', 'Good to see you again!', 'Hi there, how can I help?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['cya', 'See you later', 'Goodbye', 'I am Leaving', 'Have a Good day'], 'responses': ['Sad to see you go :(', 'Talk to you later', 'Goodbye!'], 'context_set': ''}, {'tag': 'age', 'patterns': ['how old', 'how old is tim', 'what is your age', 'how old are you', 'age?'], 'responses': ['I am 18 years old!', '18 years young!'], 'context_set': ''}, {'tag': 'name', 'patterns': ['what is your name', 'what should I call you', 'whats your name?'], 'responses': ['You can call me Tim.', "I'm Tim!", "I'm Tim aka Tech With Tim."], 'context_set': ''}, {'tag': 'shop', 'patterns': ['Id like to buy something', 'whats on the menu', 'what do you reccommend?', 'could i get something to eat'], 'responses': ['We sell chocolate chip cookies for $2!', 'Cookies are on the men

# Pre-processing data

In [29]:
# words  : every token from every pattern (what the user may say)
# labels : the distinct intent tags, in first-seen order
# docs_x : one token list per pattern
# docs_y : the tag of the pattern at the same position in docs_x
words, labels = [], []
docs_x, docs_y = [], []

for intent in data['intents']:
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words += tokens
        docs_x += [tokens]
        docs_y += [intent["tag"]]

    # Record each tag only once.
    if intent['tag'] not in labels:
        labels += [intent['tag']]

# Show each collection under its heading, separated by blank lines.
sections = [
    ("Patterns/Words:", words),
    ("Greetings/Labels:", labels),
    ("DocsX:", docs_x),
    ("DocsY:", docs_y),
]
for position, (heading, values) in enumerate(sections):
    if position > 0:
        print("\n")
    print(heading)
    print(values)

Patterns/Words:
['Hi', 'How', 'are', 'you', 'Is', 'anyone', 'there', '?', 'Hello', 'Good', 'day', 'Whats', 'up', 'cya', 'See', 'you', 'later', 'Goodbye', 'I', 'am', 'Leaving', 'Have', 'a', 'Good', 'day', 'how', 'old', 'how', 'old', 'is', 'tim', 'what', 'is', 'your', 'age', 'how', 'old', 'are', 'you', 'age', '?', 'what', 'is', 'your', 'name', 'what', 'should', 'I', 'call', 'you', 'whats', 'your', 'name', '?', 'Id', 'like', 'to', 'buy', 'something', 'whats', 'on', 'the', 'menu', 'what', 'do', 'you', 'reccommend', '?', 'could', 'i', 'get', 'something', 'to', 'eat', 'when', 'are', 'you', 'guys', 'open', 'what', 'are', 'your', 'hours', 'hours', 'of', 'operation']


Greetings/Labels:
['greeting', 'goodbye', 'age', 'name', 'shop', 'hours']


DocsX:
[['Hi'], ['How', 'are', 'you'], ['Is', 'anyone', 'there', '?'], ['Hello'], ['Good', 'day'], ['Whats', 'up'], ['cya'], ['See', 'you', 'later'], ['Goodbye'], ['I', 'am', 'Leaving'], ['Have', 'a', 'Good', 'day'], ['how', 'old'], ['how', 'old', 'is', '

# Stem the words and remove duplicates

For example, in <strong>Flying</strong>, <strong>Fly</strong> is the stem <br>
and <strong>ing</strong> is the suffix.

In [31]:
# Lowercase and stem every token except "?"; collecting the stems in a set
# drops the duplicates in the same pass.
unique_stems = {stemmer.stem(token.lower()) for token in words if token != "?"}

# Back to a list, in alphabetical order
words = sorted(unique_stems)

# Sort the tags alphabetically as well
labels = sorted(labels)

# Vocabulary, a blank gap, then the tags.
print(words, "\n", labels, sep="\n")

['a', 'ag', 'am', 'anyon', 'ar', 'buy', 'cal', 'could', 'cya', 'day', 'do', 'eat', 'get', 'good', 'goodby', 'guy', 'hav', 'hello', 'hi', 'hour', 'how', 'i', 'id', 'is', 'lat', 'leav', 'lik', 'menu', 'nam', 'of', 'old', 'on', 'op', 'reccommend', 'see', 'should', 'someth', 'the', 'ther', 'tim', 'to', 'up', 'what', 'when', 'yo', 'you']


['age', 'goodbye', 'greeting', 'hours', 'name', 'shop']


# Create a BAG OF WORDS
> Neural networks don't understand strings, so we need to convert them into numerical values.
> A bag of words represents all of the words in a given pattern, which we will use to train our model.

In [32]:
# training : one bag-of-words row per pattern (1 where a vocabulary stem occurs)
# output   : one one-hot row per pattern marking that pattern's tag
training, output = [], []

# All-zero template with one slot per label.
out_empty = [0] * len(labels)

for doc_idx, tokens in enumerate(docs_x):
    # Stem the pattern's tokens the same way the vocabulary was stemmed.
    doc_stems = [stemmer.stem(token.lower()) for token in tokens]

    # 1 for every vocabulary stem present in this pattern, 0 otherwise.
    bag = [1 if stem in doc_stems else 0 for stem in words]

    # Copy the template and flag the position of this pattern's tag.
    output_row = list(out_empty)
    output_row[labels.index(docs_y[doc_idx])] = 1

    training.append(bag)
    output.append(output_row)


print(training, type(training))

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 

In [33]:
# Turn both Python lists into numpy arrays in one step.
training, output = map(numpy.array, (training, output))

print(training, type(training))

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 1]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 0]] <class 'numpy.ndarray'>


# Building Model using tflearn

In [37]:
# Layer sizes are read from the prepared data: one input per vocabulary stem,
# one output per label.
n_features = len(training[0])
n_classes = len(output[0])

# Input layer -> two fully connected layers of 8 units -> softmax output layer.
net = tflearn.input_data(shape=[None, n_features])
for hidden_idx in range(2):
    net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, n_classes, activation="softmax")
net = tflearn.regression(net)

# Building a Model
model = tflearn.DNN(net)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


# Training & Saving the Model
Now that we have set up our model, it's time to train it on our data! To do this, we will fit our data to the model. The number of epochs we set is the number of times that the model will see the same information while training.

In [46]:
# Reuse a previously saved model when it can be loaded; otherwise train a new
# one on the prepared data and save it for next time.
try:
    model.load("model.tflearn") # Load the model if model has already been created
except Exception:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate rather than silently starting a training run.
    # Train and save the model if it could not be loaded
    model.fit(training, output, n_epoch=1500, batch_size=8, show_metric=True)
    model.save("model.tflearn")

INFO:tensorflow:Restoring parameters from D:\(E)\Programming Tuts\Projects Tutorials\Python\Python Chatbot Tensorflow\model.tflearn
It already exists


# Saving the pre-processed data (optional)

In [41]:
import pickle

# Bundle the vocabulary, the tags and both training arrays into one tuple and
# write it to data.pickle.
preprocessed = (words, labels, training, output)
with open("data.pickle", "wb") as pickle_file:
    pickle.dump(preprocessed, pickle_file)