In [3]:
# Libraries needed for NLP
import nltk
# Fetch NLTK's 'punkt' package up front; later cells call nltk.word_tokenize
# (presumably backed by this package - confirm against the NLTK docs).
nltk.download('punkt')
from nltk.stem.lancaster import LancasterStemmer
# Single shared stemmer instance; later cells use it for both the training
# vocabulary and incoming sentences.
stemmer = LancasterStemmer()

# Libraries needed for Tensorflow processing
import tensorflow as tf
import numpy as np
import tflearn
import random
import json
from google.colab import files
# Colab-only: prompts for a file upload. In the recorded run this saved
# intents.json into the working directory, where the next cell opens it.
files.upload() 


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Saving intents.json to intents.json


{'intents.json': b'{"intents": [\n        {"tag": "greeting",\n         "patterns": ["Hi", "How are you", "Is anyone there?", "Hello", "Good day"],\n         "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"],\n         "context_set": ""\n        },\n        {"tag": "goodbye",\n         "patterns": ["Bye", "See you later", "Goodbye"],\n         "responses": ["See you later, thanks for visiting", "Have a nice day", "Bye! Come back again soon."]\n        },\n        {"tag": "thanks",\n         "patterns": ["Thanks", "Thank you", "That\'s helpful"],\n         "responses": ["Happy to help!", "Any time!", "My pleasure"]\n        },\n        {"tag": "hours",\n         "patterns": ["What hours are you open?", "What are your hours?", "When are you open?" ],\n         "responses": ["We\'re open every day 9am-9pm", "Our hours are 9am-9pm every day"]\n        },\n        {"tag": "location",\n         "patterns": ["What is your location?", "Where are 

In [6]:
# Load the chat-bot intents file (tags, example patterns and responses)
with open('intents.json') as json_data:
  intents = json.load(json_data)

# Tokens listed here are left out of the vocabulary
ignore = ['?']

# One (token list, tag) pair per example pattern, in file order
documents = [
  (nltk.word_tokenize(pattern), intent['tag'])
  for intent in intents['intents']
  for pattern in intent['patterns']
]

# Vocabulary: every non-ignored token, lower-cased and stemmed, then
# de-duplicated (set) and sorted
words = sorted({
  stemmer.stem(token.lower())
  for tokens, _ in documents
  for token in tokens
  if token not in ignore
})

# Classes: the distinct tags that own at least one pattern, sorted
classes = sorted({tag for _, tag in documents})

print(len(documents), " Documents", documents)
print(len(classes), " Classes", classes)
print(len(words), " unique stemmed words", words)
# Create training data: one [bag_of_words, one_hot_tag] pair per document
training = []
output = []

# Template label row: all zeros, one slot per class
output_empty = [0] * len(classes)
print(output_empty)

# Create training set, bag of words for each sentence
for doc in documents:
  # doc is (tokenized pattern, tag); stem and lower-case the tokens the same
  # way the vocabulary was built so membership tests line up
  pattern_words = {stemmer.stem(word.lower()) for word in doc[0]}
  # 1 where the vocabulary word occurs in the pattern, 0 elsewhere
  bag = [1 if w in pattern_words else 0 for w in words]
  # Output is 1 for the current tag and 0 for every other tag
  output_row = list(output_empty)
  output_row[classes.index(doc[1])] = 1

  training.append([bag, output_row])

# Summarise instead of dumping every row into the cell output
print(len(training), " training samples")

# Shuffle the samples, then split them into features and labels.
# The pairs are deliberately kept as plain Python lists: each row holds two
# lists of different lengths (vocabulary size vs. class count), and
# np.array() on such ragged data raises ValueError on NumPy >= 1.24.
random.shuffle(training)
train_x = [features for features, _ in training]
train_y = [labels for _, labels in training]
# Reset the underlying default graph so re-running this cell starts clean.
# TensorFlow 2.x no longer exposes reset_default_graph at the top level, so
# fall back to the compat.v1 alias when the TF1 name is missing.
if hasattr(tf, 'reset_default_graph'):
  tf.reset_default_graph()
else:
  tf.compat.v1.reset_default_graph()

# Build the network: input as wide as one bag-of-words vector, two hidden
# layers of 10 units each, and a softmax layer with one unit per class
net = tflearn.input_data(shape=[None, len(train_x[0])])
net = tflearn.fully_connected(net, 10)
net = tflearn.fully_connected(net, 10)
net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
net = tflearn.regression(net)

# Define the model; TensorBoard logs are written under 'tflearn_logs'
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

# Train, then save the trained weights to 'model.tflearn'
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
model.save('model.tflearn')
import pickle

# Persist the vocabulary, class labels and training data next to the model.
# The file is opened in a `with` block so the handle is flushed and closed
# before it is read back below.
with open('training_data', 'wb') as training_file:
    pickle.dump({
        "words": words,
        "classes": classes,
        "train_x": train_x,
        "train_y": train_y},
        training_file
    )

# Restore all the data structures from disk.
# NOTE: pickle.load can execute arbitrary code from the file it reads; only
# load 'training_data' files produced by this notebook, never untrusted ones.
with open("training_data", "rb") as training_file:
    data = pickle.load(training_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

# Re-read the intents so the response lookup uses the file on disk
with open('intents.json') as json_data:
    intents = json.load(json_data)

# Load the saved model weights back into the model object
model.load('./model.tflearn')

Training Step: 3999  | total loss: [1m[32m0.35418[0m[0m | time: 0.012s
| Adam | epoch: 1000 | loss: 0.35418 - acc: 0.9556 -- iter: 24/31
Training Step: 4000  | total loss: [1m[32m0.32482[0m[0m | time: 0.016s
| Adam | epoch: 1000 | loss: 0.32482 - acc: 0.9600 -- iter: 31/31
--
INFO:tensorflow:/content/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.
INFO:tensorflow:Restoring parameters from /content/model.tflearn


In [0]:
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its tokens lower-cased and stemmed.

    Tokenization uses ``nltk.word_tokenize``; stemming uses the module-level
    ``stemmer``. The result is a list with one entry per token, in order.
    """
    return [
        stemmer.stem(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]

def bow(sentence, words, show_details=False):
    """Return the bag-of-words vector of ``sentence`` over ``words``.

    The sentence is tokenized and stemmed with ``clean_up_sentence``. The
    result is a NumPy array with one entry per vocabulary word: 1 if that
    word equals one of the sentence's stems, 0 otherwise. When
    ``show_details`` is true, every match is printed as it is found.
    """
    tokens = clean_up_sentence(sentence)
    bag = [0 for _ in range(len(words))]
    for token in tokens:
        # Collect every vocabulary position that matches this token
        matches = [
            (position, vocab_word)
            for position, vocab_word in enumerate(words)
            if vocab_word == token
        ]
        for position, vocab_word in matches:
            bag[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)

    return np.array(bag)

In [0]:
# Predictions with a probability at or below this value are discarded
ERROR_THRESHOLD = 0.30
def classify(sentence):
    """Return the likely intents for ``sentence``, most probable first.

    The sentence is converted to a bag-of-words vector and passed to the
    module-level ``model``. Classes whose predicted probability exceeds
    ``ERROR_THRESHOLD`` are returned as a list of ``(class_name, probability)``
    tuples sorted by descending probability; the list is empty when no class
    clears the threshold.
    """
    probabilities = model.predict([bow(sentence, words)])[0]
    ranked = sorted(
        (
            [index, probability]
            for index, probability in enumerate(probabilities)
            if probability > ERROR_THRESHOLD
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [(classes[index], probability) for index, probability in ranked]

def response(sentence, userID='123', show_details=False):
    """Print a random response for the best-matching intent of ``sentence``.

    Candidate intents come from ``classify`` (most probable first). The first
    candidate whose tag appears in ``intents['intents']`` has one of its
    responses chosen at random and printed. Candidates with no matching tag
    are dropped and the next one is tried. Nothing is printed when there are
    no candidates. ``userID`` and ``show_details`` are accepted but not used
    here. Always returns ``None``.
    """
    candidates = classify(sentence)
    # Walk the candidates from most to least probable until one matches
    while candidates:
        best_tag = candidates[0][0]
        for intent in intents['intents']:
            if intent['tag'] == best_tag:
                # print() returns None, so the function's result stays None
                return print(random.choice(intent['responses']))

        candidates.pop(0)