In [1]:
# Libraries needed for NLP
import nltk
nltk.download('punkt')
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

# Libraries needed for Tensorflow processing
import tensorflow as tf
import numpy as np
import tflearn
import random
import json

[nltk_data] Downloading package punkt to /home/python1/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# import our chat-bot intents file
with open('intents.json',encoding="utf8", errors='ignore') as json_data:
    intents = json.load(json_data)

In [3]:
intents

{'intents': [{'tag': 'EQAUP',
   'patterns': ['Is there a username and password for the EQAUP program?',
    'Do I need a username and password for the EQAUP program?',
    'Do I need a specific username and password for the EQAUP program?',
    ' Do I need to contact the deanship to get my username and password for the EQAUP program?'],
   'responses': ['No, there is no username or password for the program, but you can enter the program with the same username and password to access the university network',
    'No, you can use the credentials of the SSO'],
   'context': ['EQAUP', 'EQAUP-Plus']},
  {'tag': 'EQAUP system',
   'patterns': ['How is the EQAUP system accessed?',
    'How can I access the EQAUP system?',
    'What are the steps for accessign the EQAUP system?',
    'Can I access the EQAUP?'],
   'responses': ["The system is accessed through: 1. The university's website by following the following path: Deanships - Deanship of Quality and Academic Accreditation (the deanship's

In [4]:
words = []
classes = []
documents = []
ignore = ['?']
# loop through each sentence in the intent's patterns
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # tokenize each and every word in the sentence
        w = nltk.word_tokenize(pattern)
        # add word to the words list
        words.extend(w)
        # add word(s) to documents
        documents.append((w, intent['tag']))
        # add tags to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

In [5]:
words = [stemmer.stem(w.lower()) for w in words if w not in ignore]
words = sorted(list(set(words)))

classes = sorted(list(set(classes)))

print (len(documents), "documents")
print (len(classes), "classes", classes)
print (len(words), "unique stemmed words", words)

140 documents
26 classes [' steps for Quality', ' university', 'Academic Accreditation', 'EQAUP', 'EQAUP ', 'EQAUP approval', 'EQAUP endorsement mechanism', 'EQAUP justifications', 'EQAUP manuals', 'EQAUP system', 'EQAUP target', 'Estebanh', 'Quality', 'Training confirmation link', 'activated', 'attestations granted', 'certificates', 'domain', 'goodbye', 'options', 'prominent quality managment', 'schedule the deanship', 'survey link', 'thanks', 'training courses', 'uploading documents']
243 unique stemmed words ["'m", "'s", ',', '/', '9001-2015', 'a', 'about', 'academ', 'access', 'accessign', 'accord', 'accr', 'accrdit', 'accredit', 'achiev', 'act', 'activaitng', 'aft', 'al', 'align', 'am', 'an', 'and', 'anyon', 'apply', 'approach', 'approav', 'appropry', 'approv', 'ar', 'arch', 'at', 'attend', 'attest', 'attndac', 'auth', 'award', 'awesom', 'ax', 'be', 'benchmark', 'between', 'button', 'by', 'bye', 'caclaut', 'calc', 'can', 'cas', 'cent', 'cert', 'chat', 'click', 'complet', 'condit', 

In [6]:
# create training data
training = []
output = []
# create an empty array for output
output_empty = [0] * len(classes)

# create training set, bag of words for each sentence
for doc in documents:
    bag = []
    pattern_words = doc[0]
    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
    
    # create bag of words array
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)

    # output is '1' for current tag and '0' for rest of other tags
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

# shuffling features and turning it into np.array
random.shuffle(training)
training = np.array(training)

# creating training lists
train_x = list(training[:,0])
train_y = list(training[:,1])

  training = np.array(training)


In [7]:
from tensorflow.python.framework import ops
ops.reset_default_graph()

# Building neural network
net = tflearn.input_data(shape=[None, len(train_x[0])])
net = tflearn.fully_connected(net, 10)
net = tflearn.fully_connected(net, 10)
net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
net = tflearn.regression(net)

# Defining model and setting up tensorboard
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

# Start training
model.fit(train_x, train_y, n_epoch=1000, batch_size=16, show_metric=True)
model.save('model.tflearn')

Training Step: 17999  | total loss: [1m[32m0.00312[0m[0m | time: 0.096s
| Adam | epoch: 1000 | loss: 0.00312 - acc: 1.0000 -- iter: 136/140
Training Step: 18000  | total loss: [1m[32m0.00301[0m[0m | time: 0.100s
| Adam | epoch: 1000 | loss: 0.00301 - acc: 1.0000 -- iter: 140/140
--
INFO:tensorflow:/home/python1/Documents/chatbot/chatbotApp/NLP/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [8]:
import pickle
pickle.dump( {'words':words, 'classes':classes, 'train_x':train_x, 'train_y':train_y}, open( "training_data", "wb" ) )


In [9]:
# restoring all the data structures
data = pickle.load( open( "training_data", "rb" ) )
words = data['words']
classes = data['classes']
print(classes)
train_x = data['train_x']
print(train_x)
train_y = data['train_y']

[' steps for Quality', ' university', 'Academic Accreditation', 'EQAUP', 'EQAUP ', 'EQAUP approval', 'EQAUP endorsement mechanism', 'EQAUP justifications', 'EQAUP manuals', 'EQAUP system', 'EQAUP target', 'Estebanh', 'Quality', 'Training confirmation link', 'activated', 'attestations granted', 'certificates', 'domain', 'goodbye', 'options', 'prominent quality managment', 'schedule the deanship', 'survey link', 'thanks', 'training courses', 'uploading documents']


[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 

In [11]:
with open('intents.json',encoding="utf8", errors='ignore') as json_data:
    intents = json.load(json_data)

In [12]:
# load the saved model
model.load('./model.tflearn')

INFO:tensorflow:Restoring parameters from /home/python1/Documents/chatbot/chatbotApp/NLP/model.tflearn


In [13]:
def clean_up_sentence(sentence):
    # tokenizing the pattern
    sentence_words = nltk.word_tokenize(sentence)
    # stemming each word
    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
    return sentence_words

# returning bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words, show_details=False):
    # tokenizing the pattern
    sentence_words = clean_up_sentence(sentence)
    # generating bag of words
    bag = [0]*len(words)
    for s in sentence_words:
        for i,w in enumerate(words):
            if w == s: 
                bag[i] = 1
                if show_details:
                    print ("found in bag: %s" % w)

    return(np.array(bag))

In [14]:
# create a data structure to hold user context
context = {}

ERROR_THRESHOLD = 0.8
def classify(sentence):
    # generate probabilities from the model
    results = model.predict([bow(sentence, words)])[0]
    # filter out predictions below a threshold
    results = [[i,r] for i,r in enumerate(results) if r>ERROR_THRESHOLD]
    # sort by strength of probability
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append(    # return tuple of intent and probability
(classes[r[0]], r[1]))
    # return tuple of intent and probability
    return return_list

def response(sentence, userID='123', show_details=False):
    results = classify(sentence)
    # if we have a classification then find the matching intent tag
    if results:
        # loop as long as there are matches to process
        while results:
            for i in intents['intents']:
                # find a tag matching the first result
                if i['tag'] == results[0][0]:
                    # set context for this intent if necessary
                    if 'context_set' in i:
                        if show_details: print ('context:', i['context_set'])
                        context[userID] = i['context_set']

                    # check if this intent is contextual and applies to this user's conversation
                    if not 'context_filter' in i or \
                        (userID in context and 'context_filter' in i and i['context_filter'] == context[userID]):
                        if show_details: print ('tag:', i['tag'])
                        # a random response from the intent
                        return print(random.choice(i['responses']))

            results.pop(0)

In [15]:
classify('What is the process to access EQAUP system?')

[('EQAUP system', 0.9483384)]

In [16]:
response('access EQAUP system?')