In [12]:
import numpy as np
import tflearn
import tensorflow as tf
import random
import json
import pickle
import nltk
from nltk.stem.lancaster import LancasterStemmer
import numpy as np

In [14]:
# Download the NLTK data packages this notebook asks for.
nltk.download('punkt') # Punkt tokenizer models; clean_pattern tokenizes with nltk.word_tokenize
nltk.download('wordnet') # WordNet dictionary -- NOTE(review): no visible cell references WordNet; confirm it is still needed
nltk.download('omw-1.4') # NOTE(review): presumably the Open Multilingual Wordnet data that accompanies 'wordnet' -- confirm

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Swarna\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Swarna\AppData\Roaming\nltk_data...
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Swarna\AppData\Roaming\nltk_data...


True

In [6]:
# Load the intent definitions (tag / patterns / responses / context per intent)
# that the rest of the notebook trains on.
# The encoding is given explicitly: without it open() decodes with the platform
# default (e.g. cp1252 on Windows), which can mangle or reject non-ASCII text in
# a UTF-8 JSON file.
with open('intents.json', encoding='utf-8') as file:
    data = json.load(file)
print(data)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'Hey', 'Is anyone there?', 'Hello', 'Hay'], 'responses': ['Hello', 'Hi', 'Hi there'], 'context': ['']}, {'tag': 'goodbye', 'patterns': ['Bye', 'See you later', 'Goodbye'], 'responses': ['See you later', 'Have a nice day', 'Bye! Come back again'], 'context': ['']}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful", 'Thanks for the help'], 'responses': ['Happy to help!', 'Any time!', 'My pleasure', "You're most welcome!"], 'context': ['']}, {'tag': 'about', 'patterns': ['Who are you?', 'What are you?', 'Who you are?'], 'responses': ["I'm a bot assistant", "I'm Joana, a master's university recommendation bot"], 'context': ['']}, {'tag': 'recommendation', 'patterns': ["Recommend me a master's university", "I want to do master's abroad", "Recommend me some master's universities"], 'responses': ['Please provide us your details in order to assist you', "Please mention your field of study, and we will help you find a mast

#for another class

In [10]:
# Shared Lancaster stemmer used by clean_pattern to reduce a word to its stem
# (e.g. 'saying' is reduced to 'say').
stemmer = LancasterStemmer()


def define_network(X, y):
    """Build an untrained tflearn DNN classifier sized to the given data.

    Args:
        X: indexable collection of feature rows; len(X[0]) sets the input width.
        y: indexable collection of label rows; len(y[0]) sets the number of
            softmax outputs.

    Returns:
        A tflearn.DNN wrapping input -> 8 units -> 8 units -> softmax output,
        configured to write its logs under 'tflearn_logs'.
    """
    # Clear the default graph stack and reset the global default graph before building.
    tf.compat.v1.reset_default_graph()

    layers = tflearn.input_data(shape=[None, len(X[0])])  # input layer

    # Two hidden layers of 8 units each.
    # NOTE(review): no activation is passed here, so these layers use whatever
    # tflearn.fully_connected defaults to -- confirm that is the intended choice.
    for _ in range(2):
        layers = tflearn.fully_connected(layers, 8)

    # Output layer: one softmax unit per label column.
    layers = tflearn.fully_connected(layers, len(y[0]), activation='softmax')
    layers = tflearn.regression(layers)

    # tensorboard_dir is the path where logs are stored.
    return tflearn.DNN(layers, tensorboard_dir='tflearn_logs')


def clean_pattern(pattern, ignore_words):
    """Tokenize a sentence and return the stems of the tokens that are kept.

    Args:
        pattern: sentence string, split with nltk.word_tokenize.
        ignore_words: tokens to drop. The check is made against the raw token,
            before it is lower-cased.

    Returns:
        List of lower-cased, stemmed tokens in their original sentence order.
    """
    return [
        stemmer.stem(token.lower())
        for token in nltk.word_tokenize(pattern)
        if token not in ignore_words
    ]


def bag_of_words(sentence, stemmed_words, ignore_words):
    """Encode a user sentence as a 0/1 vector to feed to the model.

    Args:
        sentence: raw sentence string typed by the user.
        stemmed_words: vocabulary of stemmed words; fixes the length and the
            ordering of the returned vector.
        ignore_words: tokens that clean_pattern should discard.

    Returns:
        numpy array with one entry per vocabulary word: 1 if that word occurs
        in the cleaned sentence, otherwise 0.
    """
    present = clean_pattern(sentence, ignore_words)
    return np.array([int(word in present) for word in stemmed_words])

In [15]:
# Clean the data loaded from intents.json.
ignore_words = ['!', '?', '.']

# One (stemmed tokens, tag) pair per training pattern, in file order.
corpus = [
    (clean_pattern(pattern, ignore_words), intent['tag'])
    for intent in data['intents']
    for pattern in intent['patterns']
]

# Vocabulary and tag list: de-duplicated and sorted.
stemmed_words = sorted({word for tokens, _ in corpus for word in tokens})
tags = sorted({intent['tag'] for intent in data['intents']})

print(stemmed_words)
print(tags)
print(corpus)

["'s", '100', '105', '110', '2.7', '3', '3.10', '3.14', '3.28', '3.30', '3.40', '3.5', '3.75', '3.8', '3.9', '310', '320', '325', '330', '6.5', '7.0', '7.5', '8', '90', '95', 'a', 'abroad', 'admin', 'am', 'and', 'anyon', 'ar', 'band', 'busy', 'bye', 'cgpa', 'comput', 'cse', 'did', 'do', 'econom', 'ee', 'field', 'for', 'goodby', 'gre', 'hav', 'hay', 'hello', 'help', 'hey', 'hi', 'i', 'ibt', 'ielt', 'in', 'interest', 'is', 'lat', 'loc', 'maj', 'mast', 'me', 'my', "n't", 'no', 'of', 'or', 'percentil', 'pref', 'prefer', 'recommend', 'result', 'sci', 'scor', 'see', 'som', 'study', 'tak', 'thank', 'that', 'the', 'ther', 'to', 'toefl', 'univers', 'want', 'what', 'who', 'you', '{', '}']
['about', 'cgpa', 'field', 'goodbye', 'gre_score', 'greeting', 'ielts_score', 'no_gre', 'no_gre_toefl', 'no_toefl', 'recommendation', 'recommendation_flow', 'thanks', 'toefl_score']
[(['hi'], 'greeting'), (['hey'], 'greeting'), (['is', 'anyon', 'ther'], 'greeting'), (['hello'], 'greeting'), (['hay'], 'greeting'

In [16]:
# Turn the cleaned corpus into numeric features and labels.
# Each row of X has one entry per stemmed word: 1 if the word occurs in that
# pattern, else 0. Each row of y has one entry per tag: 1 for the pattern's own
# tag and 0 for every other tag.
X = np.array([[int(word in tokens) for word in stemmed_words] for tokens, _ in corpus])
y = np.array([[int(tag == label) for tag in tags] for _, label in corpus])
print(X)
print(y)

# Save the variables in a pickle so main.py can use them.
with open('saved_variables.pickle', 'wb') as file:
    pickle.dump((stemmed_words, tags, ignore_words, X, y), file)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 

In [17]:
# Build the network, train it on the full data set, then save the trained model.
N_EPOCH = 1120   # number of passes over the training data
BATCH_SIZE = 8   # samples per training step

model = define_network(X, y)
model.fit(X, y, n_epoch=N_EPOCH, batch_size=BATCH_SIZE, show_metric=True)
model.save("chatbot_model.tflearn")

Training Step: 8959  | total loss: [1m[32m0.64755[0m[0m | time: 0.023s
| Adam | epoch: 1120 | loss: 0.64755 - acc: 0.9443 -- iter: 56/61
Training Step: 8960  | total loss: [1m[32m0.58728[0m[0m | time: 0.026s
| Adam | epoch: 1120 | loss: 0.58728 - acc: 0.9498 -- iter: 61/61
--


INFO:tensorflow:f:\NSU\cse498R\498.5\chatbot_model.tflearn is not in all_model_checkpoint_paths. Manually adding it.
