In [1]:
# Importing Modules 

import nltk, random, json , pickle
#nltk.download('punkt');nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk import flatten
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Activation,Dropout
from tensorflow.keras.optimizers import SGD

In [2]:
lemmatizer=WordNetLemmatizer()
class Training:
    def __init__(self):
        #read and load the intent file
        data_file=open('intents.json').read()
        self.intents=json.loads(data_file)['intents']
        self.ignore_words=list("!@#$%^&*?")
        self.process_data()

    def process_data(self):
        #fetch patterns and tokenize them into words
        self.pattern=list(map(lambda x:x["patterns"],self.intents))
        self.words=list(map(word_tokenize,flatten(self.pattern)))
        #fetch classes i.e. tags and store in documents along with tokenized patterns 
        self.classes= flatten( [[x["tag"]]*len(y) for x,y in zip(self.intents,self.pattern)])
        self.documents=list(map(lambda x,y:(x,y),self.words,self.classes))
        #lower case and filter all the symbols from words
        self.words=list(map(str.lower,flatten(self.words)))
        self.words=list(filter(lambda x:x not in self.ignore_words,self.words))
        
        #lemmatize the words and sort the class and word lists                    
        self.words=list(map(lemmatizer.lemmatize,self.words))
        self.words=sorted(list(set(self.words)))
        self.classes=sorted(list(set(self.classes)))

    def train_data(self):
        #initialize and set analyzer=word as we want to vectorize words not characters
        cv=CountVectorizer(tokenizer=lambda txt: txt.split(),analyzer="word",stop_words=None)
        #create the training sets for model
        training=[]
        for doc in self.documents:
            #lower case and lemmatize the pattern words
            pattern_words=list(map(str.lower,doc[0]))
            pattern_words=' '.join(list(map(lemmatizer.lemmatize,pattern_words)))

            #train or fit the vectorizer with all words
            #and transform into one-hot encoded vector
            vectorize=cv.fit([' '.join(self.words)])
            word_vector=vectorize.transform([pattern_words]).toarray().tolist()[0]

            #create output for the respective input
            #output size will be equal to total numbers of classes
            output_row=[0]*len(self.classes)

            #if the pattern is from current class put 1 in list else 0
            output_row[self.classes.index(doc[1])]=1
            cvop=cv.fit([' '.join(self.classes)])
            out_p=cvop.transform([doc[1]]).toarray().tolist()[0]

            #store vectorized word list long with its class
            training.append([word_vector,output_row])

        #shuffle training sets to avoid model to train on same data again
        random.shuffle(training)
        training=np.array(training,dtype=object)
        train_x=list(training[:,0])#patterns
        train_y=list(training[:,1])#classes
        print(train_y)
        return train_x,train_y 

    def build(self):
        #load the data from train_data function
        train_x,train_y = self.train_data()
        
        ##Create a Sequential model with 3 layers. 
        model=Sequential()
        #input layer with latent dimension of 128 neurons and ReLU activation function 
        model.add(Dense(128,input_shape=(len(train_x[0]),),activation='relu'))
        model.add(Dropout(0.5)) #Dropout to avoid overfitting
        #second layer with the latent dimension of 64 neurons
        model.add(Dense(64,activation='relu')) 
        model.add(Dropout(0.5))
        #fully connected output layer with softmax activation function
        model.add(Dense(len(train_y[0]),activation='softmax')) 
        '''Compile model with Stochastic Gradient Descent with learning rate  and
           nesterov accelerated gradient descent'''
        sgd=SGD(lr=1e-2,decay=1e-6,momentum=0.9,nesterov=True)
        model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])
        #fit the model with training input and output sets
        hist=model.fit(np.array(train_x),np.array(train_y),epochs=200,batch_size=10,verbose=1)
        #save model and words,classes which can be used for prediction.
        model.save('chatbot_model.h5',hist)
        pickle.dump({'words':self.words,'classes':self.classes,'train_x':train_x,'train_y':train_y},
                    open("training_data","wb"))

#train the model
Training().build()



[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200

Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 

Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
