# Model Training


### Imports and Downloads

In [None]:
#Imports
import random
import json
import pickle
import numpy as np
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout,Flatten
from tensorflow.keras.models import load_model

In [None]:
#NLTK Downloads
# Resources used by nltk.word_tokenize and WordNetLemmatizer further down.
download_results = [nltk.download(resource) for resource in ('punkt', 'wordnet', 'omw-1.4')]
# Keep the return value of the last download as the cell output.
download_results[-1]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

### Loading Data from JSON and Lemmatizing

In [None]:
#Getting Words and Tags from Intents
lemmatizer = WordNetLemmatizer()
# Context manager so the file handle is closed as soon as the JSON is parsed.
with open('intent3.json', encoding='utf-8') as intents_file:
    intents = json.load(intents_file)['intents'] #Intents File

words = []
classes = []
entries = []
# Stop-words and punctuation (all lowercase) that never become vocabulary entries.
ignore = ['a', 'an', 'the', 'is', 'am', 'are', 'was', 'were', 'be', 'being', 'been', 'and', 'or', 'of', 'at', 'by', 'in', 'on', 'to', 'with', 'that', 'this', 'for', 'from', 'it', 'you', 'he', 'she', 'they', 'we', 'me', 'him', 'her', 'them', 'my', 'your', 'his', 'her', 'our', 'their', 'what', 'where', 'when', 'why', 'how', 'which', 'who', 'whom','next', '!', '?', '.', ',', ';', ':', '-', '_', '(', ')', '[', ']', '{', '}', "'", '"']

#Making three lists of words, (words,tag) pair and tags
for intent in intents:
    classes.append(intent['tag'])
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        entries.append((word_list,intent['tag']))

#Storing List of Words in Base Form using lemmatizer
# Lowercase BEFORE lemmatizing: the bag-of-words encoder and predict_intent both
# compare against lowercased tokens, so a capitalised vocabulary entry could
# never be matched and would become a permanently-zero feature.
words = sorted({lemmatizer.lemmatize(word.lower()) for word in words if word.lower() not in ignore})
classes = sorted(set(classes))

#Pickle Dump for Runtime loading of these Information
pkl={"words":words,"classes":classes}
with open('data.pkl', 'wb') as pkl_file:
    pickle.dump(pkl, pkl_file)

### Making Dataset

In [None]:
#Making the dataset
# Each pattern becomes one row: a 0/1 flag per vocabulary word (in `words`
# order) followed by the intent tag.
rows = []
for word_patterns, tag in entries:
    # A set gives constant-time membership checks for every vocabulary word.
    pattern_lemmas = {lemmatizer.lemmatize(word.lower()) for word in word_patterns}
    rows.append([1 if word in pattern_lemmas else 0 for word in words] + [tag])

# Build the frame in one go; appending with .loc row by row re-allocates the
# frame on every insert and gets quadratically slower as the dataset grows.
training = pd.DataFrame(rows, columns=words+['Intent'])
length = len(training)  # number of rows, kept for any later cell that reads it
training.to_csv("Dataset.csv",index=False)

In [None]:
# Preview the first rows of the bag-of-words frame built in the previous cell.
training.head()

Unnamed: 0,'ll,'m,'s,ARIIA,B.Tech,BTech,Can,Could,DSAI,Do,...,wellness,whole,will,winter,without,work,workshop,would,year,Intent
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings


### Dataset Preprocessing

In [None]:
# Reload the dataset from the CSV file written by the dataset-building cell
# and preview the first rows.
df=pd.read_csv("Dataset.csv")
df.head()

Unnamed: 0,'ll,'m,'s,ARIIA,B.Tech,BTech,Can,Could,DSAI,Do,...,wellness,whole,will,winter,without,work,workshop,would,year,Intent
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,greetings


In [None]:
#Splitting in X and y
# The label ('Intent') is the last column of Dataset.csv; everything before it
# is a vocabulary feature. Slicing relative to the label column keeps X and y
# consistent with the file itself instead of with the in-memory `words` list,
# so a CSV from an earlier run cannot silently drop or leak a column.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [None]:
from sklearn.model_selection import train_test_split
# Stratified 80/20 split. The fixed random_state makes the split, and therefore
# every score reported below, reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
# One-hot encode the labels for the neural network.
# Reindexing on `classes` pins the number and order of the output columns to the
# list that predict_intent decodes with (classes[index]), even if a tag is
# missing from one of the splits. The float cast avoids handing boolean dummies
# (the default in recent pandas) to Keras.
Y_train = pd.get_dummies(y_train).reindex(columns=classes, fill_value=0).astype('float32')
Y_test = pd.get_dummies(y_test).reindex(columns=classes, fill_value=0).astype('float32')

### Model Training and Comparison

In [None]:
from sklearn.svm import SVC
# Baseline: linear-kernel SVM on the bag-of-words features.
# fit() returns the fitted estimator, so construction and training are chained.
clf = SVC(kernel='linear').fit(X_train, y_train)
# Mean accuracy on the held-out split (shown as the cell output).
clf.score(X_test, y_test)

0.6633663366336634

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forests are stochastic; a fixed seed keeps the reported accuracy
# reproducible from run to run.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
# Mean accuracy on the held-out split (shown as the cell output).
clf.score(X_test, y_test)

0.6831683168316832

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
# Extra-trees draws random split thresholds; a fixed seed keeps the reported
# accuracy reproducible from run to run.
clf = ExtraTreesClassifier(random_state=42)
clf.fit(X_train, y_train)
# Mean accuracy on the held-out split (shown as the cell output).
clf.score(X_test, y_test)

0.693069306930693

### Lazy Predict Comparison

In [None]:
!pip install lazypredict &> /dev/null
import lazypredict
from lazypredict.Supervised import LazyClassifier
clf = LazyClassifier(verbose=0)
models,predictions = clf.fit(X_train, X_test, y_train, y_test)
models

100%|██████████| 29/29 [02:52<00:00,  5.93s/it]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ExtraTreesClassifier,0.71,0.71,,0.7,0.47
RandomForestClassifier,0.67,0.67,,0.67,0.49
DecisionTreeClassifier,0.64,0.64,,0.63,0.08
LinearSVC,0.61,0.61,,0.6,33.1
BaggingClassifier,0.61,0.61,,0.62,0.48
RidgeClassifierCV,0.61,0.61,,0.61,0.22
RidgeClassifier,0.59,0.59,,0.6,0.09
PassiveAggressiveClassifier,0.58,0.58,,0.58,0.88
LogisticRegression,0.58,0.58,,0.61,2.08
SGDClassifier,0.56,0.56,,0.56,0.47


#### ANN Model

In [None]:
#Model Definition
model = Sequential()
model.add(Dense(X_train.shape[1]+1, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(300, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(Y_train.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 640)               409600    
                                                                 
 dropout (Dropout)           (None, 640)               0         
                                                                 
 dense_1 (Dense)             (None, 300)               192300    
                                                                 
 dropout_1 (Dropout)         (None, 300)               0         
                                                                 
 dense_2 (Dense)             (None, 51)                15351     
                                                                 
Total params: 617,251
Trainable params: 617,251
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train for 51 epochs; the held-out split is passed as validation data so its
# loss and accuracy are reported after every epoch.
hist = model.fit(
    x=X_train,
    y=Y_train,
    epochs=51,
    verbose=1,
    validation_data=(X_test, Y_test),
)

Epoch 1/51
Epoch 2/51
Epoch 3/51
Epoch 4/51
Epoch 5/51
Epoch 6/51
Epoch 7/51
Epoch 8/51
Epoch 9/51
Epoch 10/51
Epoch 11/51
Epoch 12/51
Epoch 13/51
Epoch 14/51
Epoch 15/51
Epoch 16/51
Epoch 17/51
Epoch 18/51
Epoch 19/51
Epoch 20/51
Epoch 21/51
Epoch 22/51
Epoch 23/51
Epoch 24/51
Epoch 25/51
Epoch 26/51
Epoch 27/51
Epoch 28/51
Epoch 29/51
Epoch 30/51
Epoch 31/51
Epoch 32/51
Epoch 33/51
Epoch 34/51
Epoch 35/51
Epoch 36/51
Epoch 37/51
Epoch 38/51
Epoch 39/51
Epoch 40/51
Epoch 41/51
Epoch 42/51
Epoch 43/51
Epoch 44/51
Epoch 45/51
Epoch 46/51
Epoch 47/51
Epoch 48/51
Epoch 49/51
Epoch 50/51
Epoch 51/51


In [None]:
#Model Save
# Write the trained network to Model.h5 in the working directory.
model.save('Model.h5')

### Application

In [None]:
import difflib

In [None]:
def predict_intent(message, threshold=0.5):
    """Predict the intent of a user message with the trained network.

    The message is tokenized, lowercased and lemmatized the same way the
    training patterns were encoded, tokens from the ``ignore`` list are dropped,
    and every remaining token that is not a vocabulary word is replaced by its
    closest vocabulary match when one exists. The resulting bag-of-words vector
    is scored by ``model``.

    Args:
        message: Raw user input.
        threshold: Minimum probability a class must exceed to be accepted.

    Returns:
        ``{'intent': <tag>, 'probability': <str>}`` for the most probable class
        above ``threshold``, otherwise ``{'intent': 'sorry', 'probability': 1}``.
    """
    # Lowercase before lemmatizing and filtering: the ignore list is all
    # lowercase and the training rows were built from lowercased tokens.
    tokens = [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(message)
        if token.lower() not in ignore
    ]

    # Vocabulary word -> feature column, built once instead of scanning the
    # whole vocabulary for every token.
    vocab_index = {word: column for column, word in enumerate(words)}

    sentence_words = []
    for token in tokens:
        if token in vocab_index:
            sentence_words.append(token)
            continue
        # Typo tolerance: fall back to the closest vocabulary word. A token with
        # no close match is kept as-is (it simply sets no feature) instead of
        # raising IndexError on an empty match list.
        close = difflib.get_close_matches(token, words, n=1)
        sentence_words.append(close[0] if close else token)
    print(sentence_words)

    bag = [0] * len(words)
    for w in sentence_words:
        column = vocab_index.get(w.lower())
        if column is not None:
            bag[column] = 1

    res = model.predict(np.array([bag]))[0]
    candidates = [(index, prob) for index, prob in enumerate(res) if prob > threshold]
    if not candidates:
        # Nothing was confident enough: report the fallback intent.
        return {'intent': "sorry", 'probability': 1}
    best_index, best_prob = max(candidates, key=lambda pair: pair[1])
    return {'intent': classes[best_index], 'probability': str(best_prob)}

In [None]:
# Console chat loop: keep answering until the bot either fails to recognise
# the message ('sorry') or the user says goodbye.
while True:
    message = input("Bot | How can I help You? \nYou |")
    intent = predict_intent(message)
    print(intent)
    if intent['intent'] in ('sorry', 'goodbye'):
        break

Bot | How can I help You? 
You |number of students
['number', 'student']
{'intent': 'strength', 'probability': '0.86566025'}
Bot | How can I help You? 
You |strength of students
['strength', 'student']
{'intent': 'strength', 'probability': '0.9791678'}
Bot | How can I help You? 
You |bye
['Goodbye']
{'intent': 'sorry', 'probability': 1}
