In [1]:
import random
import json
import pickle
import numpy as np
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [49]:
lemmatizer = WordNetLemmatizer()


def load_intents(path):
  """Parse one intents JSON file and return the resulting object.

  The file is opened in a ``with`` block so the handle is closed right away,
  and it is decoded as UTF-8 explicitly instead of relying on the platform
  default encoding.
  """
  with open(path, encoding="utf-8") as intents_file:
    return json.load(intents_file)


greetings_intents = load_intents("./greetings.json")
iiec_info_intents = load_intents("./iiec_info.json")
# iiec_executives_intents = load_intents("./iiec_executives.json")

# Merge into a brand-new list. dict.copy() is shallow, so extending
# intents["intents"] in place would also grow greetings_intents["intents"].
intents = {
  **greetings_intents,
  "intents": [
    *greetings_intents["intents"],
    *iiec_info_intents["intents"],
    # *iiec_executives_intents["intents"],
  ],
}
intents

{'intents': [{'tag': 'greeting',
   'patterns': ['hey',
    'hello',
    'hi',
    'good morning',
    'good day',
    'how are you?',
    "what's up?",
    'how is it going?'],
   'responses': ["I'm IIEC Greeting bot. How can I help you?",
    'Hello! How can I assist you today?',
    'Hi there! What can I do for you?',
    'Good morning! What brings you here?',
    'Hey! Welcome to our chat. How may I assist you?',
    "Hey! How's your day going?",
    'Hi! Need any help or information?']},
  {'tag': 'purpose',
   'patterns': ['who are you?',
    "why you're made?",
    'who made you?',
    'why do you exist?'],
   'responses': ["Hey, I'm a greeting chatbot made by IIEC.",
    'I can provide you any information about IIEC. So go ahead and ask any questions you want!',
    "I'm here to assist you with any questions you have about IIEC.",
    'I exist to help you learn more about IIEC.',
    "I'm a chatbot designed to assist with queries related to IIEC.",
    "I'm IIEC's virtual assis

In [50]:
# Build the vocabulary (`words`), the label set (`classes`) and the list of
# (tokenized pattern, tag) pairs (`documents`) from every intent pattern.
words = []
classes = []
documents = []
ignore_chars = ["?", "!", ".", ",", "[", "]", "(", ")", "{", "}"]

for intent in intents["intents"]:
  for pattern in intent["patterns"]:
    word_list = nltk.word_tokenize(pattern)
    words.extend(word_list)
    documents.append((word_list, intent["tag"]))
  if intent["tag"] not in classes:
    classes.append(intent["tag"])

# Lower-case before lemmatizing: the bag-of-words encoding compares the
# vocabulary against lemmatize(word.lower()), so an entry that keeps an
# uppercase letter could never match and would be a dead, all-zero feature.
# NOTE(review): any inference code that loads words.pkl must lower-case its
# input tokens the same way.
words = sorted({
  lemmatizer.lemmatize(word.lower())
  for word in words
  if word not in ignore_chars
})
classes = sorted(set(classes))

# Persist vocabulary and labels; `with` closes the files deterministically.
with open("./words.pkl", "wb") as words_file:
  pickle.dump(words, words_file)
with open("./classes.pkl", "wb") as classes_file:
  pickle.dump(classes, classes_file)

len(classes)

30

In [51]:
# Encode every document as a (bag-of-words vector, one-hot label) pair.
output_empty = [0] * len(classes)
training = []

for tokens, tag in documents:
  # Normalize the pattern tokens: lower-case, then lemmatize.
  lemmas = [lemmatizer.lemmatize(token.lower()) for token in tokens]

  # One slot per vocabulary word: 1 when the word occurs in this pattern.
  bag = [1 if vocab_word in lemmas else 0 for vocab_word in words]

  # One-hot label: copy the all-zero template and flag this document's class.
  output_row = output_empty.copy()
  output_row[classes.index(tag)] = 1

  training.append([bag, output_row])

random.shuffle(training)

# Split the shuffled pairs back into feature and label matrices.
train_x = np.array([sample[0] for sample in training])
train_y = np.array([sample[1] for sample in training])
train_x.shape, train_y.shape

((151, 296), (151, 30))

In [52]:
# Feed-forward classifier: bag-of-words vector in, one softmax score per class out.
n_features = len(train_x[0])
n_classes = len(train_y[0])

model = Sequential([
  Dense(128, input_shape=(n_features,), activation="relu"),
  Dropout(0.5),
  Dense(64, activation="relu"),
  Dropout(0.5),
  Dense(n_classes, activation="softmax"),
])

# Callback configured to watch the training loss (patience 5) and scale the
# learning rate by 0.90, with a floor of 0.0001.
reduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.90, patience=5, min_lr=0.0001)
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)

model.compile(optimizer=sgd, loss="categorical_crossentropy", metrics=["accuracy"])

In [59]:
# Train the classifier. train_x / train_y are already NumPy arrays, so they
# are passed straight through; reduce_lr adjusts the learning rate when the
# training loss plateaus.
# NOTE(review): re-running this cell continues training the same `model`
# object; re-run the model-definition cell first for a fresh start.
hist = model.fit(train_x, train_y, epochs=120, batch_size=5, verbose=1, callbacks=[reduce_lr])

# Model.save's second positional parameter is `overwrite`, not a history
# object, so only the target path is passed here.
model.save("./model_no_executive.keras")
print("training done!")

Epoch 1/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9781 - loss: 0.1060 - learning_rate: 0.0031
Epoch 2/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9681 - loss: 0.1124 - learning_rate: 0.0031
Epoch 3/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 922us/step - accuracy: 0.9822 - loss: 0.0706 - learning_rate: 0.0031
Epoch 4/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 997us/step - accuracy: 0.9704 - loss: 0.0756 - learning_rate: 0.0031
Epoch 5/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9422 - loss: 0.1984 - learning_rate: 0.0031
Epoch 6/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9719 - loss: 0.0799 - learning_rate: 0.0031
Epoch 7/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9833 - loss: 0.1030 - learning_rate: 0.0031
Ep