In [1]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [2]:
import random
from tensorflow.keras.optimizers import SGD
from keras.layers import Dense, Dropout
from keras.models import load_model
from keras.models import Sequential
import numpy as np
import pandas as pd
import pickle
import json
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
nltk.download('omw-1.4')
nltk.download("punkt")
nltk.download("wordnet")

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [3]:
# init file
words = []
classes = []
documents = []
ignore_words = ["?", "!"]
data_file = open("/content/gdrive/MyDrive/Implementation/revenue/chatbot/Chatbot.json").read()
intents = json.loads(data_file)

In [4]:
# words
for intent in intents["intents"]:
    for pattern in intent["patterns"]:

        # take each word and tokenize it
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        # adding documents
        documents.append((w, intent["tag"]))

        # adding classes to our class list
        if intent["tag"] not in classes:
            classes.append(intent["tag"])

In [5]:
# lemmatizer
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(list(set(words)))

classes = sorted(list(set(classes)))

print(len(documents), "documents")

print(len(classes), "classes", classes)

print(len(words), "unique lemmatized words", words)

pickle.dump(words, open("/content/gdrive/MyDrive/Implementation/revenue/chatbot/words.pkl", "wb"))
pickle.dump(classes, open("/content/gdrive/MyDrive/Implementation/revenue/chatbot/classes.pkl", "wb"))

68 documents
37 classes ['AI', 'Show revenue', 'abbr', 'artificial', 'business', 'chatbot', 'chatterbox', 'clone', 'computer', 'fav', 'fight', 'goodbye', 'greetings', 'idea', 'imortal', 'lang', 'lie', 'machine', 'move', 'name', 'name1', 'need', 'noanswer', 'os', 'programming', 'revenue five month', 'revenue month', 'revenue quater', 'revenue two month', 'robotics', 'robots', 'sapient', 'sense', 'sentiment', 'sound', 'stupid', 'thanks']
88 unique lemmatized words ["'m", "'s", ',', 'a', 'ai', 'all', 'allowed', 'am', 'an', 'are', 'artificial', 'awesome', 'be', 'being', 'business', 'bye', 'can', 'chat', 'chatterbox', 'clone', 'computer', 'data', 'entity', 'favorite', 'favour', 'fight', 'for', 'good', 'great', 'haroo', 'hello', 'help', 'helpful', 'helping', 'hey', 'hi', 'holla', 'i', 'idea', 'immortal', 'in', 'is', 'it', 'language', 'later', 'lie', 'like', 'linguistic', 'making', 'me', 'move', 'my', 'name', 'need', 'not', 'okay', 'operating', 'programming', 'revenue', 'robot', 'robotics', '

In [6]:
# training initializer
# initializing training data
training = []
output_empty = [0] * len(classes)
for doc in documents:
    # initializing bag of words
    bag = []
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    # lemmatize each word - create base word, in attempt to represent related words
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    # create our bag of words array with 1, if word match found in current pattern
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)

    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

In [7]:
# shuffle our features and turn into np.array
random.shuffle(training)
training = np.array(training)
# create train and test lists. X - patterns, Y - intents
train_x = list(training[:, 0])
train_y = list(training[:, 1])
print("Training data created")

Training data created


  training = np.array(training)


In [8]:
# actual training
# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
# equal to number of intents to predict output intent with softmax
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation="softmax"))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               11392     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 37)                2405      
                                                                 
Total params: 22,053
Trainable params: 22,053
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])

In [10]:
import keras
checkpointer = keras.callbacks.ModelCheckpoint('/content/gdrive/MyDrive/Implementation/revenue/chatbot/chat_model.h5',
                                               verbose=1, save_best_only=True, monitor='accuracy')
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1, callbacks=[checkpointer])
print("model created")

Epoch 1/200
Epoch 1: accuracy improved from -inf to 0.04412, saving model to /content/gdrive/MyDrive/Implementation/revenue/chatbot/chat_model.h5
Epoch 2/200
Epoch 2: accuracy improved from 0.04412 to 0.13235, saving model to /content/gdrive/MyDrive/Implementation/revenue/chatbot/chat_model.h5
Epoch 3/200
Epoch 3: accuracy did not improve from 0.13235
Epoch 4/200
 1/14 [=>............................] - ETA: 0s - loss: 3.3482 - accuracy: 0.2000
Epoch 4: accuracy improved from 0.13235 to 0.22059, saving model to /content/gdrive/MyDrive/Implementation/revenue/chatbot/chat_model.h5
Epoch 5/200
Epoch 5: accuracy did not improve from 0.22059
Epoch 6/200
Epoch 6: accuracy did not improve from 0.22059
Epoch 7/200
 1/14 [=>............................] - ETA: 0s - loss: 3.1642 - accuracy: 0.2000
Epoch 7: accuracy did not improve from 0.22059
Epoch 8/200
 1/14 [=>............................] - ETA: 0s - loss: 2.9784 - accuracy: 0.2000
Epoch 8: accuracy did not improve from 0.22059
Epoch 9/200


In [12]:
from keras.models import load_model
model = load_model('/content/gdrive/MyDrive/Implementation/revenue/chatbot/chat_model.h5')
pred = model.predict(train_x)
from sklearn.metrics import classification_report
print(classification_report(np.array(train_y).argmax(axis=1), pred.argmax(axis=1)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         1
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         1
           8       1.00      1.00      1.00         2
           9       1.00      1.00      1.00         2
          10       1.00      1.00      1.00         1
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         9
          13       1.00      1.00      1.00         1
          14       1.00      1.00      1.00         2
          15       1.00      1.00      1.00         1
          16       1.00      1.00      1.00         1
          17       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
