Importing the Modules Needed

In [1]:
import json
import pickle
import string

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
# Not referenced by name: importing it registers the custom ops that the
# BERT preprocessing model loaded from TF-Hub needs.
import tensorflow_text as text

Loading the Dataset

In [2]:
# Read the intents definition from disk. The file is opened in binary mode
# and the raw bytes are handed to the JSON decoder.
with open("./dataset/dataset.json", "rb") as file:
    data = json.loads(file.read())

In [3]:
# Flatten the intents into parallel lists:
#   inputs[i]  - one example pattern
#   tags[i]    - the tag that pattern belongs to
# plus two lookups:
#   classes    - unique tags in order of first appearance
#   responses  - tag -> list of responses for that tag
tags = []
inputs = []
responses = {}
classes = []

for intent in data['intents']:
    tag = intent["tag"]
    patterns = intent['patterns']

    if tag not in classes:
        classes.append(tag)
    responses[tag] = intent['responses']

    # One (pattern, tag) pair per example pattern.
    inputs.extend(patterns)
    tags.extend([tag] * len(patterns))

Saving the Classes and Their Corresponding Responses

In [4]:
# Persist the ordered class list and the tag -> responses lookup.
# `with` guarantees each file is flushed and closed as soon as the dump
# finishes; the bare `open(...)` calls left the handles open.
with open('./model/classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

with open('./model/responses.pkl', 'wb') as responses_file:
    pickle.dump(responses, responses_file)

In [5]:
# Normalise every training pattern: strip ASCII punctuation, then lower-case.
# The translation table does not depend on the pattern, so it is built once
# instead of on every iteration.
punctuation_table = str.maketrans('', '', string.punctuation)

# Slice assignment keeps `inputs` the same list object (in-place update).
inputs[:] = [pattern.translate(punctuation_table).lower() for pattern in inputs]

In [6]:
# Pair each normalised pattern with its tag, one row per training example.
df = pd.DataFrame(
    data=list(zip(inputs, tags)),
    columns=["inputs", "tags"],
)
print(df.head())

            inputs      tags
0               hi  greeting
1              hey  greeting
2      how are you  greeting
3  is anyone there  greeting
4            hello  greeting


In [7]:
# Number of training patterns per intent tag.
df["tags"].value_counts()

creator     16
name        13
greeting     6
thanks       4
goodbye      3
skill        3
funny        3
Name: tags, dtype: int64

Replacing Each Class with a Numeric Value

In [8]:
# Encode each tag as its position in `classes`. The model's output layer has
# len(classes) units and predictions are decoded with classes[index], so the
# label ids must follow that exact order. Deriving the mapping from `classes`
# keeps the two in sync when the dataset changes, instead of relying on a
# hand-maintained dictionary.
class_to_index = {tag: index for index, tag in enumerate(classes)}

# Values without an entry (including ids that are already encoded when the
# cell is re-run) pass through unchanged, as they did with `replace`.
df["tags"] = df["tags"].map(lambda tag: class_to_index.get(tag, tag))

In [9]:
# Preview the frame after the tags were replaced by their integer ids.
df.head()

Unnamed: 0,inputs,tags
0,hi,0
1,hey,0
2,how are you,0
3,is anyone there,0
4,hello,0


In [10]:
# Per-class pattern counts again, now keyed by the integer label.
df["tags"].value_counts()

3    16
4    13
0     6
2     4
1     3
5     3
6     3
Name: tags, dtype: int64

In [11]:
# Every column except the last is a feature (kept as a DataFrame);
# the last column holds the labels (a Series).
feature_columns = df.columns[:-1]
label_column = df.columns[-1]

X = df[feature_columns]
y = df[label_column]

In [12]:
# Display the feature frame (the single "inputs" column).
X

Unnamed: 0,inputs
0,hi
1,hey
2,how are you
3,is anyone there
4,hello
5,good day
6,bye
7,see you later
8,goodbye
9,thanks


In [13]:
# Display the label series (one integer class id per pattern).
y

0     0
1     0
2     0
3     0
4     0
5     0
6     1
7     1
8     1
9     2
10    2
11    2
12    2
13    3
14    3
15    3
16    3
17    3
18    3
19    3
20    3
21    3
22    3
23    3
24    3
25    3
26    3
27    3
28    3
29    4
30    4
31    4
32    4
33    4
34    4
35    4
36    4
37    4
38    4
39    4
40    4
41    4
42    5
43    5
44    5
45    6
46    6
47    6
Name: tags, dtype: int64

Importing the BERT Encoder and the BERT Preprocessor

In [None]:
# TF-Hub handles for the preprocessing model and its matching encoder.
BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

# Both layers are created with KerasLayer's default arguments.
bert_preprocess = hub.KerasLayer(BERT_PREPROCESS_URL)
bert_encoder = hub.KerasLayer(BERT_ENCODER_URL)

Function to Get Sentence Embeddings

In [None]:
def get_sentence_embeding(sentences):
    """Return the BERT ``pooled_output`` for ``sentences``.

    The input is passed through ``bert_preprocess`` and the result through
    ``bert_encoder``; only the ``'pooled_output'`` entry of the encoder's
    output dictionary is returned.

    Args:
        sentences: Raw text accepted by ``bert_preprocess``
            (presumably a list or tensor of strings - confirm against callers).

    Returns:
        The encoder's ``pooled_output`` for the given sentences.
    """
    encoder_outputs = bert_encoder(bert_preprocess(sentences))
    return encoder_outputs['pooled_output']

Structure of the NLP Model with TensorFlow

In [None]:
# Bert layers
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Neural network layers
l = tf.keras.layers.Dropout(0.1, name="dropout")(outputs['pooled_output'])
l = tf.keras.layers.Dense(len(classes), activation='softmax', name="output")(l)

# Use inputs and outputs to construct a final model
model = tf.keras.Model(inputs=[text_input], outputs = [l])

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Step size for Adam, named so it is easy to find and tune.
LEARNING_RATE = 0.001

optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

# Labels are integer class ids (not one-hot vectors), hence the sparse
# variant of categorical cross-entropy.
loss = tf.keras.losses.sparse_categorical_crossentropy

In [None]:
# Configure training with the optimizer and loss defined in the previous cell;
# "acc" asks Keras to report accuracy alongside the loss.
model.compile(optimizer=optimizer, loss=loss, metrics=["acc"])

Training the Model

In [None]:
# Number of full passes over the training patterns.
EPOCHS = 150

# Train on the complete data set (no validation split is passed).
model.fit(X, y, epochs=EPOCHS)

Testing the Model

In [None]:
sentence = "Hello!"

# Apply the same normalisation the training patterns received
# (punctuation removed, then lower-cased) so inference matches training.
sentence = sentence.translate(str.maketrans('', '', string.punctuation)).lower()

# The model takes a batch of strings, so wrap the sentence in a length-1 array.
sent_seq = np.expand_dims(sentence, axis = 0)

# One row of class probabilities per input sentence.
pred = model.predict(sent_seq)

# Index of the most probable class for the single sentence in the batch.
pred_class = np.argmax(pred[0])

# Map the index back to its tag; `classes` is in label-id order.
print(classes[pred_class])

Saving the Model

In [None]:
# Persist the trained model under ./model/, next to the pickled classes and responses.
model.save("./model/chatbot_model.keras")