1. Importing json file which contains intents

In [43]:
import json

data_file = open("intents3.json", encoding="utf-8").read()
data = json.loads(data_file)
data


{'intents': [{'tag': 'greeting',
   'patterns': ['Hi',
    'Hello',
    'Hey',
    'Good morning',
    'Good afternoon',
    'Hi there',
    'Hey there',
    'Yo'],
   'responses': ["Hey! How's it going?",
    "Hello! What's up?",
    'Hi there! How can I help?',
    "Hi! How's your day?",
    "Hello! What's new?"],
   'context': ''},
  {'tag': 'noanswer',
   'patterns': [],
   'responses': ["Sorry, can't understand you",
    'Please give me more info',
    'Not sure I understand'],
   'context': ''},
  {'tag': 'job',
   'patterns': ['What is your job', 'What is your work'],
   'responses': ['My job is to make you feel like everything is okay.',
    'I work to serve you as well as possible'],
   'context': ''},
  {'tag': 'age',
   'patterns': ['What is your age', 'How old are you', 'When were you born'],
   'responses': ['I was born in 2021'],
   'context': ''},
  {'tag': 'feeling',
   'patterns': ['How are you today', 'How are you'],
   'responses': ['I am feeling good, you?',
    'Ve

In [44]:
len(data["intents"])

32

2. Creating pattern lemmatizer which tokanize and lemmatize words in a sentence

In [45]:
import nltk
from nltk.stem import WordNetLemmatizer
import string
import numpy as np
#nltk.download("punkt")
#nltk.download("wordnet")

def pattern_lemmatizer(text):
    pat_lemmatizer = WordNetLemmatizer()
    return [pat_lemmatizer.lemmatize(word) for word in nltk.word_tokenize(text.lower())]

    

3. Create a algorithm to store each word in our json file, tags for each pattern(sentence), input patterns and target outputs for train the model

In [46]:
def initialize_lists(data):
    words = []
    tag_classes = []
    predictor_patterns  = []
    target_tags = []
    
    for intent in data["intents"]:
        for pattern in intent["patterns"]:
            tokens = nltk.word_tokenize(pattern)
            words.extend(tokens)
            predictor_patterns.append(pattern)
            target_tags.append(intent["tag"])
        if intent["tag"] not in tag_classes:
            tag_classes.append(intent["tag"])
    tag_classes = sorted(tag_classes)
    return words, tag_classes, predictor_patterns, target_tags

4. Creating algorithms to make a Vocab which lemmatize each words in word list

In [47]:

def vocab_maker(words):
    lemmatizer = WordNetLemmatizer()
    
    vocab = [lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation]
    vocab = sorted(set(vocab))
    return vocab

5. Bag of words function encode each sentence to a binary code array according to a rule with respect to our vocab

In [48]:
def bag_of_words(pattern, vocab):
    bow=[]
    for word in vocab:
        bow.append(1) if word in pattern else bow.append(0)
    return np.array(bow)

6. Creating a Training Set

In [49]:

words, tag_classes, predictor_patterns, target_tags = initialize_lists(data)
vocab = vocab_maker(words)
lemmatizer = WordNetLemmatizer()

training = []
out = [0]*len(tag_classes)
for idx, pattern in enumerate(predictor_patterns):
    pattern_lemma = lemmatizer.lemmatize(pattern.lower()) #for word in nltk.word_tokenize(text.lower())
    #pattern_lemma = pattern_lemmatizer(pattern)
    bow = bag_of_words(pattern_lemma, vocab)
    output = list(out)
    output[tag_classes.index(target_tags[idx])] = 1
    training.append([bow, output])





In [50]:
import random

random_training = training
random.shuffle(random_training)
training_array = np.array(random_training, dtype=object)
train_X = np.array(list(training_array[:,0]))
train_Y = np.array(list(training_array[:,1]))

7. Training the model. The model we are using is a neural network

In [51]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout



model = Sequential()

model.add(Dense(128, input_shape=(train_X.shape[1],), activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(train_Y.shape[1], activation="softmax"))
                   
model.compile(loss = "categorical_crossentropy",
              optimizer = tf.keras.optimizers.Adam(learning_rate =0.01),
              metrics = ["accuracy"])

In [52]:
model.summary()

In [53]:
model.fit(x =train_X, y= train_Y, epochs = 150, verbose=1)

Epoch 1/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.0504 - loss: 3.5171
Epoch 2/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0711 - loss: 3.3526 
Epoch 3/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1724 - loss: 3.1557 
Epoch 4/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1968 - loss: 3.0437 
Epoch 5/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2387 - loss: 2.8295 
Epoch 6/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3027 - loss: 2.6157 
Epoch 7/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3385 - loss: 2.3194 
Epoch 8/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3851 - loss: 2.1386 
Epoch 9/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x1e011bd6c50>

8. Creating a test set to evaluate the model

In [54]:
data_file2 = open("intentstest.json", encoding="utf-8").read()
data_test = json.loads(data_file)
data_test

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi',
    'Hello',
    'Hey',
    'Good morning',
    'Good afternoon',
    'Hi there',
    'Hey there',
    'Yo'],
   'responses': ["Hey! How's it going?",
    "Hello! What's up?",
    'Hi there! How can I help?',
    "Hi! How's your day?",
    "Hello! What's new?"],
   'context': ''},
  {'tag': 'noanswer',
   'patterns': [],
   'responses': ["Sorry, can't understand you",
    'Please give me more info',
    'Not sure I understand'],
   'context': ''},
  {'tag': 'job',
   'patterns': ['What is your job', 'What is your work'],
   'responses': ['My job is to make you feel like everything is okay.',
    'I work to serve you as well as possible'],
   'context': ''},
  {'tag': 'age',
   'patterns': ['What is your age', 'How old are you', 'When were you born'],
   'responses': ['I was born in 2021'],
   'context': ''},
  {'tag': 'feeling',
   'patterns': ['How are you today', 'How are you'],
   'responses': ['I am feeling good, you?',
    'Ve

In [55]:
len(data_test["intents"])

32

In [56]:


words_test, tag_classes_test, predictor_patterns_test, target_tags_test = initialize_lists(data_test)

test = []
out2 = [0]*len(tag_classes)
for idx, pattern in enumerate(predictor_patterns_test):
    pattern_lemma = lemmatizer.lemmatize(pattern.lower())
    #pattern_lemma = pattern_lemmatizer(pattern)
    bow2 = bag_of_words(pattern_lemma, vocab)
    output2 = list(out2)
    output2[tag_classes.index(target_tags_test[idx])] = 1
    test.append([bow2, output2])

random_test = test
random.shuffle(random_test)
test_array = np.array(random_test, dtype=object)
test_X = np.array(list(test_array[:,0]))
test_Y = np.array(list(test_array[:,1]))

In [57]:
loss, accuracy = model.evaluate(test_X, test_Y)
print(f'Test accuracy: {accuracy}')

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9358 - loss: 0.0916  
Test accuracy: 0.9411764740943909


8. Following functions are made to chat with humans. Predict class predicts the tag class for a given text. Get responce function choose a random response according to predictet tag using our json file

In [58]:

def predict_tag_class(text, vocab, tag_classes):
    bow = bag_of_words(text, vocab)
    bow = bow.reshape(1, -1)
    
    predictions = model.predict(bow, verbose=0)[0]
    
    most_probabble =  [[idx,pred] for idx,pred in enumerate(predictions) if pred > 0.5]
    most_probabble.sort(key=lambda X: X[1], reverse=True)
    
    pred_tag_classes =[]
    for tags in most_probabble:
        pred_tag_classes.append(tag_classes[tags[0]])
    return pred_tag_classes

def get_responces(predicted_classes, intents):
    if len(predicted_classes) == 0:
        result = "Sorry, I cant't understand what you are telling"
    else:
        tag = predicted_classes[0]
        list_of_intents = intents["intents"]
        for intent in list_of_intents:
            if intent["tag"] == tag:
                result = random.choice(intent["responses"])
                break
    return result

In [59]:
print("Enter 0 to stop the chat")
print("\n")
while True:
    text = input("You : ")
    if text == "0":
        break
    pattern = pattern_lemmatizer(text)
    result = get_responces(predict_tag_class(pattern,vocab, tag_classes), data)
    print("ChatBot :",result)

Enter 0 to stop the chat




You :  hi


ChatBot : Hello! What's up?


You :  how are you


ChatBot : I'm doing great! How about you?


You :  im fine


ChatBot : That is perfect!


You :  who are you


ChatBot : I'm ChatBot, your friendly virtual assistant!


You :  bye


ChatBot : See you later, alligator!


You :  0
