In [27]:
import json 
import numpy as np 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

In [28]:
# Parse the intent definitions (tags, example patterns, canned responses).
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's locale default.
with open('intents.json', encoding='utf-8') as file:
    data = json.load(file)

In [29]:
# Display the parsed intents to confirm the file loaded with the expected structure.
data

{'intents': [{'tag': 'greeting',
   'patterns': ['hello', 'hi', 'hey'],
   'responses': ['Hello! How can I assist you today?',
    'Hi there! How can I help you?']},
  {'tag': 'farewell',
   'patterns': ['goodbye', 'bye', 'see you later'],
   'responses': ['Goodbye! Have a great day!', 'See you later! Take care.']},
  {'tag': 'job',
   'patterns': ['what can you do?',
    'what all can you perform?',
    'what are the options?',
    'what can the options you have?'],
   'responses': ['I can do addition,subtraction,Multiplication and divison']},
  {'tag': 'add',
   'patterns': ['Add', 'Addition', 'Add numbers'],
   'responses': ['Sure, what are the numbers you want to add?']},
  {'tag': 'subtract',
   'patterns': ['Subtract', 'Subtraction', 'Subtract numbers'],
   'responses': ['Sure, what are the numbers you want to subtract?']},
  {'tag': 'multiply',
   'patterns': ['Multiply', 'Multiplication', 'Multiply numbers'],
   'responses': ['Sure, what are the numbers you want to multiply?']}

In [30]:
# Flatten the intents into parallel lists: one (sentence, tag) pair per
# example pattern, plus the per-intent responses and the distinct tags.
training_sentences = []
training_labels = []
labels = []
responses = []

for entry in data['intents']:
    examples = entry['patterns']
    intent_tag = entry['tag']

    # Every example pattern of this intent is labelled with the intent's tag.
    training_sentences.extend(examples)
    training_labels.extend([intent_tag] * len(examples))

    responses.append(entry['responses'])

    # Keep each tag once, in order of first appearance.
    if intent_tag not in labels:
        labels.append(intent_tag)

# Size of the classifier's output layer.
num_classes = len(labels)

In [31]:
# Inspect the collected tags: one entry per training sentence.
training_labels

['greeting',
 'greeting',
 'greeting',
 'farewell',
 'farewell',
 'farewell',
 'job',
 'job',
 'job',
 'job',
 'add',
 'add',
 'add',
 'subtract',
 'subtract',
 'subtract',
 'multiply',
 'multiply',
 'multiply',
 'divison',
 'divison']

In [32]:
# Map the string tags to integer class ids; the fitted encoder is kept so
# predictions can later be turned back into tags.
lbl_encoder = LabelEncoder().fit(training_labels)
training_labels = lbl_encoder.transform(training_labels)


In [33]:
# Text-preprocessing settings.
vocab_size = 1000       # passed to the tokenizer as num_words
embedding_dim = 16
max_len = 20            # length every sequence is padded/truncated to
oov_token = "<OOV>"     # placeholder for out-of-vocabulary words

# Fit the tokenizer on the training sentences.
tokenizer = Tokenizer(oov_token=oov_token, num_words=vocab_size)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Turn each sentence into a fixed-length sequence of word ids.
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    truncating='post',
)

In [34]:
# Intent classifier: embed the tokens, average them over the sequence,
# then two small dense layers and a softmax over the intent classes.
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# The labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [35]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 20, 16)            16000     
                                                                 
 global_average_pooling1d_1   (None, 16)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_3 (Dense)             (None, 16)                272       
                                                                 
 dense_4 (Dense)             (None, 16)                272       
                                                                 
 dense_5 (Dense)             (None, 7)                 119       
                                                                 
Total params: 16,663
Trainable params: 16,663
Non-trainable params: 0
__________________________________________________

In [36]:
# Train on the full (tiny) pattern set; the returned history is kept for
# later inspection.
epochs = 550
history = model.fit(
    x=padded_sequences,
    y=np.array(training_labels),
    epochs=epochs,
)

Epoch 1/550
Epoch 2/550
Epoch 3/550
Epoch 4/550
Epoch 5/550
Epoch 6/550
Epoch 7/550
Epoch 8/550
Epoch 9/550
Epoch 10/550
Epoch 11/550
Epoch 12/550
Epoch 13/550
Epoch 14/550
Epoch 15/550
Epoch 16/550
Epoch 17/550
Epoch 18/550
Epoch 19/550
Epoch 20/550
Epoch 21/550
Epoch 22/550
Epoch 23/550
Epoch 24/550
Epoch 25/550
Epoch 26/550
Epoch 27/550
Epoch 28/550
Epoch 29/550
Epoch 30/550
Epoch 31/550
Epoch 32/550
Epoch 33/550
Epoch 34/550
Epoch 35/550
Epoch 36/550
Epoch 37/550
Epoch 38/550
Epoch 39/550
Epoch 40/550
Epoch 41/550
Epoch 42/550
Epoch 43/550
Epoch 44/550
Epoch 45/550
Epoch 46/550
Epoch 47/550
Epoch 48/550
Epoch 49/550
Epoch 50/550
Epoch 51/550
Epoch 52/550
Epoch 53/550
Epoch 54/550
Epoch 55/550
Epoch 56/550
Epoch 57/550
Epoch 58/550
Epoch 59/550
Epoch 60/550
Epoch 61/550
Epoch 62/550
Epoch 63/550
Epoch 64/550
Epoch 65/550
Epoch 66/550
Epoch 67/550
Epoch 68/550
Epoch 69/550
Epoch 70/550
Epoch 71/550
Epoch 72/550
Epoch 73/550
Epoch 74/550
Epoch 75/550
Epoch 76/550
Epoch 77/550
Epoch 78

In [37]:
# Persist everything the chat loop needs at inference time.

# trained network
model.save("chat_model")

import pickle

# fitted tokenizer and label encoder, one pickle file each
for artifact_path, artifact in (
    ('tokenizer.pickle', tokenizer),
    ('label_encoder.pickle', lbl_encoder),
):
    with open(artifact_path, 'wb') as sink:
        pickle.dump(artifact, sink, protocol=pickle.HIGHEST_PROTOCOL)



In [38]:
# colorama provides the coloured console text used by the chat loop.
# The explicit %pip magic installs into the running kernel's environment and
# does not rely on IPython's automagic being enabled.
%pip install colorama

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [40]:
import json 
import numpy as np
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
import re
import colorama 
colorama.init()
from colorama import Fore, Style, Back

import random
import pickle

# Reload the intent definitions so this cell can run on its own.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's locale default.
with open("intents.json", encoding="utf-8") as file:
    data = json.load(file)


# Matches an optionally signed integer or decimal number. The look-behind
# stops a match from starting right after a digit or '.', so "5-3" yields
# 5 and 3 (not 5 and -3) and "2.5" is read as one number.
_NUMBER_PATTERN = re.compile(r'(?<![\d.])[-+]?(?:\d+(?:\.\d+)?|\.\d+)')

# Intent tags answered by computing a result instead of a canned response.
# 'divison' is the spelling the training data actually uses; the correct
# spelling is accepted too so fixing intents.json later keeps working.
_ARITHMETIC_TAGS = frozenset({'add', 'subtract', 'multiply', 'divison', 'division'})


def _arithmetic_reply(tag, text):
    """Build the bot's reply for an arithmetic intent.

    Args:
        tag: Predicted intent tag; expected to be one of ``_ARITHMETIC_TAGS``.
        text: Raw user input that should contain the two operands.

    Returns:
        The message to print: the computed result, or a hint asking for two
        numbers when fewer than two could be found in ``text``.
    """
    found = _NUMBER_PATTERN.findall(text)
    if len(found) < 2:
        # Previously this case raised IndexError and ended the chat.
        return "Bot: Please enter two numbers, for example: 2 3"

    num1, num2 = float(found[0]), float(found[1])
    if tag == 'add':
        return f"Bot: The sum of {num1} and {num2} is {num1 + num2}"
    if tag == 'subtract':
        return f"Bot: The difference between {num1} and {num2} is {num1 - num2}"
    if tag == 'multiply':
        return f"Bot: The product of {num1} and {num2} is {num1 * num2}"

    # The only remaining arithmetic tag is division (either spelling).
    if num2 != 0:
        return f"The Division is {num1 / num2}"
    return "The division is infinity"


def chat():
    """Run the interactive console chat loop until the user types "quit".

    Loads the saved model ('chat_model'), tokenizer ('tokenizer.pickle') and
    label encoder ('label_encoder.pickle') from the working directory. Each
    user message is classified into an intent tag. Arithmetic intents read a
    second line containing two numbers and print the computed result; every
    other intent prints a random response from the module-level ``data``.

    Returns:
        None.
    """
    # load trained model
    model = keras.models.load_model('chat_model')

    # NOTE: pickle.load can execute arbitrary code from the file; only load
    # pickles that were written by this notebook.
    # load tokenizer object
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    # load label encoder object
    with open('label_encoder.pickle', 'rb') as enc:
        lbl_encoder = pickle.load(enc)

    # parameters (same sequence length the training cell pads to)
    max_len = 20

    while True:
        print(Fore.LIGHTBLUE_EX + "User: " + Style.RESET_ALL, end="")
        inp = input()
        if inp.lower() == "quit":
            break

        padded = keras.preprocessing.sequence.pad_sequences(
            tokenizer.texts_to_sequences([inp]),
            truncating='post', maxlen=max_len)
        result = model.predict(padded)
        predicted = lbl_encoder.inverse_transform([np.argmax(result)])
        print(predicted)

        # Work with a plain string; comparing the 1-element array directly
        # only happens to work because it has exactly one element.
        tag = str(predicted[0])

        if tag in _ARITHMETIC_TAGS:
            # The operands are read from a follow-up line.
            print(_arithmetic_reply(tag, input()))
            continue

        for intent in data['intents']:
            if intent['tag'] == tag:
                print(Fore.GREEN + "ChatBot:" + Style.RESET_ALL, np.random.choice(intent['responses']))

# Show the coloured banner, then hand control to the interactive loop.
banner = f"{Fore.YELLOW}Start messaging with the bot (type quit to stop)!{Style.RESET_ALL}"
print(banner)
chat()


Start messaging with the bot (type quit to stop)!
User: hi
['greeting']
ChatBot: Hi there! How can I help you?
User: what can you do
['job']
ChatBot: I can do addition,subtraction,Multiplication and divison
User: add
['add']
2 3
Bot: The sum of 2.0 and 3.0 is 5.0
User: subtract
['subtract']
2 3
Bot: The difference between 2.0 and 3.0 is -1.0
User: quit
