In [None]:
import json
import numpy as np
from tokenization_stemming import class_tokenize, class_stemming, class_bag_of_words

# Read the intent definitions; each intent is expected to carry a 'tag' and a list of 'patterns'.
with open('intents.json', 'r') as f:
    object_storedjsonfile = json.load(f)

all_words = []  # every token found in any pattern (stemmed and de-duplicated further down)
show_tags = []  # one entry per intent tag (sorted and de-duplicated further down)
xy = []         # (tokenized pattern, tag) pairs, one per training sentence

for intent in object_storedjsonfile['intents']:
    obj_tag = intent['tag']
    show_tags.append(obj_tag)
    for pattern in intent['patterns']:
        tokenized_pattern = class_tokenize(pattern)
        all_words += tokenized_pattern  # flatten tokens into the shared vocabulary list
        xy.append((tokenized_pattern, obj_tag))

# Punctuation tokens that must not become part of the vocabulary.
words_to_ignore = ['?', '!', '.', ',', '{', ':', ';', '"', '#', '$', '&']

# Stem every remaining token, then keep a sorted list of the unique stems.
stemmed_vocabulary = set()
for token in all_words:
    if token in words_to_ignore:
        continue
    stemmed_vocabulary.add(class_stemming(token))
all_words = sorted(stemmed_vocabulary)
show_tags = sorted(set(show_tags))

# Features: one bag-of-words vector per pattern (built by class_bag_of_words against all_words).
x_train = np.array([class_bag_of_words(tokens, all_words) for tokens, _tag in xy])
# Targets: the position of each pattern's tag in the sorted tag list (class index for CrossEntropyLoss).
y_train = np.array([show_tags.index(tag) for _tokens, tag in xy])
    

# Next, creating dataset from x_train and y_train

In [None]:
import math
from torch.utils.data import Dataset, DataLoader

batch_size = 8


class ChatDataset(Dataset):
    """Dataset that pairs each feature row with the label stored at the same index."""

    def __init__(self, x_train, y_train):
        """Store the features and labels and remember how many samples there are."""
        self.n_samples = len(x_train)
        self.x_data, self.y_data = x_train, y_train

    def __getitem__(self, index):
        """Return the ``(features, label)`` tuple stored at ``index``."""
        features = self.x_data[index]
        label = self.y_data[index]
        return features, label

    def __len__(self):
        """Return the number of samples recorded at construction time."""
        return self.n_samples

    
# Wrap the training arrays so the DataLoader can batch them.
new_created_datset__ = ChatDataset(x_train, y_train)

# shuffle=True reorders the samples every epoch, so the model sees the data in a
# different order each time, which helps reduce overfitting.
# num_workers (subprocesses used for data loading) is left at its default; it is not needed here.
train_loader = DataLoader(dataset=new_created_datset__, batch_size=batch_size, shuffle=True)

# First (features, label) pair, fetched through normal indexing rather than the dunder call.
first_data = new_created_datset__[0]

# Size of the training set and the number of batches per epoch.
# Derived from batch_size so it stays correct if the batch size is changed.
total_samples = len(new_created_datset__)
n_iter = math.ceil(total_samples / batch_size)  # iterations in 1 epoch
#print("samples in training, in the class (x_train, y_train) and number of iterations for epoch: \n", total_samples, n_iter)
#print(" \n froma above, every epoch has 5 steps")

# An epoch is one complete pass over the training data, i.e. n_iter batches.


In [None]:

import torch 
import torch.nn as nn
#print("nbformat version:", nbformat.__version__)

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
# NOTE: batch_size is intentionally not re-assigned here. It is defined before the
# DataLoader is built; re-binding it at this point would not change the batches
# produced by the already-constructed train_loader.
hidden_size = 8                # width of both hidden layers
output_size = len(show_tags)   # number of classes, i.e. distinct tags
input_size = len(x_train[0])   # length of one bag-of-words vector (expected to equal len(all_words))
learning_rate = 0.001
number_of_epochs = 1000

#print("input_size", input_size, len(all_words))
#print("output_size contains the length of show_tags", output_size, len(show_tags), show_tags)

#creating my model using pytorch

class classNeuralNet(nn.Module):
    """Feed-forward classifier: three linear layers with ReLU after the first two.

    The forward pass returns raw scores (logits); no softmax is applied, which is
    what ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: number of features in each input vector.
            hidden_size: width of both hidden layers.
            num_classes: number of output scores, one per class.
        """
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()  # shared non-linearity applied between the linear layers

    def forward(self, x):
        """Run ``x`` through l1 -> ReLU -> l2 -> ReLU -> l3 and return the logits."""
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)
        

# Build the network on the selected device.
model = classNeuralNet(input_size, hidden_size, output_size).to(device)

# Cross-entropy compares the predicted class scores with the true class index;
# Adam updates the model parameters using the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: every epoch walks over all batches produced by train_loader.
for epoch in range(number_of_epochs):
    for words, labels in train_loader:
        words = words.to(device)
        labels = labels.to(device=device, dtype=torch.long)  # class indices must be int64

        optimizer.zero_grad()                        # drop gradients left from the previous step
        loss_obj = criterion(model(words), labels)   # forward pass + loss against the true labels
        loss_obj.backward()                          # gradients of the loss w.r.t. the parameters
        optimizer.step()                             # apply the update

# Loss of the last batch processed in the final epoch.
print(f'\n final loss = {loss_obj.item():.4f}')


##### Example: 100 samples with a batch size of 20 -> 100/20 = 5 iterations per epoch
# epoch: one forward and backward pass over all training samples, i.e. the entire dataset
# iteration: one forward and backward pass over a single batch
# batch size: number of training samples in one forward and backward pass

In [None]:

# learning_rate = 0.001
# Controls the step size of each iteration while moving towards a minimum of the loss function.
# The learning rate determines how much the model's weights are adjusted with respect to the gradient.
# A high learning rate can cause the optimizer to overshoot the minimum,
# while a low learning rate can result in slow convergence.


In [None]:
#save information in data
FILE = "data.pth"

# Everything needed to rebuild the model later: the learned weights, the layer
# sizes, the vocabulary and the tag list.
trained_weights = model.state_dict()
data = {
    "model_state": trained_weights,
    "input_size": input_size,
    "output_size": output_size,
    "all_words": all_words,
    "hidden_size": hidden_size,
    "show_tags": show_tags,
}

# Write the bundle to disk.
torch.save(data, FILE)

#print(f"training complete, file saved to FILE")

In [None]:
#final chat interface

import random
#from IPython.display import display, Markdown


# Reload the intents so that responses can be looked up by tag.
with open('intents.json', 'r') as f:
    obj1 = json.load(f)

# Load the saved checkpoint. map_location keeps this working when the file was
# written on a GPU machine but is read on a CPU-only one.
data_file = torch.load(FILE, map_location=device, weights_only=True)

# Read every field from the checkpoint that was just loaded, not from the
# in-memory dict built by the save cell.
input_size = data_file["input_size"]
output_size = data_file["output_size"]
all_words = data_file["all_words"]
model_state = data_file["model_state"]
hidden_size = data_file["hidden_size"]
show_tags = data_file["show_tags"]

# Rebuild the network with the sizes stored in the checkpoint, then restore the
# learnt parameters and switch to inference mode.
model = classNeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()

# chat loop
bot_name =  " Skay"
#print("\nFor ease of reference, you can use 'Skay' to address me.")

# first way to implement chatbot.
# print("\n Skay: \nPlease enter your question. To exit, type 'quit' in lowercase.")

# while True:
#     sentence = input('You: ')
#     if sentence == "quit":
#         break
#     sentence = class_tokenize(sentence)
#     x = class_bag_of_words(sentence, all_words)
#     x = x.reshape(1, x.shape[0]) #0 is 0 number of columns
#     x = torch.from_numpy(x) #tensor
    
#     obj_modeloutput = model(x)
#     _,predicted = torch.max(obj_modeloutput, dim=1)
#     tag = show_tags[predicted.item()] # tag is from intents.json, predicted.item() is class label
    
#     probs = torch.softmax(obj_modeloutput, dim=1)
#     prob = probs[0][predicted.item()]
    
#     if prob.item() > 0.75:
#         for i in obj1["intents"]:
#             if tag  == i["tag"]:
#                 print("\n")
#                 print(f"{bot_name}:")
#                 display(Markdown(random.choice(i['responses'])))
                
#     else:
#         print (f"{bot_name}: I do not understand, could you please repeat")
        

In [None]:
# 2nd way
# using function as it's an efficient way to integrate and deploy to a webpage

bot_name =  " Skay"

def chat_response(msg, threshold=0.75):
    """Return the bot's reply to a single user message.

    The message is tokenized, turned into a bag-of-words vector over ``all_words``
    and classified by ``model``. When the softmax probability of the predicted tag
    exceeds ``threshold``, a random response of the matching intent is returned;
    otherwise a fallback prompt is returned.

    Args:
        msg: the raw text typed by the user.
        threshold: minimum probability required to answer with an intent
            response. Defaults to 0.75.

    Returns:
        One of the matching intent's responses, or the fallback string.
    """
    sentence = class_tokenize(msg)
    X = class_bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])  # single sample -> batch of one row
    X = torch.from_numpy(X).to(device)

    # Inference only: skip autograd bookkeeping so no graph is built per message.
    with torch.no_grad():
        output = model(X)
        probs = torch.softmax(output, dim=1)

    _, predicted = torch.max(output, dim=1)
    tag = show_tags[predicted.item()]
    prob = probs[0][predicted.item()]

    if prob.item() > threshold:
        for k in obj1['intents']:
            if tag == k["tag"]:
                return random.choice(k['responses'])

    # Low confidence, or no intent carries the predicted tag.
    return "Could you please type in again..."


if __name__ == "__main__":
    # Greeting and usage instructions.
    for banner in (
        "\nFor ease of reference, you can use 'Skay' to address me.",
        "\n Skay: \nPlease enter your question. To exit, type 'quit' in lowercase.",
    ):
        print(banner)

    # Keep answering until the user types exactly 'quit'.
    sentence = input("You: ")
    while sentence != "quit":
        resp = chat_response(sentence)
        print("\n")
        print(f"{bot_name}:", resp)
        sentence = input("You: ")