In [None]:
import torch

In [None]:
# Load the intents file into a DataFrame. pandas is imported in this cell
# because no earlier cell brings it into scope; without the import the call
# below fails with a NameError on a fresh kernel.
import pandas

myjson = pandas.read_json('myjson.json')

In [None]:
#!pip install nltk

In [None]:
from nltk.tokenize import TweetTokenizer
# Shared TweetTokenizer instance; its tokenize() method is used further down
# to split both the training sentences and the user's input into tokens.
tokenizer = TweetTokenizer()

In [None]:
#!pip install stemming
from stemming.porter2 import stem

In [None]:
# Containers filled from the intents file:
#   allword - every token seen across all example inputs
#   tags    - one stemmed tag per intent entry
#   xy      - (tokens, stemmed tag) pairs, one per example input
allword, tags, xy = [], [], []

for intent in myjson['list']:
    stemmed_tag = stem(intent['tag'])  # e.g. greeting, goodbye, ...
    tags.append(stemmed_tag)
    for sentence in intent['input']:
        # Lowercase the sentence, then split it into tokens.
        tokens = tokenizer.tokenize(sentence.lower())
        allword += tokens
        xy.append((tokens, stemmed_tag))

In [None]:
# Punctuation tokens that must not become vocabulary entries.
exclude = ['?', '!', ',', '.', ':', ';']

# Build a new list instead of calling allword.remove() while iterating over
# allword: removing during iteration shifts the remaining items, so the token
# right after each removed one was skipped and could survive the filter.
# The surviving tokens are stemmed in the same pass.
allword = [stem(word) for word in allword if word not in exclude]

In [None]:
import numpy as np
def one_or_none(tokenized_sentence, allword):
    """Encode a sentence as a 0/1 vector over the vocabulary.

    Args:
        tokenized_sentence: container of tokens; membership is tested with ``in``.
        allword: sequence of vocabulary words; fixes the length and order of
            the result.

    Returns:
        1-D ``np.float32`` array with one entry per word in ``allword``:
        1.0 where that word occurs in ``tokenized_sentence``, 0.0 elsewhere.
    """
    hits = [1.0 if word in tokenized_sentence else 0.0 for word in allword]
    return np.array(hits, dtype=np.float32)

In [None]:
# optimizing the entire data generation process 
#import torch

# Stem the tokens of every (tokens, tag) pair stored in xy.
stemmed_sentences = [
    [stem(word) for word in tokens] for tokens, _unused_tag in xy
]

# Features: one 0/1 vocabulary vector per sentence.
x_train = np.array(
    [one_or_none(sentence, allword) for sentence in stemmed_sentences]
)

# Targets: position of each pair's tag inside the tags list.
y_train = np.array([tags.index(pair_tag) for _unused_tokens, pair_tag in xy])

# xy is a list of tuples, each holding (tokenized words, tag)


In [None]:
from torch.utils.data import Dataset,  DataLoader

class PyTorBotDataset(Dataset):
    """Dataset pairing each feature vector with its label.

    Args:
        x_data: indexable collection of feature vectors. When omitted, the
            notebook-level ``x_train`` is used.
        y_data: indexable collection of labels aligned with ``x_data``.
            When omitted, the notebook-level ``y_train`` is used.

    Raises:
        ValueError: if the two collections differ in length.
    """

    def __init__(self, x_data=None, y_data=None):
        # Fall back to the notebook globals so a bare PyTorBotDataset() call
        # keeps working, while explicit data can be passed in for reuse.
        self.x_data = x_train if x_data is None else x_data
        self.y_data = y_train if y_data is None else y_data
        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                "x_data and y_data must have the same length, got "
                f"{len(self.x_data)} and {len(self.y_data)}"
            )
        self.len = len(self.x_data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len
    
# Wrap the training arrays and serve them in shuffled batches of up to 100.
dataset = PyTorBotDataset()
train_loader = DataLoader(
    dataset,
    batch_size=100,
    shuffle=True,
    num_workers=0,
)


# --> use in train loop

# num_workers: number of worker subprocesses used to load the data
# if you get errors, try num_workers = 0

In [None]:
# perceptron classifier, p. 67
# look at the hidden layers and number of classes from the picture to see the linear layer

import torch.nn as nn

class NeuralNetwork(nn.Module):
    """Feed-forward classifier: three linear layers with ReLU after the first two.

    Args:
        inputsize: number of input features.
        hiddensize: width of both hidden layers.
        outputsize: number of output scores (no activation is applied to them).
    """

    def __init__(self, inputsize, hiddensize, outputsize):
        super().__init__()
        # Attribute names are kept as-is: they become the keys of state_dict().
        self.l1 = nn.Linear(inputsize, hiddensize)
        self.l2 = nn.Linear(hiddensize, hiddensize)
        self.l3 = nn.Linear(hiddensize, outputsize)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw output of the last linear layer for input ``x``."""
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)
    
    

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

In [None]:
# Network dimensions: one input per vocabulary entry, one output per tag.
insize, outsize = len(allword), len(tags)
hiddensize = 8  # width of the two hidden layers
model = NeuralNetwork(insize, hiddensize, outsize).to(device)

In [None]:
from torch.autograd import Variable

epochs = 500
CEL = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(epochs):
    for words, labels in train_loader:
        # Move the batch to the model's device. The loss expects labels of
        # type Long (an Int label tensor is rejected), so cast them as well.
        words = words.to(device)
        labels = labels.to(device).to(dtype=torch.long)

        optimizer.zero_grad()       # clear gradients left from the previous batch
        y_pred = model(words)       # forward pass
        loss = CEL(y_pred, labels)  # loss for this batch
        loss.backward()             # back-propagate the loss
        optimizer.step()            # update the weights

    # Every 100 epochs, report the loss of that epoch's last batch.
    if not (epoch + 1) % 100:
        print(f'epoc {epoch+1}/{epochs}, loss = {loss.item():.4f}')

print(f'final loss = {loss.item():.4f}')

In [None]:
# save data
# Bundle the trained weights with the vocabulary, the tags and the layer sizes,
# then write the bundle to disk.
file = "alldata.pth"
alldata = dict(
    modelstate=model.state_dict(),
    allwords=allword,
    tags=tags,
    inputsize=insize,
    hiddensize=hiddensize,
    outputsize=outsize,
)
torch.save(alldata, file)
print("saved")

In [None]:
# import random: random choice of possible responses
import random
# Reload the saved bundle. map_location remaps the stored tensors onto the
# current device, so a file written on a GPU machine can still be opened on a
# CPU-only one (without it torch.load tries to restore the original device).
data = torch.load(file, map_location=device)

allwords = data["allwords"]
thistag = data["tags"]
model_state = data["modelstate"]

# Rebuild the network with the saved layer sizes and restore its weights.
model = NeuralNetwork(data["inputsize"], data["hiddensize"], data["outputsize"]).to(device)
model.load_state_dict(model_state)
model.eval()  # switch to evaluation mode before answering questions

In [None]:
botname = 'PyTorBot: '
# Minimum softmax probability for the predicted tag to be trusted. The previous
# test was `prob > 0`, which a softmax output always satisfies, so the fallback
# reply below could never be printed.
CONFIDENCE_THRESHOLD = 0.75

print("Hello Hello. PyTorBot is here! How can I help you? Enter 'quit' to exit")
while True:
    userinput = input("PyTorBot's dear friend: ")
    if userinput == 'quit':
        break

    # Mirror the training preprocessing: lowercase, tokenize, then stem.
    userinput = tokenizer.tokenize(userinput.lower())
    userinput = [stem(word) for word in userinput]

    # 0/1 vocabulary vector reshaped to a single-row batch and moved to the
    # device the model lives on. (The original referenced an undefined name,
    # `tensor_x`, here and raised a NameError.)
    tensorx = one_or_none(userinput, allwords)
    tensorx = tensorx.reshape(1, tensorx.shape[0])
    tensorx = torch.from_numpy(tensorx).to(device)

    # No gradients are needed to answer a question.
    with torch.no_grad():
        output = model(tensorx)
    _, predicted = torch.max(output, dim = 1)
    tag = thistag[predicted.item()]

    probability = torch.softmax(output, dim = 1)
    prob = probability[0][predicted.item()]

    if prob.item() > CONFIDENCE_THRESHOLD:
        # Look the predicted tag up in the intents list and answer with one
        # of its responses, picked at random.
        for element in myjson["list"]:
            if tag == stem(element["tag"]):
                print(botname, random.choice(element["responses"]))
    else:
        print(botname, "Sorry. I don't get it. Can you help me clarify that?")
     

    