In [1]:
!pip install PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
 
# Authenticate the Colab user, then hand the resulting application-default
# credentials to PyDrive so `drive` can be used to access Google Drive files.
# The order matters: authentication has to happen before the credentials are read.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)





In [2]:
# Copy the intents dataset from Google Drive into the working directory.
# To use a different dataset, change the Drive file id and the local file name below.
intents_drive_id = "1ZxhzlSTrEbO4GVaGrkqawfeoeFDmPvWl"
downloaded = drive.CreateFile({'id': intents_drive_id})
downloaded.GetContentFile('intents.json')

In [3]:
#importing relevant libraries
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import random
import json
import nltk
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
# Tokenizer data used by nltk.word_tokenize. Older NLTK releases read the
# 'punkt' package, while recent releases look for 'punkt_tab' instead, so both
# are requested to keep the notebook runnable on either.
# nltk.download reports an unavailable package in its output rather than raising.
for _tokenizer_resource in ('punkt', 'punkt_tab'):
    nltk.download(_tokenizer_resource)
plt.style.use('classic')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
%matplotlib inline

In [5]:
#creating custom functions

def tokenize(sentence):
    """Split ``sentence`` into a list of tokens with NLTK's ``word_tokenize``."""
    tokens = nltk.word_tokenize(sentence)
    return tokens

def stem(word):
    """Lower-case ``word`` and return its stem.

    Uses the module-level ``stemmer`` object, which must be defined before
    the first call.
    """
    lowered = word.lower()
    return stemmer.stem(lowered)

In [6]:
#Stemming with the Porter stemmer from the NLTK library.
#Stemming reduces the different forms of a word to a common root, so that they are treated as the same word,
#for example: play, played, playing -> play


from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()


In [7]:
#Creating a bag of words: a fixed-length array with one slot per known word, set to 1 if that word occurs in the sentence and 0 otherwise.
#For a tokenized sentence ["hello", "how", "are", "you"] and an array of all known words ["hi", "hello", "I", "you", "bye", "thank", "cool"], the bag-of-words array is bag = [ 0 , 1 , 0 , 1 , 0 , 0 , 0].

def bag_of_words(tokenized_sentence, words):
    """
    Return the bag-of-words array of a tokenized sentence.

    Parameters:
        tokenized_sentence: iterable of word tokens; each one is stemmed here.
        words: sequence of known words. They are compared against the stemmed
            tokens, so they should already be stemmed.

    Returns:
        1-D float32 numpy array with len(words) entries:
        1 for each known word that exists in the sentence, 0 otherwise.

    example:
    sentence = ["hello", "how", "are", "you"]
    words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
    bag   = [  0 ,    1 ,    0 ,   1 ,    0 ,    0 ,      0]
    """
    # Stem every token once and keep the stems in a set, so each vocabulary
    # lookup below is a hash lookup instead of a scan over a list.
    sentence_stems = {stem(word) for word in tokenized_sentence}
    # initialize bag with 0 for each word
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in sentence_stems:
            bag[idx] = 1

    return bag

In [8]:
# Load the intents definition used for training and for answering.
# JSON text is UTF-8, so the encoding is given explicitly instead of relying
# on the platform's default locale encoding.
# If choosing a different dataset, change the file name below.
with open('intents.json', 'r', encoding='utf-8') as f:
    intents = json.load(f)

In [9]:
#Unpack the intents file into three lists:
#  tags      - the tag of every intent
#  all_words - every token of every pattern (duplicates and punctuation still included)
#  xy        - (tokenized pattern, tag) pairs, one per pattern

all_words = []
tags = []
xy = []
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    # tokenize all example sentences of this intent first ...
    tokenized_patterns = [tokenize(pattern) for pattern in intent['patterns']]
    # ... then record their words and pair each sentence with the intent's tag
    for tokens in tokenized_patterns:
        all_words.extend(tokens)
        xy.append((tokens, tag))

In [10]:
#This piece of code is responsible for cleaning the data by implementing previously created functions

ignore_words = ['?', '.', '!'] #For this assignment, I want my chatbot to ignore these.
# Stem and lower each word, remove duplicates and sort.
# The vocabulary is rebuilt from the untouched token lists in `xy` rather than
# from `all_words` itself, so re-running this cell does not stem words that were
# already stemmed (stemming a stem again is not guaranteed to return it unchanged).
# On a first run the result is the same, because `all_words` holds exactly the
# tokens stored in `xy`.
all_words = sorted({stem(w) for tokens, _tag in xy for w in tokens if w not in ignore_words})
tags = sorted(set(tags))

print(len(xy), "patterns")
print(len(tags), "tags:", tags)
print(len(all_words), "unique stemmed words:", all_words)


155 patterns
28 tags: ['appreciation', 'chatbots', 'coffeequeriesprice', 'coffeequeriestype', 'delivery', 'deliveryquery', 'funny', 'funnycomment1', 'goodbye', 'greeting', 'introduction1', 'introduction2', 'introduction3', 'introduction4', 'items', 'mood', 'name', 'nod', 'nope', 'ordering1', 'ordering2', 'orderingfail', 'payments', 'roast', 'teaqueriesprice', 'teaqueriestype', 'thanks', 'usermood']
121 unique stemmed words: ["'", "'m", "'s", "'there", ',', 'a', 'about', 'accept', 'alright', 'am', 'americano', 'an', 'and', 'anyon', 'are', 'bar', 'by', 'bye', 'cafeteria', 'can', 'cappuccino', 'card', 'cash', 'chatbot', 'coffe', 'cool', 'cours', 'credit', 'day', 'deliv', 'deliveri', 'desir', 'do', 'doe', 'doubl', 'earl', 'espresso', 'fine', 'for', 'funni', 'get', 'go', 'good', 'goodby', 'green', 'grey', 'hard', 'have', 'heeeeeeeeeeeeeeeeeeeeey', 'hello', 'help', 'hey', 'hi', 'hola', 'how', 'i', 'is', 'it', 'item', 'job', 'joke', 'kind', 'know', 'larg', 'later', 'latt', 'like', 'long', 'lo

In [11]:
#Creating the training data
#Transforming it into arrays that the PyTorch model can consume

# X: one bag-of-words vector per pattern sentence
X_train = np.array(
    [bag_of_words(pattern_sentence, all_words) for pattern_sentence, _pattern_tag in xy]
)

# y: the position of each pattern's tag in `tags`.
# PyTorch CrossEntropyLoss needs only class labels, not one-hot vectors
# (one-hot encoding would turn each label into a row with a single 1 in the column of its class).
y_train = np.array([tags.index(pattern_tag) for _pattern_sentence, pattern_tag in xy])


In [12]:
#Creating the model: a feed-forward neural network with 3 linear layers that uses the “ReLU” activation function.
#A feedforward neural network is an artificial neural network wherein connections between the nodes do not form a cycle. 
#As such, it is different from its descendant: recurrent neural networks. The author decided to use an FFN instead of an RNN because of its simplicity.
#As a beginner in Python, too much complexity in the project could easily backfire.

class NeuralNet(nn.Module):
    """Feed-forward classifier: three linear layers with ReLU between them.

    ``forward`` returns the raw output of the last linear layer; no activation
    and no softmax is applied to it.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        # Initialise the nn.Module parent before assigning any layers.
        super().__init__()
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.l2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.l3 = nn.Linear(in_features=hidden_size, out_features=num_classes)
        # ReLU is piecewise linear: it outputs zero for a negative input and
        # the input itself otherwise.
        self.relu = nn.ReLU()

    def forward(self, x):
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        # the last layer's output is returned as-is
        return self.l3(hidden)

#At this point in the program, the network class (which inherits from nn.Module) is defined.
#In the next sections the training data is wrapped in a Dataset, the hyper-parameters are set and the model is created.

In [13]:
#Assigning the Dataset to the Model.

class ChatDataset(Dataset):
    """Dataset of (input vector, class label) pairs.

    Parameters:
        features: indexable collection of input vectors. When omitted, the
            module-level ``X_train`` is used.
        labels: indexable collection of class labels with the same length as
            ``features``. When omitted, the module-level ``y_train`` is used.

    Raises:
        ValueError: if ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features=None, labels=None):
        # Fall back to the notebook globals so that a plain ``ChatDataset()``
        # call keeps working exactly as before.
        if features is None:
            features = X_train
        if labels is None:
            labels = y_train
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels differ in length: {len(features)} != {len(labels)}"
            )
        self.n_samples = len(features)
        self.x_data = features
        self.y_data = labels

    # support indexing such that dataset[i] can be used to get i-th sample
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples
        

In [14]:

# Hyper-parameters - every neural network needs a set of them, fixed before training starts.
# These values were changed many times while experimenting.
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
hidden_size = 8
# Sizes given by the data: one input per vocabulary word, one output per tag.
input_size, output_size = len(X_train[0]), len(tags)
print(input_size, output_size)

121 28


In [15]:
#Building the data loader, the model, the loss function and the optimizer

dataset = ChatDataset()
# shuffled mini-batches, loaded without worker subprocesses
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

# use the GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = NeuralNet(input_size, hidden_size, output_size)
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


In [16]:
#Training the model

for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        # move the mini-batch to the training device; labels are cast to long
        words = words.to(device)
        labels = labels.to(device=device, dtype=torch.long)

        # clear the gradients of the previous step before computing new ones
        optimizer.zero_grad()

        # forward pass and loss
        outputs = model(words)
        loss = criterion(outputs, labels)

        # backward pass and parameter update
        loss.backward()
        optimizer.step()

    # every 100th epoch: report the loss of that epoch's last mini-batch
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print(f'final loss: {loss.item():.4f}')

# Everything needed to rebuild the trained model later:
# the weights, the layer sizes, the vocabulary and the tag list.
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags,
}

Epoch [100/1000], Loss: 0.1407
Epoch [200/1000], Loss: 0.0280
Epoch [300/1000], Loss: 0.0032
Epoch [400/1000], Loss: 0.0008
Epoch [500/1000], Loss: 0.0005
Epoch [600/1000], Loss: 0.0001
Epoch [700/1000], Loss: 0.0000
Epoch [800/1000], Loss: 0.0001
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000
final loss: 0.0000


In [17]:
#Saving the trained model: the `data` dictionary is written to a single file

FILE = "data.pth"
torch.save(obj=data, f=FILE)

print(f'training complete. file saved to {FILE}')


training complete. file saved to data.pth


In [18]:
#Loading the saved model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# JSON text is UTF-8; the encoding is given explicitly instead of relying on
# the platform's default locale encoding.
with open('intents.json', 'r', encoding='utf-8') as json_data:
    intents = json.load(json_data)

FILE = "data.pth"
# map_location remaps the stored tensors onto the device that is available now,
# so a checkpoint written on a GPU machine also loads on a CPU-only one.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
data = torch.load(FILE, map_location=device)

# Restore the layer sizes, vocabulary and tag list saved next to the weights.
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

# Rebuild the network with the saved sizes, load the weights and switch to
# evaluation mode for inference.
model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()


NeuralNet(
  (l1): Linear(in_features=121, out_features=8, bias=True)
  (l2): Linear(in_features=8, out_features=8, bias=True)
  (l3): Linear(in_features=8, out_features=28, bias=True)
  (relu): ReLU()
)

In [19]:

#Using the chatbot
#To increase the chatbot's generalized knowledge, it must be trained on a bigger data set.
bot_name = "Cronk"
# Minimum softmax probability the best intent must exceed before the bot answers with it.
confidence_threshold = 0.75

print(f"Start a chat with {bot_name}! (type 'quit' to exit)")
while True:
    sentence = input("You: ")
    # accept the exit command regardless of surrounding whitespace or capitalisation
    if sentence.strip().lower() == "quit":
        break

    sentence = tokenize(sentence)
    X = bag_of_words(sentence, all_words)
    # An all-zero bag means the input contained no known word (this includes
    # empty input). The network's output would then depend on its biases only,
    # so answer with the fallback instead of guessing an intent.
    if not X.any():
        print(f"{bot_name}: I do not understand...")
        continue
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    # inference only: no gradients are needed, so autograd tracking is switched off
    with torch.no_grad():
        output = model(X)
        probs = torch.softmax(output, dim=1)
    predicted = torch.argmax(output, dim=1)

    tag = tags[predicted.item()]
    prob = probs[0][predicted.item()]
    if prob.item() > confidence_threshold:
        # reply with a random response of the intent whose tag was predicted
        for intent in intents['intents']:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}: I do not understand...")

Start a chat with Cronk! (type 'quit' to exit)
You: hi
Cronk: Hi there, what can I do for you?
You: hola
Cronk: Hey hi hello!
You: hello
Cronk: Hi there, how can I help?
You: what's going on?
Cronk: Yes?
You: How are You?
Cronk: I do not understand...
You: How are you?
Cronk: I do not understand...
You: How is your day going?
Cronk: Yeah, things are good, and You?
You: I'm fine, thank You.
Cronk: Keep it up!
You: What do you do here?
Cronk: We have coffee and tea
You: I would like a coffee.
Cronk: What kind and which capacity? Don't forget to add -please- :)
You: Coffee kinds?
Cronk: You name it, we got it.
You: I would like a small coffee.
Cronk: What kind and which capacity? Don't forget to add -please- :)
You: Small coffee please.
Cronk: Order received, funds deducted from your account. Have a sit.
You: Thank You.
Cronk: My pleasure
You: Tell me about yourself.
Cronk: I do not understand...
You: Can You tell me something about yourself?
Cronk: I am Cronk, the chatbot. I help clients