In [1]:
import pandas as pd
import numpy as np 
import json

!pip3 install torch
import torch 
import torch.nn as nn 
from torch.utils.data import Dataset, DataLoader

!pip3 install nltk
import nltk
from nltk.tokenize import TreebankWordTokenizer
from nltk.stem.porter import PorterStemmer

import random



In [2]:
with open('newIntents.json') as f:
    data = json.load(f)
print(data)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'Hey', 'How are you', 'Is anyone there?', 'Hello', 'Good day'], 'responses': ['Hey :-)', 'Hello, thanks for visiting', 'Hi there, what can I do for you?', 'Hi there, how can I help?', 'Welcome to booking ticket moviee booking']}, {'tag': 'goodbye', 'patterns': ['Bye', 'See you later', 'Goodbye'], 'responses': ['See you later, thanks for visiting', 'Have a nice day', 'Bye! Come back again soon.', 'Have a good day', 'Have a great day', '']}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful", "Thank's a lot!"], 'responses': ['Happy to help!', 'Any time!', 'My pleasure']}, {'tag': 'tickets', 'patterns': ['what type of tickets are available?', 'what are the ticket prices?', 'How many tickets are available?', 'How many gold tickets are available?', 'How many platinum tickets are available?', 'How many silver tickets are available?', 'How many gold seats are their?', 'How many silver seats are their?', 'How many platinum

In [3]:
all_words = []
tags = []
xy = []

In [4]:
#initializing the tokenizer
tokenizer = TreebankWordTokenizer()
stemmer = PorterStemmer()
#traversing through the loop
for intent in data['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
    #taking out the patterns and the tokenizing
        w = tokenizer.tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))

In [5]:
ignore_syntax = ["?","/",".","!", "@", "#", "$"]
all_words = [stemmer.stem(w) for w in all_words if w not in ignore_syntax]

#to remove the duplicate values
all_words = sorted(set(all_words))
tags = sorted(set(tags))
print(len(xy), "patterns")
print(len(tags), "tags:", tags)
print(len(all_words), "unique stemmed words:", all_words)

52 patterns
8 tags: ['delivery', 'funny', 'goodbye', 'greeting', 'movie', 'payments', 'thanks', 'tickets']
96 unique stemmed words: ["'s", '2', '3d', 'a', 'accept', 'all', 'allow', 'ambul', 'anyon', 'are', 'arriv', 'avail', 'be', 'bye', 'can', 'card', 'cash', 'come', 'contractor', 'credit', 'day', 'deliveri', 'discount', 'do', 'doe', 'error', 'food', 'for', 'funni', 'get', 'gold', 'good', 'goodby', 'hello', 'help', 'hey', 'hi', 'how', 'i', 'imposs', 'insid', 'is', 'joke', 'k.g.f', 'know', 'languag', 'later', 'long', 'lot', 'mani', 'mastercard', 'me', 'mission', 'moonfal', 'movi', 'my', 'not', 'of', 'onli', 'option', 'pay', 'payment', 'paypal', 'platinum', 'price', 'rrr', 'screen', 'seat', 'see', 'ship', 'shown', 'silver', 'slot', 'someth', 'spiderman', 'student', 'take', 'tell', 'thank', 'that', 'the', 'their', 'there', 'ticket', 'till', 'time', 'type', 'we', 'what', 'when', 'where', 'which', 'while', 'will', 'with', 'you']


In [6]:
def bag_of_words(tokenized_sentence, words):
    # stem each word
    sentence_words = [stemmer.stem(word) for word in tokenized_sentence]
    # initialize bag with 0 for each word
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in sentence_words: 
            bag[idx] = 1

    return bag

In [7]:
# create training data
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    # X: bag of words for each pattern_sentence
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
    label = tags.index(tag)
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)

In [8]:
# Hyper-parameters 
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)
print(input_size, output_size)

96 8


In [9]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size) 
        self.l2 = nn.Linear(hidden_size, hidden_size) 
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        # no activation and no softmax at the end
        return out

In [10]:
class ChatDataset(Dataset):

    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    # support indexing such that dataset[i] can be used to get i-th sample
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    # we can call len(dataset) to return the size
    def __len__(self):
        return self.n_samples

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)
        
        # Forward pass
        outputs = model(words)
        # if y would be one-hot, we must apply
        # labels = torch.max(labels, 1)[1]
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


print(f'final loss: {loss.item():.4f}')

data = {
"model_state": model.state_dict(),
"input_size": input_size,
"hidden_size": hidden_size,
"output_size": output_size,
"all_words": all_words,
"tags": tags
}

FILE = "data.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')

Epoch [100/1000], Loss: 0.3624
Epoch [200/1000], Loss: 0.0295
Epoch [300/1000], Loss: 0.0025
Epoch [400/1000], Loss: 0.0147
Epoch [500/1000], Loss: 0.0012
Epoch [600/1000], Loss: 0.0007
Epoch [700/1000], Loss: 0.0027
Epoch [800/1000], Loss: 0.0002
Epoch [900/1000], Loss: 0.0006
Epoch [1000/1000], Loss: 0.0000
final loss: 0.0000
training complete. file saved to data.pth


In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [13]:
with open('newIntents.json', 'r') as json_data:
    intents = json.load(json_data)

FILE = "data.pth"
data = torch.load(FILE)

input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()

bot_name = "Sam"
print("Let's chat! (type 'quit' to exit)")
while True:
    # sentence = "do you use credit cards?"
    sentence = input("You: ")
    if sentence == "quit":
        break

    sentence = tokenizer.tokenize(sentence)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    output = model(X)
    _, predicted = torch.max(output, dim=1)

    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]
    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}: I do not understand...")
        break

Let's chat! (type 'quit' to exit)
Sam: Hi there, what can I do for you?
Sam: Hi there, what can I do for you?
Sam: We have 1 platinum ticket left
Sam: We have 2 gold tickets
Sam: We have 1 silver ticket left
Sam: Have a great day
Sam: Yes, student discount is available
Sam: We have 1 gold ticket left
Sam: We have 1 silver ticket left
Sam: We have 2 platinum ticket
Sam: We have 2 silver ticket
Sam: We have 2 gold tickets
Sam: Hi there, what can I do for you?
Sam: We have 2 gold tickets
Sam: We have 3 silver ticket
Sam: We have 2 silver ticket
Sam: We have some platinum tickets available
Sam: We have 2 platinum ticket
Sam: We have 3 platinum ticket
Sam: We have platinum,gold and silver seats
Sam: We have 3 gold ticket
Sam: We have 1 platinum ticket left
Sam: We have some gold tickets left
Sam: We have some gold tickets left
Sam: We have 2 silver ticket
Sam: We have 2 silver ticket
Sam: We have some gold tickets left
Sam: We have 2 gold tickets
Sam: We have 2 silver ticket
Sam: We have 1 

KeyboardInterrupt: Interrupted by user