In [44]:
import nltk
# Tokenizer data required by nltk.word_tokenize. Older NLTK releases read the
# 'punkt' package, while NLTK 3.8.2+ reads 'punkt_tab' instead; fetch both so
# the notebook runs on either. On an older NLTK the second call only reports
# that the package is unknown.
nltk.download('punkt')
nltk.download('punkt_tab')
from nltk.stem.porter import PorterStemmer

import json
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import random

# Single shared Porter stemmer instance; stem() below calls it for every word.
stemmer = PorterStemmer()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [45]:
def tokenize(sentence):
  """Split ``sentence`` into a list of tokens with NLTK's word tokenizer."""
  tokens = nltk.word_tokenize(sentence)
  return tokens

def stem(word):
  """Lower-case ``word`` and return its stem from the shared Porter stemmer."""
  lowered = word.lower()
  return stemmer.stem(lowered)

def bagofWords(toeknizeSentence, allWords):
  """Encode a tokenized sentence as a binary bag-of-words vector.

  Args:
    toeknizeSentence: iterable of token strings; each one is passed through
      stem() before matching.
    allWords: sequence of vocabulary entries. They are compared as-is; no
      stemming is applied to them here.

  Returns:
    A float32 numpy array of length len(allWords) holding 1.0 at every
    position whose vocabulary entry is among the stemmed tokens, else 0.0.
  """
  # A set makes each membership test constant-time; the original list lookup
  # rescanned the whole sentence for every vocabulary word.
  stemmed_tokens = {stem(token) for token in toeknizeSentence}
  bag = np.zeros(len(allWords), dtype=np.float32)
  for index, word in enumerate(allWords):
    if word in stemmed_tokens:
      bag[index] = 1.0

  return bag

In [46]:
# Quick sanity check of the helpers on a hand-written example.
a = "How are you doing ?"
tokens = tokenize(a)
stmmed = [stem(i) for i in tokens]
# Toy sentence and vocabulary. bagofWords stems the sentence tokens but compares
# the vocabulary entries exactly as given.
sentence = ["hello", "how", "are", "you"]
words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
abg = bagofWords(sentence, words)

In [47]:
# Display the toy bag-of-words vector: 1.0 where a vocabulary entry occurs in the sentence.
abg

array([0., 1., 0., 1., 0., 0., 0.], dtype=float32)

In [48]:
# Load the intent definitions. The parsed JSON is kept under the name `intents`
# so the per-intent loop variable below does not overwrite it; the chat cell
# reads `intents['intents']` when picking a response.
# JSON text is UTF-8 by specification, so do not rely on the platform default.
with open('intents.json', 'r', encoding='utf-8') as intents_file:
  intents = json.load(intents_file)

words = []      # every token seen in any pattern (stemmed and de-duplicated below)
tags = []       # one entry per intent
wordTags = []   # (tokenized pattern, tag) pairs used to build the training set

for intent in intents['intents']:
  tag = intent['tag']
  tags.append(tag)
  for pattern in intent['patterns']:
    w = tokenize(pattern)
    words.extend(w)
    wordTags.append((w, tag))

# Drop punctuation tokens, stem what is left, then sort the unique values so
# vocabulary and tag indices are stable between runs.
ignore_words = ['?', '.', '!']
words = [stem(i) for i in words if i not in ignore_words]
words = sorted(set(words))
tags = sorted(set(tags))

In [49]:
# Hyperparameters that do not depend on the data.
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
hidden_size = 8

# One bag-of-words vector and one integer class index per training pattern.
x_train = []
y_train = []
for pattern_sentence, tag in wordTags:
  x_train.append(bagofWords(pattern_sentence, words))
  y_train.append(tags.index(tag))

xTrain = np.array(x_train)
yTrain = np.array(y_train)

# Network dimensions derived from the encoded data.
input_size = len(xTrain[0])
output_size = len(tags)

class ChatDataset(Dataset):
  """Map-style dataset that pairs each feature row with its label."""

  def __init__(self, x_train, y_train):
    # The sample count is taken once, from the feature container.
    sample_count = len(x_train)
    self.n_samples = sample_count
    self.x_data, self.y_data = x_train, y_train

  def __len__(self):
    """Return the number of samples recorded at construction time."""
    return self.n_samples

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    features = self.x_data[index]
    target = self.y_data[index]
    return features, target

In [50]:
# Wrap the training arrays and serve them in shuffled mini-batches from the
# main process (no worker subprocesses).
dataset = ChatDataset(xTrain, yTrain)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [51]:
class NeuralNet(nn.Module):
  """Feed-forward classifier: three linear layers with ReLU after the first two.

  The final layer returns raw scores; no softmax is applied inside the model.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    # Attribute names double as the state_dict key prefixes.
    self.l1 = nn.Linear(input_size, hidden_size)
    self.l2 = nn.Linear(hidden_size, hidden_size)
    self.l3 = nn.Linear(hidden_size, num_classes)
    self.relu = nn.ReLU()

  def forward(self, x):
    """Return the ``num_classes`` output scores for input ``x``."""
    hidden = self.relu(self.l1(x))
    hidden = self.relu(self.l2(hidden))
    return self.l3(hidden)

In [52]:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device)

# Cross-entropy over the raw model outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for word, labels in train_loader:
        word = word.to(device)
        # Labels are cast to int64 before being passed to the loss.
        labels = labels.to(device=device, dtype=torch.long)

        # Clear stale gradients, then forward pass, backward pass, update.
        optimizer.zero_grad()
        loss = criterion(model(word), labels)
        loss.backward()
        optimizer.step()

    # Every 100th epoch, report the loss of the last mini-batch seen.
    if not (epoch + 1) % 100:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


print(f'final loss: {loss.item():.4f}')

# Bundle the weights with the sizes, vocabulary and tags so the checkpoint
# carries what the chat cell reads back from it.
data = dict(
    model_state=model.state_dict(),
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    all_words=words,
    tags=tags,
)

FILE = "data.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')

Epoch [100/1000], Loss: 1.4040
Epoch [200/1000], Loss: 0.1349
Epoch [300/1000], Loss: 0.0212
Epoch [400/1000], Loss: 0.0139
Epoch [500/1000], Loss: 0.0028
Epoch [600/1000], Loss: 0.0008
Epoch [700/1000], Loss: 0.0007
Epoch [800/1000], Loss: 0.0026
Epoch [900/1000], Loss: 0.0007
Epoch [1000/1000], Loss: 0.0009
final loss: 0.0009
training complete. file saved to data.pth


In [55]:
# Interactive chat: restore the trained classifier from the checkpoint and
# answer user input until the user types 'quit'.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the intent definitions in this cell so it does not depend on a variable
# left over in the kernel; the responses are looked up in `intents` below.
with open('intents.json', 'r', encoding='utf-8') as intents_file:
    intents = json.load(intents_file)

FILE = "data.pth"
# map_location lets a checkpoint written on a GPU machine load on a CPU-only one.
data = torch.load(FILE, map_location=device)

input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()

bot_name = "Jarvis"
print("Let's chat! (type 'quit' to exit)")
while True:
    sentence = input("You: ")
    if sentence == "quit":
        break

    sentence = tokenize(sentence)
    # Encode against the vocabulary stored in the checkpoint, not whatever
    # `words` currently holds in the kernel, so the vector matches the weights.
    X = bagofWords(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(X)
        _, predicted = torch.max(output, dim=1)
        probs = torch.softmax(output, dim=1)

    tag = tags[predicted.item()]
    prob = probs[0][predicted.item()]

    # Answer only when the top class is confident enough.
    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}: I do not understand...")

Let's chat! (type 'quit' to exit)
You: Hii
Jarvis: Hi there, how can I help?
You: quit
