In [None]:
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
import json
from google.colab import files

# Ask Colab for a file upload. A later cell looks up the 'intents.json'
# entry of the returned mapping and decodes its value as UTF-8 bytes.
uploaded = files.upload()

Saving intents.json to intents.json


In [None]:

# The upload mapping holds raw bytes per filename: decode first, then parse.
intents_text = uploaded['intents.json'].decode("utf-8")
intents = json.loads(intents_text)


In [None]:
import nltk
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer 

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
def tokenize(sentence):
  """Split ``sentence`` into tokens with NLTK's ``word_tokenize``."""
  tokens = nltk.word_tokenize(sentence)
  return tokens

In [None]:
# One shared Porter stemmer instance, reused by every call to stem().
stemmer = PorterStemmer()

def stem(word):
  """Lower-case ``word`` and return its Porter stem."""
  lowered = word.lower()
  return stemmer.stem(lowered)

In [None]:
def bag_of_words(tokenized_sentence, all_words):
  """Encode a tokenized sentence as a binary bag-of-words vector.

  Args:
    tokenized_sentence: iterable of word tokens; each one is passed through
      ``stem`` before matching.
    all_words: sequence of vocabulary entries; position ``i`` of the result
      corresponds to ``all_words[i]``.

  Returns:
    A 1-D ``np.float32`` array of length ``len(all_words)`` holding 1 where
    the vocabulary entry equals one of the stemmed tokens and 0 elsewhere.
  """
  # A set gives constant-time membership checks, instead of rescanning the
  # whole sentence once per vocabulary word.
  stemmed_tokens = {stem(word) for word in tokenized_sentence}

  bag = np.zeros(len(all_words), dtype=np.float32)
  for idx, w in enumerate(all_words):
    if w in stemmed_tokens:
      bag[idx] = 1

  return bag

In [None]:
# Empty accumulators filled by the intent-parsing cell:
# vocabulary tokens, intent tags, and (tokens, tag) training pairs.
all_words, tags, xy = [], [], []

In [None]:
# Reset the accumulators here so this cell is idempotent: re-running it must
# not append duplicate patterns/tags, nor mix raw tokens into a vocabulary
# that a later cell has already stemmed.
all_words = []
tags = []
xy = []

for intent in intents['intents']:
  tag = intent['tag']
  tags.append(tag)
  for pattern in intent['patterns']:
    # tokenize() returns a list, so extend() the flat vocabulary with it
    # rather than append()-ing the list as a single element.
    tokens = tokenize(pattern)
    all_words.extend(tokens)
    # Keep the tokens paired with their tag as one training sample.
    xy.append((tokens, tag))

print(xy)

[(['Hi'], 'greeting'), (['Hey'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['Thanks'], 'thanks'), (['Thank', 'you'], 'thanks'), (['That', "'s", 'helpful'], 'thanks'), (['Thank', "'s", 'a', 'lot', '!'], 'thanks'), (['Which', 'items', 'do', 'you', 'have', '?'], 'items'), (['What', 'kinds', 'of', 'items', 'are', 'there', '?'], 'items'), (['What', 'do', 'you', 'sell', '?'], 'items'), (['Do', 'you', 'take', 'credit', 'cards', '?'], 'payments'), (['Do', 'you', 'accept', 'Mastercard', '?'], 'payments'), (['Can', 'I', 'pay', 'with', 'Paypal', '?'], 'payments'), (['Are', 'you', 'cash', 'only', '?'], 'payments'), (['Tell', 'me', 'a', 'joke', '!'], 'funny'), (['Tell', 'me', 'something', 'funny', '!'], 'funny'), (['Do', 'you', 'know', 'a', 'joke', '?'], 'funny')]


In [None]:
# Punctuation tokens that carry no signal for intent classification.
ignore_words = ['?', '!', '.', ',']

# Drop punctuation, stem what is left, then de-duplicate and sort so every
# vocabulary entry (and every tag) has a stable index.
unique_stems = {stem(token) for token in all_words if token not in ignore_words}
all_words = sorted(unique_stems)
tags = sorted(set(tags))

print(all_words)
print(tags)

["'s", 'a', 'accept', 'anyon', 'are', 'bye', 'can', 'card', 'cash', 'credit', 'day', 'do', 'funni', 'good', 'goodby', 'have', 'hello', 'help', 'hey', 'hi', 'how', 'i', 'is', 'item', 'joke', 'kind', 'know', 'later', 'lot', 'mastercard', 'me', 'of', 'onli', 'pay', 'paypal', 'see', 'sell', 'someth', 'take', 'tell', 'thank', 'that', 'there', 'what', 'which', 'with', 'you']
['funny', 'goodbye', 'greeting', 'items', 'payments', 'thanks']


In [None]:
# Features: one bag-of-words vector per (tokens, tag) pair.
X_train = np.array([
  bag_of_words(pattern_sentence, all_words)
  for pattern_sentence, _ in xy
])

# Targets: the integer position of each tag in `tags` (class indices, not
# one-hot vectors, which is what the cross-entropy loss used for training
# expects).
Y_train = np.array([tags.index(tag) for _, tag in xy])

In [None]:
# Hyperparameters

# Network dimensions: input width is the bag-of-words length, output width is
# the number of intent tags.
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)

# Optimisation settings.
batch_size = 8
learning_rate = 0.001
num_epochs = 1000

class ChatDataset(Dataset):
  """Map-style dataset of (features, label) pairs for the intent classifier.

  Args:
    x_data: indexable collection of feature vectors. Defaults to the
      notebook-level ``X_train`` when omitted.
    y_data: indexable collection of labels, aligned with ``x_data``.
      Defaults to the notebook-level ``Y_train`` when omitted.

  Raises:
    ValueError: if ``x_data`` and ``y_data`` have different lengths.
  """

  def __init__(self, x_data=None, y_data=None):
    # Calling ChatDataset() with no arguments keeps the original behaviour
    # of reading the notebook globals.
    if x_data is None:
      x_data = X_train
    if y_data is None:
      y_data = Y_train
    if len(x_data) != len(y_data):
      raise ValueError(
        f'x_data and y_data must have the same length, '
        f'got {len(x_data)} and {len(y_data)}'
      )

    self.n_samples = len(x_data)
    self.x_data = x_data
    self.y_data = y_data

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair at ``index`` (dataset[index])."""
    return self.x_data[index], self.y_data[index]

  def __len__(self):
    """Return the number of samples."""
    return self.n_samples





# Wrap the training arrays and serve them as shuffled mini-batches, loading
# in the main process (num_workers=0).
dataset = ChatDataset()
train_loader = DataLoader(
  dataset=dataset,
  batch_size=batch_size,
  shuffle=True,
  num_workers=0,
)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the %run cell that follows reads
# model.ipynb from a path under this mount point.
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:

%run '/content/drive/My Drive/Miniproject_5sem/model.ipynb' import NeuralNet

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

# Build the classifier and move its parameters to the chosen device.
model = NeuralNet(input_size, hidden_size, output_size)
model = model.to(device)

In [None]:
# Loss and optimizer: cross-entropy over the tag logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
  for features, targets in train_loader:
    features = features.to(device)
    # The loss expects integer class indices, so cast labels to long.
    targets = targets.to(device=device, dtype=torch.long)

    # Forward pass
    logits = model(features)
    loss = criterion(logits, targets)

    # Backward pass and optimizer step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Report every 100th epoch; the value shown is the loss of the last
  # mini-batch of that epoch, not an epoch average.
  epochs_done = epoch + 1
  if epochs_done % 100 == 0:
    print(f'epoch {epoch+1}/{num_epochs}, loss={loss.item():.4f}')

print(f'final loss, loss={loss.item():.4f}')

epoch 100/1000, loss=1.1729
epoch 200/1000, loss=0.5311
epoch 300/1000, loss=0.0604
epoch 400/1000, loss=0.0191
epoch 500/1000, loss=0.0129
epoch 600/1000, loss=0.0029
epoch 700/1000, loss=0.0020
epoch 800/1000, loss=0.0018
epoch 900/1000, loss=0.0013
epoch 1000/1000, loss=0.0006
final loss, loss=0.0006


In [None]:
# Bundle the trained weights with the sizes, vocabulary and tag list stored
# alongside them in the checkpoint.
data = dict(
  model_state=model.state_dict(),
  input_size=input_size,
  hidden_size=hidden_size,
  output_size=output_size,
  all_words=all_words,
  tags=tags,
)

FILE = "data.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')

training complete. file saved to data.pth
