In [195]:
# %%writefile Modules/train.py
import json
from Modules.nltk_utils import tokenize, stem, bag_of_words
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from Modules.model import NeuralNet
from torchinfo import summary

# Load the intent definitions (each intent carries a "tag" and a list of "patterns").
# The encoding is pinned explicitly: JSON text is UTF-8, whereas open()'s default
# encoding depends on the platform locale.
with open("intents.json", "r", encoding="utf-8") as f:
    data = json.load(f)
    

In [196]:
# Punctuation tokens that are excluded from the vocabulary.
ignore_words = ["?", "!", ".", ","]

tags = []
all_words = []
xy = []  # one (tokenized pattern, tag) pair per training sentence

# Walk every intent, collecting its tag, its tokens and the labelled sentences.
for intent in data["intents"]:
    tag = intent["tag"]
    tags.append(tag)
    for pattern in intent["patterns"]:
        w = tokenize(pattern)
        all_words += w
        xy.append((w, tag))

# Stem every kept token, then reduce to a sorted, de-duplicated vocabulary.
all_words = sorted({stem(token) for token in all_words if token not in ignore_words})
tags = sorted(set(tags))

# `tags` is sorted and unique, so each tag maps to exactly one class index.
tag_to_index = {name: position for position, name in enumerate(tags)}

x_train = []
y_train = []

for pattern_sentence, tag in xy:
    # NOTE: `bag` is deliberately a plain loop variable; later cells read len(bag).
    bag = bag_of_words(pattern_sentence, all_words)
    x_train.append(bag)
    y_train.append(tag_to_index[tag])

# Convert the Python lists to tensors (labels first, then features).
y_train = torch.tensor(y_train)
x_train = torch.tensor(x_train)
x_train

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [197]:
class ChatDataSet(Dataset):
    """Map-style dataset pairing each feature row with its class label.

    Args:
        x_train: Indexable, sized collection of feature vectors.
        y_train: Indexable collection of labels aligned with ``x_train``.
    """

    def __init__(self, x_train, y_train):
        self.x_train = x_train
        self.y_train = y_train

    def __len__(self):
        """Return the number of samples held by this dataset instance."""
        # Read the length from the instance, not from a notebook-global variable,
        # so the dataset is correct for whatever data was passed to __init__.
        return len(self.x_train)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.x_train[idx], self.y_train[idx]

# --- Hyperparameters -----------------------------------------------------------
batch_size = 8
hidden_size = 8
learning_rate = 0.001
num_epochs = 1000

# Network dimensions derived from the prepared data.
input_size = len(x_train[0])  # width of one feature row (same as len(all_words) / len(bag))
num_classes = len(tags)

# Wrap the tensors in a dataset and serve them in shuffled mini-batches.
dataset = ChatDataSet(x_train, y_train)
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

# Sanity check: draw one batch and display the shape of its feature tensor.
first_batch = next(iter(train_loader))
first_batch[0].shape

torch.Size([8, 58])

In [198]:
# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [199]:
# Build the network and move it to the selected device in one expression;
# Module.to() returns the module itself, so the chained result is the model.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

In [200]:
# Width of one feature vector. Read from `input_size` rather than `bag`, which only
# exists as a leftover loop variable from the preprocessing cell.
input_size

58

In [203]:
# Per-layer summary for one batch-shaped input. The feature width comes from
# `input_size` instead of the loop-leaked `bag`, so this cell does not depend on
# state left behind by the preprocessing loop.
summary(
    model,
    input_size=(batch_size, input_size),
    col_names=["output_size", "num_params", "trainable", "mult_adds"],
    col_width=17,
    # row_settings=["var_names"],
)

Layer (type:depth-idx)                   Output Shape      Param #           Trainable         Mult-Adds
NeuralNet                                [8, 7]            --                True              --
├─Linear: 1-1                            [8, 8]            472               True              3,776
├─ReLU: 1-2                              [8, 8]            --                --                --
├─Linear: 1-3                            [8, 8]            72                True              576
├─ReLU: 1-4                              [8, 8]            --                --                --
├─Linear: 1-5                            [8, 7]            63                True              504
Total params: 607
Trainable params: 607
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.01

In [204]:
# Adam over all model parameters, with cross-entropy as the training criterion.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

In [205]:
# Number of mini-batches the loader yields per epoch.
len(train_loader)

4

In [206]:
from tqdm.notebook import tqdm
import numpy as np


# Logging cadence: report whenever the epoch index is 20 past a multiple of 90.
log_every, log_offset = 90, 20
batches_per_epoch = len(train_loader)

for epoch in tqdm(range(num_epochs)):
    running_loss = 0
    for words, labels in train_loader:
        words = words.to(device)
        # Labels are cast to long (integer class indices) and moved in a single call.
        labels = labels.to(device=device, dtype=torch.long)

        logits = model(words)
        loss = loss_fn(logits, labels)
        running_loss = running_loss + loss.item()

        # Clear stale gradients, backpropagate, then apply the optimizer update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses for this epoch.
    train_loss = running_loss / batches_per_epoch

    if epoch % log_every == log_offset:
        print(f"epoch:{epoch}/{num_epochs} |train loss: {train_loss:.3f}")


  0%|          | 0/1000 [00:00<?, ?it/s]

epoch:20/1000 |train loss: 1.903
epoch:110/1000 |train loss: 0.730
epoch:200/1000 |train loss: 0.069
epoch:290/1000 |train loss: 0.020
epoch:380/1000 |train loss: 0.009
epoch:470/1000 |train loss: 0.005
epoch:560/1000 |train loss: 0.003
epoch:650/1000 |train loss: 0.002
epoch:740/1000 |train loss: 0.001
epoch:830/1000 |train loss: 0.001
epoch:920/1000 |train loss: 0.001


In [207]:
# Display the trained parameters as an ordered name -> tensor mapping.
model.state_dict()

OrderedDict([('l1.weight',
              tensor([[ 3.7893e-01,  6.5365e-01, -4.3237e-01, -1.2081e-01, -3.5467e-01,
                        5.6419e-01, -1.4458e-01, -2.6234e-01, -2.8673e-01, -1.7753e-01,
                       -1.3388e-01, -1.2837e-01, -1.4844e-02, -1.1348e-01,  4.8775e-01,
                        6.0846e-01, -1.0560e-01,  4.7406e-01, -2.5013e-01,  5.0609e-01,
                       -1.5497e-01, -3.5589e-01,  4.9539e-01, -3.8061e-01, -3.2500e-01,
                       -2.1935e-01, -2.1275e-01,  1.3295e-04, -8.8978e-02,  6.6409e-01,
                       -1.0139e-01,  5.1903e-01,  2.3773e-01,  3.8048e-01,  1.2525e-01,
                        2.5789e-01, -3.1619e-01,  7.2091e-01, -1.8488e-01,  4.6369e-02,
                       -1.2213e-01,  3.6950e-01, -1.3079e-01,  4.7420e-03,  3.2010e-01,
                       -2.5153e-01, -1.0607e-01,  7.7273e-01, -3.0879e-01,  6.6077e-01,
                        7.2442e-01,  4.7315e-01, -7.2429e-02, -2.4301e-01,  3.4782e-02,
     

In [208]:
# Bundle the trained weights with the model dimensions, vocabulary and tag list
# into a single checkpoint dictionary.
# NOTE: this rebinds `data`, which earlier held the parsed intents JSON.
data = dict(
    model_state=model.state_dict(),
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    all_words=all_words,
    tags=tags,
)

In [209]:
# Persist the checkpoint dictionary. The path is named once so the file that is
# written and the path that is reported cannot drift apart.
checkpoint_path = "data.pth"
torch.save(data, checkpoint_path)
print(f"training complete file saved to {checkpoint_path}")

training complete file saved to data.pth


In [210]:
# with torch.inference_mode():
#     for (words, labels) in train_loader:
#         words = words.to(device)
#         labels = labels.to(device)
        
#         out = model(words)
#         loss = loss_fn(out, labels)
#         pred_probs = torch.softmax(out, dim=1)
#         preds = torch.argmax(pred_probs, dim=1)
        
#         print(tags[preds])

In [211]:
## try this
# from torch.utils.data import TensorDataset

# dataset = TensorDataset(x_train, y_train)
# dataset

In [212]:
# from collections import Counter
# count = Counter(all_words)
# count

In [213]:
# %pprint

In [214]:
# all_words