In [9]:
import json
from nltk_utils import tokenize, stem, bag_of_words
import numpy as np
from model import NeuralNet
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


In [2]:
# import our chat-bot intents file
with open('intents.json','r') as f:
    intents = json.load(f)

In [3]:
print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'How are you', 'Is anyone there?', 'Hello', 'Good day'], 'responses': ['Hello, thanks for visiting', 'Good to see you again', 'Hi there, how can I help?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['Bye', 'See you later', 'Goodbye'], 'responses': ['See you later, thanks for visiting', 'Have a nice day', 'Bye! Come back again soon.']}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful"], 'responses': ['Happy to help!', 'Any time!', 'My pleasure']}, {'tag': 'hours', 'patterns': ['What hours are you open?', 'What are your hours?', 'When are you open?'], 'responses': ["We're open every day 9am-9pm", 'Our hours are 9am-9pm every day']}, {'tag': 'mopeds', 'patterns': ['Which mopeds do you have?', 'What kinds of mopeds are there?', 'What do you rent?'], 'responses': ['We rent Yamaha, Piaggio and Vespa mopeds', 'We have Piaggio, Vespa and Yamaha mopeds']}, {'tag': 'payments', 'patterns': ['Do you take credit card

In [4]:
all_words = []
tags = []
xy = []

for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))


In [5]:
ignore_words = ['?', '!', '.', ',']

all_words = [stem(w) for w in all_words if w not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))


Creating a Training Data

In [6]:
X_train = []
y_train = []

for (pattern_sentence, tag) in xy:
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    label = tags.index(tag)
    y_train.append(label)  # CrossEntropy loss

X_train = np.array(X_train)
y_train = np.array(y_train, dtype=np.int64)  # Ensure y_train contains only integers


In [7]:
input_size = len(X_train[0])
output_size = len(tags)
hidden_size = 8
learning_rate = 0.001
epochs = 1000

In [10]:
class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = torch.Tensor(X_train)  # Convert to PyTorch tensor
        self.y_data = torch.Tensor(y_train).long()  # Convert to PyTorch tensor and cast to long

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples


In [11]:
batch_size = 8

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [12]:
model= NeuralNet(input_size,hidden_size,output_size)

In [13]:
device = torch.device('cuda' if torch.cuda.is_available()else 'cpu')

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)

In [15]:
for epoch in range(epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device, dtype=torch.long)  # Convert labels to torch.long

        # Forward pass
        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss = {loss.item():.4f}')

print(f"Final loss = {loss.item():.4f}")

Epoch 100/1000, Loss = 1.4850
Epoch 200/1000, Loss = 0.2080
Epoch 300/1000, Loss = 0.0752
Epoch 400/1000, Loss = 0.0225
Epoch 500/1000, Loss = 0.0059
Epoch 600/1000, Loss = 0.0048
Epoch 700/1000, Loss = 0.0026
Epoch 800/1000, Loss = 0.0023
Epoch 900/1000, Loss = 0.0016
Epoch 1000/1000, Loss = 0.0003
Final loss = 0.0003


In [16]:
data = {
    "model_state": model.state_dict(),
    "input_size" : input_size,
    "output_size":output_size,
    "hidden_size":hidden_size,
    "all_words":all_words,
    "tags":tags,
    
}

In [17]:
FILE = 'data.pth'
torch.save(data,FILE)

print("training complete. file saved to {FILE}")

training complete. file saved to {FILE}
