In [1]:
import json
import random
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from dataset import ChatDataset
from helpers import bag_of_words, tokenize, stem
from model import NeuralNet

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Accumulators that a later cell fills while walking the intents:
#   all_words - every token seen across all patterns
#   tags      - the label of each intent
#   xy        - (tokenized pattern, intent label) pairs
all_words = []
tags = []
xy = []

# Read the intents definition. The encoding is passed explicitly because the
# default used by open() depends on the platform locale; this also matches
# how products.json is opened in this notebook.
with open('./datasets/intents.json', 'r', encoding='utf-8') as intents_file:
    intents = json.load(intents_file)

In [3]:
# Load the product catalogue; only each product's title is used below.
with open('./datasets/products.json', encoding="utf-8") as products_file:
    products = json.load(products_file)

product_names = [product['title'] for product in products]

# Placeholder that is swapped for a product name when the templates are expanded.
TEMPLATE = "<PRODUCT>"

# Patterns of the intent at index 20, and how many of them exist before expansion.
product_query_texts = intents['intents'][20]['text']
product_query_texts_template_number = len(product_query_texts)

In [4]:

# Expand every template containing the TEMPLATE placeholder into one concrete
# query per product name (template-major order, products in catalogue order).
#
# The expansions are built in a new list rather than appended to
# `product_query_texts` while iterating over it. Only the first
# `product_query_texts_template_number` entries (the original templates) are
# read, so a product name that itself contains the placeholder cannot extend
# the loop, and re-running this cell does not duplicate the expansions.
original_templates = product_query_texts[:product_query_texts_template_number]
expanded_product_queries = [
    template.replace(TEMPLATE, product)
    for template in original_templates
    if TEMPLATE in template  # templates without the placeholder are dropped
    for product in product_names
]

# The intent keeps only the expanded queries, not the raw templates.
# NOTE(review): index 20 is assumed to be the product-query intent - confirm
# against intents.json.
intents['intents'][20]['text'] = expanded_product_queries

In [5]:
# Display the patterns currently stored on the intent at index 20, to eyeball
# the result of the template expansion.
intents['intents'][20]['text']

['How Much Is Fjallraven - Foldsack No. 1 Backpack, Fits 15 Laptops?',
 'How Much Is Mens Casual Premium Slim Fit T-Shirts ?',
 'How Much Is Mens Cotton Jacket?',
 'How Much Is Mens Casual Slim Fit?',
 "How Much Is John Hardy Women's Legends Naga Gold & Silver Dragon Station Chain Bracelet?",
 'How Much Is Solid Gold Petite Micropave ?',
 'How Much Is White Gold Plated Princess?',
 'How Much Is Pierced Owl Rose Gold Plated Stainless Steel Double?',
 'How Much Is WD 2TB Elements Portable External Hard Drive - USB 3.0 ?',
 'How Much Is SanDisk SSD PLUS 1TB Internal SSD - SATA III 6 Gb/s?',
 'How Much Is Silicon Power 256GB SSD 3D NAND A55 SLC Cache Performance Boost SATA III 2.5?',
 'How Much Is WD 4TB Gaming Drive Works with Playstation 4 Portable External Hard Drive?',
 'How Much Is Acer SB220Q bi 21.5 inches Full HD (1920 x 1080) IPS Ultra-Thin?',
 'How Much Is Samsung 49-Inch CHG90 144Hz Curved Gaming Monitor (LC49HG90DMNXZA) – Super Ultrawide Screen QLED ?',
 "How Much Is BIYLACLESE

In [6]:
# Start from empty accumulators so that re-running this cell does not append a
# second copy of every tag, token and pair, and does not mix raw tokens into an
# `all_words` that a later cell has already stemmed and de-duplicated.
all_words = []
tags = []
xy = []

for intent in intents['intents']:
    tag = intent['intent']
    tags.append(tag)
    for pattern in intent['text']:
        # tokenize() comes from helpers; its result is used as an iterable of tokens.
        tokens = tokenize(pattern)
        all_words.extend(tokens)
        # Keep the tokenized pattern with its label for building the training set.
        xy.append((tokens, tag))

In [7]:
# print(all_words, "\n\n")
# print(tags, "\n\n")
# print(xy, "\n\n")
# print([data for data in xy if data[1]=='Products'], "\n\n")

In [8]:
# Tokens that must not become vocabulary entries.
ignore_words = ['?', '.', '!']

# Stem every remaining token and collapse the result into a sorted,
# duplicate-free vocabulary.
all_words = sorted({stem(token) for token in all_words if token not in ignore_words})

# Intent labels: unique, in sorted order (label index = position in this list).
tags = sorted(set(tags))

In [9]:
# Quick sanity check on the size of the corpus, label set and vocabulary.
pattern_count, tag_count, vocabulary_size = len(xy), len(tags), len(all_words)

print(pattern_count, "patterns")
print(tag_count, "tags:", tags)
print(vocabulary_size, "unique stemmed words:", all_words)

271 patterns
21 tags: ['Clever', 'CourtesyGoodBye', 'CourtesyGreeting', 'CourtesyGreetingResponse', 'CurrentHumanQuery', 'GoodBye', 'Greeting', 'GreetingResponse', 'NameQuery', 'NotTalking2U', 'PodBayDoor', 'PodBayDoorResponse', 'ProductQuery', 'RealNameQuery', 'SelfAware', 'Shutup', 'Swearing', 'Thanks', 'TimeQuery', 'UnderstandQuery', 'WhoAmI']
230 unique stemmed words: ['0', '1', '1080', '144hz', '15', '1920', '1tb', '2', '21', '256gb', '2tb', '3', '3d', '4', '49', '4tb', '5', '6', 'a', 'a55', 'acer', 'adam', 'adio', 'am', 'and', 'anyon', 'are', 'ask', 'awar', 'backpack', 'bay', 'be', 'bella', 'bi', 'biker', 'biylaclesen', 'boat', 'boost', 'bracelet', 'by', 'bye', 'cach', 'call', 'camera', 'can', 'casual', 'chain', 'chg90', 'clever', 'climb', 'coat', 'commun', 'comprendo', 'consciou', 'cost', 'cotton', 'could', 'curv', 'danvouy', 'do', 'doe', 'door', 'doubl', 'dragon', 'drive', 'element', 'enough', 'extern', 'faux', 'fit', 'fjallraven', 'foldsack', 'for', 'friend', 'fuck', 'full', '

In [10]:
# Encode each (tokenized pattern, tag) pair:
#   X_train - the pattern passed through bag_of_words() with the vocabulary
#   y_train - the position of the pair's tag within `tags`
X_train = [bag_of_words(pattern_tokens, all_words) for pattern_tokens, _ in xy]
y_train = [tags.index(pattern_tag) for _, pattern_tag in xy]

In [11]:
# Turn both training lists into NumPy arrays before handing them to the dataset.
X_train, y_train = np.array(X_train), np.array(y_train)

In [12]:
# Seed every RNG the notebook imports (Python, NumPy, PyTorch) before the
# DataLoader and model are created, so shuffling and parameter initialisation
# repeat from run to run. Change SEED to sample a different run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Optimisation settings.
num_epochs = 1000
batch_size = 8
learning_rate = 0.001

# Network dimensions: one input per vocabulary entry (length of the first
# training vector) and one output per intent tag.
input_size = len(X_train[0])
hidden_size = 10
output_size = len(tags)
print(input_size, output_size)

230 21


In [13]:
# Wrap the arrays in the project's dataset class and serve them in shuffled
# mini-batches; num_workers=0 means no worker subprocesses are used.
dataset = ChatDataset(X_train, y_train)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [14]:
# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [15]:
# Build the classifier with the configured layer sizes, then move it to the
# selected device.
model = NeuralNet(input_size, hidden_size, output_size)
model = model.to(device)

In [16]:
# Cross-entropy loss (targets are passed as class indices in the training
# loop) optimised with Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [17]:
for epoch in range(num_epochs):
    for features, targets in train_loader:
        # Move the batch to the training device; targets are cast to int64.
        features = features.to(device)
        targets = targets.to(device=device, dtype=torch.long)

        # Drop gradients from the previous step, then run the forward pass.
        optimizer.zero_grad()
        logits = model(features)
        # Targets are class indices here; one-hot targets would first need
        # torch.max(targets, 1)[1] to recover the indices.
        loss = criterion(logits, targets)

        # Backpropagate and apply the parameter update.
        loss.backward()
        optimizer.step()

    # Log only every 100th epoch. The value shown is the loss of the last
    # mini-batch of that epoch, not an epoch average.
    if (epoch + 1) % 100 != 0:
        continue
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 0.0022
Epoch [200/1000], Loss: 0.0000
Epoch [300/1000], Loss: 0.0000
Epoch [400/1000], Loss: 0.0000
Epoch [500/1000], Loss: 0.0000
Epoch [600/1000], Loss: 0.0000
Epoch [700/1000], Loss: 0.0000
Epoch [800/1000], Loss: 0.0000
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000


In [18]:
# `loss` is whatever the training loop assigned last, i.e. the loss of the
# final mini-batch of the final epoch - not an average over the dataset.
print(f'final loss: {loss.item():.4f}')

final loss: 0.0000


In [19]:
# Bundle the trained weights with the layer sizes, vocabulary and tag list
# used to build and train this model, so they travel together in one file.
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags,
}

FILE = "./models/intent.pth"
# Create the target directory if it is missing; otherwise torch.save would
# raise after the whole training run has finished and the weights would be lost.
Path(FILE).parent.mkdir(parents=True, exist_ok=True)
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')

training complete. file saved to ./models/intent.pth
