In [None]:
import numpy as np
import nltk
from nltk.stem.porter import PorterStemmer
import json
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import random
nltk.download('punkt')

[nltk_data] Downloading package punkt to C:\Users\E.
[nltk_data]     Serban\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
def tokenize(sentece):
    """Return the list of tokens ``nltk.word_tokenize`` produces for ``sentece``."""
    tokens = nltk.word_tokenize(sentece)
    return tokens

# One shared stemmer: stem() is called for every token and every vocabulary
# word, so building a new PorterStemmer on each call is wasted work.
_STEMMER = PorterStemmer()


def stem(word):
    """Return the Porter stem of ``word`` after lower-casing it.

    Args:
        word: a single token (string).

    Returns:
        The stemmed, lower-cased form of ``word``.
    """
    return _STEMMER.stem(word.lower())

def bag_of_words(tokenized_sentace, all_words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentace: iterable of raw tokens; each one is passed through
            ``stem`` before being compared with the vocabulary.
        all_words: sequence of vocabulary entries. Position ``i`` of the
            result corresponds to ``all_words[i]``.

    Returns:
        A float32 numpy array of length ``len(all_words)`` holding 1.0 where
        the vocabulary entry equals the stem of some token, and 0.0 elsewhere.
    """
    # A set makes each membership test O(1); the list used previously was
    # rescanned once per vocabulary entry.
    stemmed_tokens = {stem(w) for w in tokenized_sentace}

    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in stemmed_tokens:
            bag[idx] = 1.0

    return bag

In [15]:
# Load the intent definitions. The encoding is given explicitly because
# open() otherwise falls back to the platform default, which is not UTF-8 on
# every system.
with open('intents.json', 'r', encoding='utf-8') as f:
    # Named `intents` (not `model`) so it is not confused with the neural
    # network that is bound to `model` in the training cell.
    intents = json.load(f)

ignore_words = ['?', '!', '.', ',']

tags = []
xy = []          # (tokens, tag) pairs, one per training pattern
all_words = []

for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        tokens = tokenize(pattern)
        all_words.extend(tokens)
        xy.append((tokens, tag))

# Vocabulary: stemmed tokens, punctuation dropped, deduplicated and sorted so
# that feature positions are stable between runs.
all_words = sorted({stem(w) for w in all_words if w not in ignore_words})
tags = sorted(set(tags))

print(tags)
print(all_words)

# `tags` is unique after set(), so a dict lookup yields the same index as
# tags.index() without a linear scan per sample.
tag_to_index = {name: position for position, name in enumerate(tags)}

X_train = np.array([bag_of_words(tokens, all_words) for tokens, _ in xy])
y_train = np.array([tag_to_index[tag] for _, tag in xy])

['goodbye', 'greeting', 'habitat_crocodil', 'habitat_elefant', 'habitat_girafa', 'habitat_panda', 'habitat_tigru', 'hrana_crocodil', 'hrana_elefant', 'hrana_girafa', 'hrana_panda', 'hrana_tigru', 'periculos_crocodil', 'periculos_elefant', 'periculos_girafa', 'periculos_panda', 'periculos_tigru', 'viata_crocodil', 'viata_elefant', 'viata_girafa', 'viata_panda', 'viata_tigru']
['a', 'agresiv', 'agresiva', 'ani', 'ataca', 'auzim', 'buna', 'care', 'cat', 'cati', 'ce', 'ceau', 'consuma', 'crocodil', 'crocodilul', 'crocodilului', 'cu', 'curand', 'de', 'durata', 'e', 'elef', 'elefantul', 'elefantului', 'est', 'faci', 'fi', 'gasest', 'girafa', 'girafei', 'habitat', 'habitatul', 'hei', 'hrana', 'hranest', 'in', 'la', 'mananca', 'mediu', 'mult', 'ne', 'o', 'oameni', 'pa', 'panda', 'pe', 'periculo', 'periculoasa', 'poat', 'reveder', 'salut', 'se', 'tigru', 'tigrul', 'tigrului', 'traiest', 'un', 'und', 'ursului', 'viata', 'zona', 'zonel']


In [16]:
class ChatDataset(Dataset):
    """Map-style dataset pairing feature rows with integer class labels.

    Args:
        x_data: array-like of feature rows. When omitted, the notebook-level
            ``X_train`` is used, so ``ChatDataset()`` behaves as before.
        y_data: array-like of integer labels, one per feature row. When
            omitted, the notebook-level ``y_train`` is used.

    Raises:
        ValueError: if the number of feature rows and labels differ.
    """

    def __init__(self, x_data=None, y_data=None):
        # Fall back to the notebook globals to keep the zero-argument call working.
        if x_data is None:
            x_data = X_train
        if y_data is None:
            y_data = y_train

        self.x_data = np.asarray(x_data)
        # Labels are stored as int64 (the training cell feeds them to
        # nn.CrossEntropyLoss as class indices).
        self.y_data = np.asarray(y_data).astype(np.int64)

        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                f'x_data has {len(self.x_data)} rows but y_data has '
                f'{len(self.y_data)} labels'
            )
        self.n_samples = len(self.x_data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index].squeeze()

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

In [17]:

class NeuralNet(nn.Module):
    """Feed-forward classifier: three ReLU-activated hidden layers followed
    by a linear output layer that returns raw (un-normalised) scores.

    Args:
        input_size: number of input features.
        hidden_size: width shared by all hidden layers.
        num_classes: number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Attribute names and creation order are kept as-is so state_dict
        # keys and parameter initialisation order do not change.
        self.layer_1 = nn.Linear(input_size, hidden_size)
        self.layer_2 = nn.Linear(hidden_size, hidden_size)
        self.layer_3 = nn.Linear(hidden_size, hidden_size)
        self.layer_4 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the network and return the output-layer scores."""
        hidden = x
        for layer in (self.layer_1, self.layer_2, self.layer_3):
            hidden = self.relu(layer(hidden))
        # No activation on the last layer.
        return self.layer_4(hidden)

In [18]:
# Hyperparameters.
batch_size = 8
hidden_size = 16
learning_rate = .001
num_epochs = 1000
SEED = 42

# Seed PyTorch's RNG before the model and the shuffling loader are created so
# that weight initialisation and batch order repeat on a fresh kernel.
torch.manual_seed(SEED)

dataset = ChatDataset()
output_size = len(tags)
input_size = len(X_train[0])

train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device=device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for words, labels in train_loader:
        words = words.to(device)
        labels = labels.to(device)

        # Forward pass.
        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # `loss` is the loss of the last batch of the epoch, not an epoch average.
    if (epoch + 1) % 100 == 0:
        print(f'epoch {epoch+1}/{num_epochs}, loss={loss.item():.4f}')

print(f'final loss={loss.item():.4f}')

epoch 100/1000, loss=0.1093
epoch 200/1000, loss=0.0028
epoch 300/1000, loss=0.0013
epoch 400/1000, loss=0.0004
epoch 500/1000, loss=0.0003
epoch 600/1000, loss=0.0001
epoch 700/1000, loss=0.0001
epoch 800/1000, loss=0.0000
epoch 900/1000, loss=0.0000
epoch 1000/1000, loss=0.0000
final loss=0.0000


In [19]:
# Bundle the trained weights with everything the chat cell needs to rebuild
# the network and encode user input, then write the bundle to disk.
FILE = 'data.pth'

data = dict(
    model_state=model.state_dict(),
    input_size=input_size,
    output_size=output_size,
    hidden_size=hidden_size,
    all_words=all_words,
    tags=tags,
)
torch.save(data, FILE)

print(f'We have {len(tags)} topics !')
print(f'Training complete. File saved to {FILE}')

We have 22 topics !
Training complete. File saved to data.pth


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Minimum softmax probability the top class must exceed before the bot answers.
CONFIDENCE_THRESHOLD = 0.75

# Explicit encoding: open() otherwise uses the platform default.
with open('intents.json', 'r', encoding='utf-8') as f:
    intents = json.load(f)

FILE = 'data.pth'
# map_location lets a checkpoint written on a GPU machine load on a CPU-only one.
# NOTE: torch.load unpickles the file - only load checkpoints you produced yourself.
data = torch.load(FILE, map_location=device)

input_size = data['input_size']
hidden_size = data['hidden_size']
output_size = data['output_size']
all_words = data['all_words']
tags = data['tags']
model_state = data['model_state']

model = NeuralNet(input_size, hidden_size, output_size).to(device=device)
model.load_state_dict(model_state)
model.eval()

bot_name = 'Nicusor'
print("Let's chat ! type quit for exit")

while True:
    sentace = input('You: ')
    if sentace == 'quit':
        break

    tokens = tokenize(sentace)
    X = bag_of_words(tokens, all_words)
    X = X.reshape(1, X.shape[0])
    # The input must live on the same device as the model, otherwise the
    # forward pass raises a device-mismatch error when CUDA is in use.
    X = torch.from_numpy(X).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(X)
        probs = torch.softmax(output, dim=1)

    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]
    prob = probs[0][predicted.item()]

    if prob.item() > CONFIDENCE_THRESHOLD:
        for intent in intents['intents']:
            if tag == intent['tag']:
                respons = random.choice(intent['responses'])
                print(f"{bot_name}: {respons}")
                # Answer once per user message, even if a tag is duplicated.
                break
    else:
        print('I do not understand...')

Let's chat ! type quit for exit
