In [1]:
import nltk
import numpy as np
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from nltk.stem.porter import PorterStemmer


stemmer  = PorterStemmer()

In [2]:
def tokenize(sentence):
    return nltk.word_tokenize(sentence)

def stem(word):
    return stemmer.stem(word.lower())

def bag_of_words(token_sentence, all_words):
    token_sentence = [stem(w) for w in token_sentence]
    bag = np.zeros(len(all_words), dtype=np.float32)
    
    for index, w in enumerate(all_words):
        if w in token_sentence:
            bag[index] = 1.0
            
    return bag
            
# sentence = ["hello", "how", "are", "you"]
# words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]

# bag = bag_of_words(sentence, words)
# bag

In [3]:
sent = ["organise", "organising", "organised"]
# tokenize(sent)
stemmed = [stem(w) for w in sent]
stemmed


['organis', 'organis', 'organis']

In [4]:
with open('intents.json', 'r') as file:
    intents = json.load(file)
intents

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi',
    'Hey',
    'How are you',
    'Is anyone there?',
    'Hello',
    'Good day'],
   'responses': ['Hey :-)',
    'Hello, thanks for visiting',
    'Hi there, what can I do for you?',
    'Hi there, how can I help?']},
  {'tag': 'goodbye',
   'patterns': ['Bye', 'See you later', 'Goodbye'],
   'responses': ['See you later, thanks for visiting',
    'Have a nice day',
    'Bye! Come back again soon.']},
  {'tag': 'thanks',
   'patterns': ['Thanks', 'Thank you', "That's helpful", "Thank's a lot!"],
   'responses': ['Happy to help!', 'Any time!', 'My pleasure']},
  {'tag': 'items',
   'patterns': ['What can you do?', 'What is your job?'],
   'responses': ['You can try saying Open Google',
    'You can try saying Open browser',
    'You can try saying Play music',
    'You can try saying Open code editor',
    'You can try saying wikipedia something',
    'You can try saying Open youtube',
    'You can try saying generate password',


In [5]:
all_words = []
tags = []
xy = []


for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))
    

In [6]:
ignore = ['?', '.', ',', '!']

all_words = [stem(w) for w in all_words if w not in ignore]

In [7]:
all_words = sorted(set(all_words))

In [8]:
len(all_words)

32

In [9]:
tags = sorted(set(tags))

In [10]:
X_train = []
Y_train = []

for (pattent_sentence, tag) in xy:
    bag = bag_of_words(pattent_sentence, all_words)
    X_train.append(bag)
    
    labels = tags.index(tag)
    
    Y_train.append(labels)
    
    
X_train = np.array(X_train)
Y_train = np.array(Y_train)


In [28]:
class Chat_Dataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = Y_train
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.n_samples

batch_size = 8

    
dataset = Chat_Dataset()
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0) 

In [33]:
class NuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        
        return out

In [34]:
hidden_size = 8
output_size = len(tags)
input_size = len(X_train[0])
learning_rate = .001
num_epochs = 1000

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NuralNet(input_size, hidden_size, output_size).to(device)

In [35]:
creterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [36]:
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)
        
        outputs = model(words)
        loss = creterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch + 1)%100 == 0:
        print(f"epoch : {epoch + 1}/{num_epochs}, loss : {loss.item():.4f}")
        
print(f"final epoch : {epoch + 1}/{num_epochs}, final loss : {loss.item():.4f}")

RuntimeError: expected scalar type Long but found Int