In [95]:
# Mount Google Drive so later cells can read and write files stored on it.
from google.colab import drive # Colab helper module that provides drive.mount

ROOT = "/content/drive"     # directory at which the drive is mounted below
print(ROOT)                 # optional: echo the mount point

# NOTE(review): later cells open files through the relative path
# "drive/My Drive/...", so they presumably rely on the working directory
# being /content - confirm before running outside the default Colab setup.
drive.mount(ROOT)           # mount Google Drive at ROOT

/content/drive
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [96]:
import nltk
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer
import json
import numpy as np

class Nltk_utils:
  """Tokenize and stem one sentence, and build bag-of-words vectors.

  After construction an instance exposes:
    sentence         -- the raw string passed in.
    tokenized_words  -- tokens returned by ``nltk.word_tokenize`` (unstemmed).
    stemmed_words    -- ``stem()`` applied to every token, in the same order.
  """

  # One stemmer shared by all instances; built lazily on the first stem() call
  # so that defining the class does not instantiate anything from NLTK.
  _stemmer = None

  def __init__(self,sentence):
    """Store ``sentence`` and precompute its tokens and their stems."""
    self.sentence = sentence

    #tokenizing
    self.tokenized_words = self.tokenize()

    #stemming
    # Use self.stem (not a module-level instance) so the very first
    # construction works, and build a NEW list so tokenized_words keeps the
    # raw tokens instead of being overwritten with stems.
    self.stemmed_words = [self.stem(w) for w in self.tokenized_words]

  def tokenize(self):
    """Return the list of tokens ``nltk.word_tokenize`` yields for the sentence."""
    return nltk.word_tokenize(self.sentence)

  def stem(self,word):
    """Return the Porter stem of ``word`` after lower-casing it."""
    if Nltk_utils._stemmer is None:
      Nltk_utils._stemmer = PorterStemmer()
    return Nltk_utils._stemmer.stem(word.lower())

  def bag_of_words(self,tokenized_sentence,all_words):    #have an array of all_words
    """Encode a tokenized sentence as a 0/1 vector over ``all_words``.

    Args:
      tokenized_sentence: iterable of tokens; each one is passed through
        ``stem()`` before matching.
      all_words: sequence of vocabulary entries (compared as-is, so they are
        expected to be stemmed already).

    Returns:
      ``np.float32`` array of length ``len(all_words)`` with 1.0 at every
      position whose vocabulary entry equals a stem of the sentence.
    """
    # A set gives constant-time membership tests in the loop below.
    sentence_stems = {self.stem(w) for w in tokenized_sentence}
    bag = np.zeros(len(all_words),dtype=np.float32)

    for idx,w in enumerate(all_words):
      if w in sentence_stems:
        bag[idx] = 1.0

    return bag

# Quick manual check: build one instance from a sample sentence.
# This also binds the module-level name `nlp`, which later cells rebind.
if __name__ == "__main__":
  nlp = Nltk_utils("Is anyone there?")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [97]:
# Location of the intent definitions, relative to the current working directory.
path="drive/My Drive/Colab Notebooks/ML/MachineLearning/NLP/Chatbot1/intents.json"

# JSON text is UTF-8; pass the encoding explicitly so non-ASCII patterns are
# decoded the same way regardless of the platform's locale default.
with open(path,'r',encoding='utf-8') as f:
  intents = json.load(f)

# One pass over the intents collects three things: every tag, every token of
# every example pattern, and one (tokens, tag) pair per pattern.
tags = []
all_words = []
xy = []
for intent in intents['intents']:
  tag = intent['tag']
  tags += [tag]

  for pattern in intent['patterns']:
    nlp = Nltk_utils(pattern)
    all_words += nlp.tokenized_words
    xy += [(nlp.tokenized_words,tag)]

# Vocabulary: skip punctuation tokens, stem what is left, then de-duplicate
# and sort in a single expression.
nlp = Nltk_utils("hi there")   # instance whose stem() is used just below
word_ignore = ['?','!',',','.']
all_words = sorted({nlp.stem(w) for w in all_words if w not in word_ignore})

# Tags are likewise reduced to a sorted list of unique labels.
tags = sorted(set(tags))


# CREATING TRAINING DATA
# X_train: one bag-of-words row per pattern; y_train: index of its tag in `tags`.
X_train = []
y_train = []
for (pattern_sentence,tag) in xy:
  bag = nlp.bag_of_words(pattern_sentence,all_words)
  X_train.append(bag)

  label = tags.index(tag)
  y_train.append(label)

# Pin the dtypes instead of relying on NumPy's defaults: the default integer
# width is platform dependent, while nn.CrossEntropyLoss needs int64 class
# indices and the model weights are float32.
X_train = np.array(X_train,dtype=np.float32)
y_train = np.array(y_train,dtype=np.int64)


In [98]:
# Pytorch 
# Dataset Creating in PyTorch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset

class ChatDataSet(Dataset):
  """Map-style dataset pairing each feature row with its class label.

  Args:
    x_data: indexable collection of feature rows. When omitted, the
      module-level ``X_train`` is used.
    y_data: indexable collection of labels, one per row. When omitted, the
      module-level ``y_train`` is used.

  Raises:
    ValueError: if the features and labels differ in length.
  """
  def __init__(self,x_data=None,y_data=None):
    # Falling back to the globals keeps the existing no-argument call working.
    self.x_data = X_train if x_data is None else x_data
    self.y_data = y_train if y_data is None else y_data

    if len(self.x_data) != len(self.y_data):
      raise ValueError(
        f'x_data has {len(self.x_data)} rows but y_data has {len(self.y_data)} labels')
    self.n_samples = len(self.x_data)

  def __getitem__(self,index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.x_data[index], self.y_data[index]

  def __len__(self):
    """Return the number of samples."""
    return self.n_samples

# Model Creating in PyTorch

class NeuralNetFF(nn.Module):
  """Feed-forward classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.

  ``forward`` returns the raw output of the last linear layer; no activation
  or softmax is applied to it.
  """
  def __init__(self,input_size,hidden_size,num_classes):
    super().__init__()

    # Layers are created in the order l1, l2, l3; the attribute names are the
    # prefixes of the state_dict keys.
    self.l1 = nn.Linear(in_features=input_size,out_features=hidden_size)
    self.l2 = nn.Linear(in_features=hidden_size,out_features=hidden_size)
    self.l3 = nn.Linear(in_features=hidden_size,out_features=num_classes)
    self.relu = nn.ReLU()

  def forward(self,x):
    """Run ``x`` through both hidden layers and return the output scores."""
    hidden = x
    for layer in (self.l1,self.l2):
      hidden = self.relu(layer(hidden))
    # no activation, no softmax on the output layer
    return self.l3(hidden)



In [99]:



#HyperParameters
batch_size = 8
hidden_size = 8
input_size = X_train.shape[1]   # width of one bag-of-words row
output_size = len(tags)         # one output score per tag
alpha = 0.001                   # learning rate handed to Adam
n_epochs = 1000

dataset = ChatDataSet()
# shuffle=True: the samples were appended intent by intent, so without
# shuffling every mini-batch would be dominated by a single class.
train_loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=True,num_workers=2)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNetFF(input_size,hidden_size,output_size).to(device)

#Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=alpha)


# Training loop: one optimizer step per mini-batch, n_epochs passes over the data.
for epochs in range(n_epochs):
  for(words,labels) in train_loader:
    words = words.to(device)
    # CrossEntropyLoss takes class indices as int64 (torch.long); cast
    # explicitly so this does not depend on the dtype of the label array.
    labels = labels.to(device,dtype=torch.long)

    # forward
    outputs = model(words)
    loss = criterion(outputs,labels)

    # backward and optimizer step
    optimizer.zero_grad()   # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()

  # Progress report every 100 epochs; `loss` is the loss of the LAST
  # mini-batch of the epoch, not an average over the epoch.
  if(epochs+1)%100==0:
    print(f'epoch {epochs+1}/{n_epochs}, loss = {loss.item()}')

print(f'Final loss = {loss.item()}') 


epoch 100/1000, loss = 1.101446509361267
epoch 200/1000, loss = 0.07942241430282593
epoch 300/1000, loss = 0.01555937435477972
epoch 400/1000, loss = 0.005742577835917473
epoch 500/1000, loss = 0.00284907640889287
epoch 600/1000, loss = 0.0016450597904622555
epoch 700/1000, loss = 0.0010377565631642938
epoch 800/1000, loss = 0.0006943836342543364
epoch 900/1000, loss = 0.0004829753888770938
epoch 1000/1000, loss = 0.0003460029256530106
Final loss = 0.0003460029256530106


In [100]:
# Saving Model

# Destination of the checkpoint file.
FILE="drive/My Drive/Colab Notebooks/ML/MachineLearning/NLP/Chatbot1/"+"data.pth"

# Bundle the trained weights with the three layer sizes the network was built
# with, plus the vocabulary and the tag list computed earlier.
data = dict(
  model_state=model.state_dict(),
  input_size=input_size,
  output_size=output_size,
  hidden_size=hidden_size,
  all_words=all_words,
  tags=tags,
)

torch.save(data,FILE)


