In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import os
print(os.getcwd())

/content/drive/My Drive/Colab Notebooks


In [0]:
!pwd
path_to_mount = '/content/drive/My Drive/Colab Notebooks/'

os.chdir(path_to_mount)
!ls

/content/drive/My Drive/Colab Notebooks
 chatbot2.0.ipynb	   'Copy of chatbotPresents4.0.ipynb'
 chatbot4.0.ipynb	    Data
 chatbotPresents4.0.ipynb   decoder_serialized.pt
 chatbotSports3.0.ipynb     encoder_serialized.pt


In [0]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import re
import random
import glob
import json


# Use GPU if available
if (torch.cuda.is_available()):
    device = torch.device('cuda')
    print("Running on GPU")
else: 
    device = torch.device('cpu')
    print("Running on CPU")



Running on GPU


In [0]:
dialogues_regex_folder_path = "Data/data/dialogues/*.txt"

list_of_files = glob.glob(path_to_mount + dialogues_regex_folder_path)
print(list_of_files)
print(len(list_of_files))

['/content/drive/My Drive/Colab Notebooks/Data/data/dialogues/PRESENT_IDEAS.txt']
1


In [0]:
dicts= [] 


for filename in list_of_files:
  with open(filename) as f:
      for line in f: 
          dicts.append(json.loads(line)) 

In [0]:
new_dicts= [] 

for old_dict in dicts:
  foodict = {k: v for k, v in old_dict.items() if (k == 'turns')} 
  new_dicts.append(foodict)

print(len(new_dicts))


dicts= []
dicts= new_dicts

print(dicts[:2])

663
[{'turns': ['Hello how may I help you?', 'I need help buying a gift', 'What kind of gift would you like to buy?', "It's for a 2 year old baby", 'How do you know the 2 year old baby?', "It's a friend's kid.", 'A teddy bear is a great gift.', 'She already has a bunch.', 'Does she have any huge teddy bears?', 'No. That might work, thanks.', "You're welcome."]}, {'turns': ['Hello how may I help you?', 'I would like to get my spouse a gift.', 'is it for a special occasion?', 'Yes, it is his 53th birthday. I need help with ideas.', 'what is he interested in?', 'He likes motorcycles, the outdoors, and camping.', 'maybe some tents or any camping gear', 'Unfortunately, he has those items already.', 'how about motorcyle gear like a helmet that hooks up to your cellphone?', 'Do you have any other ideas?', 'a trip which is expensive to a luxurious camp site in the amazons', 'That seems like a good idea. Thank you.']}]


In [0]:
questions = []
answers = []


greeting = ["Hey", "Hi", "Hello", "How are you today?"]

bye = ["Ok", "Okie", "Bye"]

for dictionary in dicts:
  matrix_QA = dictionary['turns']
  
  questions.append(random.choice(greeting))
    
  bot_flag = True 
  for sentence in matrix_QA:

    if bot_flag == True:
      answers.append(sentence) 
      bot_flag = False 
      continue
    else:
      questions.append(sentence) 
      bot_flag = True 
      continue
  if bot_flag == True: 
    answers.append(random.choice(bye))

In [0]:
assert len(questions) == len(answers), "ERROR: The length of the questions and answer matrices are different."

print(len(questions))

4132


In [0]:
import csv

filepath_to_save = '/content/drive/My Drive/Colab Notebooks/Data/output.tsv' 
with open(filepath_to_save, 'wt') as out_file:
    
    tsv_writer = csv.writer(out_file, delimiter='\t')

    
    for i in range(len(questions)):
        tsv_writer.writerow([questions[i], answers[i]])

In [0]:
SOS = 0 
EOS = 1 

class QA_Lang:

    def __init__(self):
        self.word2index = {}
        self.index2word = {0: 'SOS', 1: 'EOS'} # Reserved for start and end token
        self.n_words = 2 # Initialize with start and end token

    def add_sentence(self, sentence):
        for word in sentence.split(' '): 
            if word not in self.word2index: 
                self.word2index[word] = self.n_words
                self.index2word[self.n_words] = word
                self.n_words += 1
      
    

In [0]:
def text_preprocess(sentence):
  sentence = sentence.lower().strip()
  normalized_sentence = [c for c in unicodedata.normalize('NFD', sentence) if
                        unicodedata.category(c) != 'Mn']

  sentence = ''
  sentence = ''.join(normalized_sentence)
  
  sentence = re.sub(r"([.!?])", r" \1", sentence)
  sentence = re.sub(r"[^a-zA-Z.!?]+", r" ", sentence)

  return sentence

In [0]:
def readQA():
 

    print('Reading lines from file...')

    data_path = os.getcwd() + "/Data/output.tsv" 
    lines = open(data_path, encoding='utf-8').read().strip().split('\n')

    TAB_CHARACTER = '\t'

    pairs = [[text_preprocess(sentence) \
              for sentence in line.split(TAB_CHARACTER)] \
              for line in lines]
    

    
    questions = QA_Lang()
    answers = QA_Lang()

    return questions, answers, pairs

In [0]:
MAX_LENGTH = 50 
def filter_data(pairs):

    new_pairs = []

    for pair in pairs:
        question_length = len(pair[0].split(' '))
        answer_length = len(pair[1].split(' '))

        if question_length < MAX_LENGTH and answer_length < MAX_LENGTH:
            new_pairs.append(pair)

    return new_pairs

In [0]:
def prepare_data():

    questions, answers, pairs = readQA()
    print("Read " + str(len(pairs)) + " sentence pairs")

    pairs = filter_data(pairs)
    print("filtered down to " + str(len(pairs)) + " sentence pairs")

    for pair in pairs:
        questions.add_sentence(pair[0])
        answers.add_sentence(pair[1])

    print("The questions object is defined by " +
                        str(questions.n_words) + " words")
    
    print("The answers object is defined by " +
                        str(answers.n_words) + " words")

    return questions, answers, pairs

In [0]:
questions, answers, pairs = prepare_data()

Reading lines from file...
Read 4132 sentence pairs
filtered down to 4130 sentence pairs
The questions object is defined by 1829 words
The answers object is defined by 2344 words


In [0]:
class Encoder(nn.Module):

    def __init__(self, input_size, hidden_size): 
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size

 
        self.embedding = nn.Embedding(input_size, hidden_size)

        self.gru = nn.GRU(hidden_size, hidden_size)

    
    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded

        # Pass the hidden state and the encoder output to the next word input
        output, hidden = self.gru(output, hidden) 

        return output, hidden

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)


class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        
        self.attention = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attention_combine = nn.Linear(self.hidden_size * 2,
                                           self.hidden_size)
        
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        # Forward passes as from the repo
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attention_weights = F.softmax(self.attention(torch.cat((embedded[0],
                                                                hidden[0]), 1)),
                                                                 dim=1)
        
        attention_applied = torch.bmm(attention_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attention_applied[0]), 1)
        output = self.attention_combine(output).unsqueeze(0)

        
        output = F.relu(output)

        
        output, hidden = self.gru(output, hidden)

        
        output = F.log_softmax(self.out(output[0]), dim=1)

        return output, hidden, attention_weights

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [0]:
def tensor_from_sentence(lang, sentence):

    indices = [lang.word2index[word] for word in sentence.split(' ')]
    indices.append(EOS) 

    sentence_tensor = torch.tensor(indices, dtype=torch.long, device=device).view(-1, 1)

    return sentence_tensor

def tensors_from_pair(pair):    
    input_tensor = tensor_from_sentence(questions, pair[0])
    target_tensor = tensor_from_sentence(answers, pair[1])

    return (input_tensor, target_tensor)

In [0]:
import time
import math

def as_minutes(s):
    m = math.floor(s / 60)
    s -= m * 60

    return '%dm %ds' % (m, s)

def time_since(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s

    return '%s (- %s)' % (as_minutes(s), as_minutes(rs))

In [0]:
def BLEU(encoder, attention_decoder, n_examples):
    total_score = 0
    evaluate_pairs = [random.choice(pairs) for i in range(n_examples)]
    for pair in evaluate_pairs:
        input_sentence = pair[0]
        target_words = [pair[1]]
        output_words, _ = evaluate(encoder, attention_decoder, input_sentence)
        output_words = output_words
        score = sentence_bleu(target_words, output_words)
        total_score += score
    average_BLEU = total_score/len(pairs)
    return average_BLEU

In [0]:
def train(encoder, decoder, iterations, print_at_every=1000, learning_rate=0.01):
    

    start = time.time() 
    print_total_loss = 0 
    
    
    #encoder_optimizer = optim.Adam(encoder.parameters(), amsgrad = True, lr=learning_rate)
    #decoder_optimizer = optim.Adam(encoder.parameters(), amsgrad = True, lr=learning_rate)
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    
    training_pairs = [tensors_from_pair(random.choice(pairs)) for i in range(iterations)]

    
    criterion = nn.NLLLoss() 
    
    
    for i in range(1, iterations + 1):
        training_pair = training_pairs[i - 1]

        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder, decoder,
                encoder_optimizer, decoder_optimizer, criterion)

        print_total_loss += loss

        
        if i % print_at_every == 0:
            print_avg_loss = print_total_loss / print_at_every
            print_total_loss = 0 
            print('%s (%d %d%%) %.4f' % (time_since(start, i / iterations),
                             i, i / iterations * 100, print_avg_loss))
        

In [0]:
hidden_size = 512 

encoder = Encoder(questions.n_words, hidden_size).to(device)
attention_decoder = Decoder(hidden_size, answers.n_words, dropout_p=0.2).to(device)

iterations = 150000 

In [0]:
train(encoder, attention_decoder, iterations, print_at_every=(iterations//15))

4m 52s (- 68m 14s) (10000 6%) 2.6334
9m 27s (- 61m 29s) (20000 13%) 1.6638
14m 2s (- 56m 11s) (30000 20%) 1.0493
18m 45s (- 51m 35s) (40000 26%) 0.6735
23m 26s (- 46m 53s) (50000 33%) 0.4542
28m 5s (- 42m 8s) (60000 40%) 0.3106
32m 46s (- 37m 27s) (70000 46%) 0.2306
37m 27s (- 32m 46s) (80000 53%) 0.1843
42m 9s (- 28m 6s) (90000 60%) 0.1522
46m 50s (- 23m 25s) (100000 66%) 0.1327
51m 26s (- 18m 42s) (110000 73%) 0.1210
56m 7s (- 14m 1s) (120000 80%) 0.1170
60m 48s (- 9m 21s) (130000 86%) 0.1182
65m 29s (- 4m 40s) (140000 93%) 0.1067
70m 9s (- 0m 0s) (150000 100%) 0.1052


In [0]:
def inference(encoder, decoder, sentence, max_length=MAX_LENGTH):
    
  with torch.no_grad(): 

      sentence = text_preprocess(sentence) 

      input_tensor = tensor_from_sentence(questions, sentence) 
      input_length = input_tensor.size()[0]

      
      encoder_hidden = encoder.init_hidden()

      
      encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

      
      for encoder_input in range(input_length):
          encoder_output, encoder_hidden = encoder(input_tensor[encoder_input],
                                                    encoder_hidden)
          encoder_outputs[encoder_input] += encoder_output[0, 0]

      
      decoder_input = torch.tensor([[SOS]], device=device)

      
      decoder_hidden = encoder_hidden

      
      decoded_words = []

      
      for d_i in range(max_length):
          decoder_output, decoder_hidden, decoder_attention = decoder(
                  decoder_input, decoder_hidden, encoder_outputs)
          
          _, top_i = decoder_output.data.topk(1) 

          if top_i.item() == EOS: 
              break 
          else:
              decoded_words.append(answers.index2word[top_i.item()])

          decoder_input = top_i.squeeze().detach()

      return ' '.join(decoded_words)

In [0]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [0]:
def showAttention(input_sentence, output_words, attentions):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.numpy(), cmap='bone')
    fig.colorbar(cax)

    ax.set_xticklabels([''] + input_sentence.split(' ') +
                       ['<EOS>'], rotation=90)
    ax.set_yticklabels([''] + output_words)

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()

In [0]:
from nltk.translate.bleu_score import sentence_bleu
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensor_from_sentence(questions, sentence)
        if input_tensor.size()[0] > 50:
            input_tensor = input_tensor[:50]
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.init_hidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS]], device=device)  

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(answers.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words[:-1], decoder_attentions[:di + 1]

In [0]:
# Run this code block to test chatbot
print("Enter bye to quit")
while (1):
  
  user_input = input("User: ")

  user_input = str(user_input)

  if user_input == 'bye':
    print("Bot: Okay bye..")
    break;
  else:
    print("Bot: " + str(inference(encoder, attention_decoder, user_input)))

Enter bye to quit
User: Hello
Bot: hello how may i help you ?
User: I need help with a gift
Bot: i have some ideas .
User: lets hear them
Bot: what about a facial wax ?
User: yeah that's a great idea
Bot: would you like to buy .
User: yes thank you
Bot: you re welcome !
User: bye
Okay bye..


In [0]:
encoder_name = 'encoder_serialized.pt'
decoder_name = 'decoder_serialized.pt'
print('Saving model...')
torch.save(encoder, encoder_name)
torch.save(attention_decoder, decoder_name)

Saving model...


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [0]:
%%capture
BLEUScore = BLEU(encoder, attention_decoder, 5000)

In [0]:
print('Raw BLEU score of the whole model: {:.4%}'.format(BLEUScore))
print('BLEU score of the whole mdoel: {:.4%}'.format(BLEUScore * 10))

Raw BLEU score of the whole model: 8.3263%
BLEU score of the whole mdoel: 83.2634%
