In [5]:
!pip install -U torch==1.11.0 torchdata==0.3.0 torchtext==0.12.0
import torchtext
print(torchtext.__version__)

Defaulting to user installation because normal site-packages is not writeable
Requirement already up-to-date: torch==1.11.0 in /opt/conda/lib/python3.7/site-packages (1.11.0)
Requirement already up-to-date: torchdata==0.3.0 in /root/.local/lib/python3.7/site-packages (0.3.0)
Collecting torchtext==0.12.0
  Downloading torchtext-0.12.0-cp37-cp37m-manylinux1_x86_64.whl (10.4 MB)
[K     |████████████████████████████████| 10.4 MB 5.0 MB/s eta 0:00:01
Installing collected packages: torchtext
  Attempting uninstall: torchtext
    Found existing installation: torchtext 0.14.1
    Uninstalling torchtext-0.14.1:
      Successfully uninstalled torchtext-0.14.1
Successfully installed torchtext-0.12.0
0.12.0


# LSTM Bot

## Project Overview

In this project, you will build a chatbot that can converse with you at the command line. The chatbot will use a Sequence to Sequence text generation architecture with an LSTM as it's memory unit. You will also learn to use pretrained word embeddings to improve the performance of the model. At the conclusion of the project, you will be able to show your chatbot to potential employers.

Additionally, you have the option to use pretrained word embeddings in your model. We have loaded Brown Embeddings from Gensim in the starter code below. You can compare the performance of your model with pre-trained embeddings against a model without the embeddings.



---



A sequence to sequence model (Seq2Seq) has two components:
- An Encoder consisting of an embedding layer and LSTM unit.
- A Decoder consisting of an embedding layer, LSTM unit, and linear output unit.

The Seq2Seq model works by accepting an input into the Encoder, passing the hidden state from the Encoder to the Decoder, which the Decoder uses to output a series of token predictions.

## Dependencies

- Pytorch
- Numpy
- Pandas
- NLTK
- Gzip
- Gensim


Please choose a dataset from the Torchtext website. We recommend looking at the Squad dataset first. Here is a link to the website where you can view your options:

- https://pytorch.org/text/stable/datasets.html





In [9]:
import gensim
import nltk
import numpy as np
import pandas as pd
import gzip
import torch
print(torch.__version__)
from nltk.corpus import brown

# below added by me
from torchtext.datasets import SQuAD2
from torchtext.vocab import build_vocab_from_iterator
from torchtext.data.utils import get_tokenizer
import json
import random
from collections import Counter
from torch.utils.data import Dataset, DataLoader, random_split

# below added by me
from nltk.tokenize import RegexpTokenizer
from torch import nn

nltk.download('brown')
nltk.download('punkt')

# Output, save, and load brown embeddings

model = gensim.models.Word2Vec(brown.sents())
model.save('brown.embedding')

w2v = gensim.models.Word2Vec.load('brown.embedding')
print("Before adding tokens: ", len(w2v.wv))
# print(w2v.wv)
# print("vector len of a: ", len(w2v.wv.get_vector('an')))
w2v.wv.add_vectors(['SOS', 'EOS'], [np.zeros(100), np.zeros(100)])
# print("After token added: ", len(w2v.wv))
# print(w2v.wv)
weights = torch.FloatTensor(w2v.wv.vectors)

def preprocess(text):

    # Replace punctuation with tokens so we can use them in our model
    text = text.lower()
    text = text.replace('.', ' <PERIOD> ')
    text = text.replace(',', ' <COMMA> ')
    text = text.replace('"', ' <QUOTATION_MARK> ')
    text = text.replace(';', ' <SEMICOLON> ')
    text = text.replace('!', ' <EXCLAMATION_MARK> ')
    text = text.replace('?', ' <QUESTION_MARK> ')
    text = text.replace('(', ' <LEFT_PAREN> ')
    text = text.replace(')', ' <RIGHT_PAREN> ')
    text = text.replace('--', ' <HYPHENS> ')
    text = text.replace('?', ' <QUESTION_MARK> ')
    # text = text.replace('\n', ' <NEW_LINE> ')
    text = text.replace(':', ' <COLON> ')
    words = text.split()
    
    # Remove all words with  5 or fewer occurences
    word_counts = Counter(words)
    trimmed_words = [word for word in words if word_counts[word] > 5]

    return trimmed_words

def parse(path):
    g = gzip.open(path, 'rb')
    for l in g:
        data = json.loads(l)    
        for datum in data['data']:
            yield datum

def loadDF(path):
    i = 0
    df = {}
    for d in parse(path):
        df[i] = d
        i += 1
    return pd.DataFrame.from_dict(df, orient='index')


def prepare_text(sentence):
    nltkTokenizer = RegexpTokenizer(r'\w+')
    tokens = nltkTokenizer.tokenize(sentence)

    tokens.append('EOS')
    tokens.insert(0, 'SOS')
    return tokens



def train_test_split(dataset):
    
    '''
    Input: SRC, our list of questions from the dataset 
            TRG, our list of responses from the dataset

    Output: Training and test datasets for SRC & TRG

    '''
    train_size = int(len(dataset)* 0.8)
    validation_size = int(len(dataset)* 0.1)
    test_size = len(dataset) - train_size - validation_size

    train_dataset, validation_dataset, test_dataset = random_split(dataset, [train_size, validation_size, test_size])
    return train_dataset, validation_dataset, test_dataset



tokenizer = get_tokenizer('basic_english')
SRC_train_iter = SQuAD2(root = './', split='train')
df = loadDF('./SQuAD2/train-v2.0.json.gz')
print(df)


1.11.0+cu102


[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Before adding tokens:  15173
                                              title  \
0                                           Beyoncé   
1                                   Frédéric_Chopin   
2    Sino-Tibetan_relations_during_the_Ming_dynasty   
3                                              IPod   
4            The_Legend_of_Zelda:_Twilight_Princess   
..                                              ...   
437                                       Infection   
438                                         Hunting   
439                                       Kathmandu   
440                           Myocardial_infarction   
441                                          Matter   

                                            paragraphs  
0    [{'qas': [{'question': 'When did Beyonce start...  
1    [{'qas': [{'question': "What was Frédéric's na...  
2    [{'qas': [{'question': 'Who were Wang Jiawei a...  
3    [{'qas': [{'question': 'Which company produces...  
4    [{'qas': [{'question

In [12]:
class Vocab:
    def __init__(self, name):
        self.name = name
        self.index = {}
        self.count = 0
        self.words = {}
        self.words_freq = {}

    def indexWord(self, word):
        if word not in self.words:
            self.words[word] = self.count
            self.words_freq[word] = 1
            self.index[str(self.count)] = word
            self.count += 1
            return True
        else:
            self.words_freq[word] += 1
            return False

In [13]:
myVocab = Vocab(name = "myVocab")
count = 0
for titleIdx, paragraphs in df.iterrows():
    for para in paragraphs['paragraphs']:
        for qa in para['qas']:
            text = prepare_text(qa['question'])
            for t in text:
                if (myVocab.indexWord(t.lower())):
                    if (count % 1000) == 0:
                        print("Adding word {} to our vocabulary.".format(count))
                        print(text)
                    count += 1
            
            # print(qa['answers'][0])
            # ansList = []
            for answer in qa['answers']:
                text = prepare_text(answer['text'])
                # ansList.append(text)
                for t in text:
                    if (myVocab.indexWord(t.lower())):
                        if (count % 1000) == 0:
                            print("Adding word {} to our vocabulary.".format(count))
                            print(text)
                        count += 1

Adding word 0 to our vocabulary.
['SOS', 'When', 'did', 'Beyonce', 'start', 'becoming', 'popular', 'EOS']
Adding word 1000 to our vocabulary.
['SOS', 'Besides', 'R', 'B', 'which', 'genres', 'does', 'Beyonce', 'dabble', 'in', 'EOS']
Adding word 2000 to our vocabulary.
['SOS', 'Which', 'friend', 'took', 'on', 'the', 'role', 'of', 'several', 'jobs', 'to', 'help', 'Chopin', 'including', 'copyist', 'EOS']
Adding word 3000 to our vocabulary.
['SOS', 'What', 'was', 'the', 'name', 'of', 'the', 'Tibetologist', 'EOS']
Adding word 4000 to our vocabulary.
['SOS', 'Many', 'different', 'types', 'of', 'interaction', 'can', 'be', 'controlled', 'by', 'how', 'many', 'buttons', 'EOS']
Adding word 5000 to our vocabulary.
['SOS', 'delayed', 'arrival', 'EOS']
Adding word 6000 to our vocabulary.
['SOS', 'About', 'how', 'many', 'million', 'square', 'feet', 'of', 'office', 'space', 'is', 'present', 'in', 'Midtown', 'Manhattan', 'EOS']
Adding word 7000 to our vocabulary.
['SOS', 'hydrogen', 'production', 'from'

In [14]:
myVocab.words_freq

{'sos': 217148,
 'when': 9991,
 'did': 23178,
 'beyonce': 307,
 'start': 692,
 'becoming': 115,
 'popular': 500,
 'eos': 217140,
 'in': 42036,
 'the': 107784,
 'late': 452,
 '1990s': 128,
 'what': 78062,
 'areas': 548,
 'compete': 72,
 'she': 227,
 'was': 25286,
 'growing': 107,
 'up': 1225,
 'singing': 41,
 'and': 18513,
 'dancing': 13,
 'leave': 195,
 'destiny': 50,
 's': 13589,
 'child': 306,
 'become': 1143,
 'a': 20229,
 'solo': 37,
 'singer': 72,
 '2003': 239,
 'city': 2640,
 'state': 1826,
 'grow': 116,
 'houston': 327,
 'texas': 211,
 'which': 8974,
 'decade': 261,
 'famous': 360,
 'r': 157,
 'b': 214,
 'group': 1615,
 'lead': 331,
 'album': 303,
 'made': 1400,
 'her': 578,
 'worldwide': 146,
 'known': 1387,
 'artist': 163,
 'dangerously': 13,
 'love': 114,
 'who': 14229,
 'managed': 45,
 'mathew': 2,
 'knowles': 1,
 'beyoncé': 232,
 'rise': 197,
 'to': 31128,
 'fame': 41,
 'role': 334,
 'have': 4880,
 'first': 4142,
 'released': 558,
 'as': 6384,
 'release': 331,
 'how': 13557

In [15]:
from collections import Counter
print(len(myVocab.words))

threshold = 1e-1 # test 10, 1, 1e-1, 1e-2
p_disc = {myVocab.index[str(idx)]: 1 - np.sqrt(threshold / myVocab.words_freq[myVocab.index[str(idx)]]) for idx in range(myVocab.count)}

trainVocab = Vocab(name = "trainVocab")
for i in range(myVocab.count):
    word = myVocab.index[str(i)]
    word_freq = myVocab.words_freq[word]
    idx = myVocab.words[word]
    if (random.random() < (1-p_disc[word]) and word_freq > 2):
        trainVocab.indexWord(word)


trainVocab.indexWord('sos')
trainVocab.indexWord('eos')
print(len(trainVocab.words))

56523
2863


In [16]:
class QDataset(Dataset):
    """ Q dataset """
    def __init__(self, dataFrame):
        self.questions = []
        for titleIdx, paragraphs in dataFrame.iterrows():
            for para in paragraphs['paragraphs']:
                for qa in para['qas']:
                    text = prepare_text(qa['question'])
                    question = []
                    if len(qa['answers']) > 0:
                        for t in text:
                            try:
                                question.append(trainVocab.words[t.lower()])
                            except:
                                question.append(len(trainVocab.words))
                        self.questions.append(question)


        # self.questions = torch.Tensor(self.questions)

                    
    def __len__(self):
        return len(self.questions)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        return self.questions[idx]

class ADataset(Dataset):
    """ A dataset """
    def __init__(self, dataFrame):
        self.answers = []
        for titleIdx, paragraphs in dataFrame.iterrows():
            for para in paragraphs['paragraphs']:
                for qa in para['qas']:
                    ansList = []
                    for answer in qa['answers']:
                        answerVocab = []
                        text = prepare_text(answer['text'])
                        for t in text:
                            try:
                                answerVocab.append(trainVocab.words[t.lower()])
                            except:
                                answerVocab.append(len(trainVocab.words))
                        ansList.append(answerVocab)

                    if len(ansList) > 0:
                        self.answers.append(ansList[0])

        # self.answers = torch.Tensor(self.answers)

    def __len__(self):
        return len(self.answers)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        return self.answers[idx]


class QADataset(Dataset):
    def __init__(self, qDataset, aDataset):
        self.src = qDataset
        self.trg = aDataset
        assert(len(self.src) == len(self.trg))
    
    def __len__(self):
        return len(self.src)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        return {"question": torch.tensor(self.src[idx]), 
                "answer": torch.tensor(self.trg[idx])}
        


SRC_dataset = QDataset(df)
TRG_dataset = ADataset(df)

dataset = QADataset(SRC_dataset, TRG_dataset)
len(dataset)



86821

In [17]:
max_length = 100
embeddings_index = {}



embedding_matrix = np.zeros((trainVocab.count + 2, max_length))
for i in range(len(trainVocab.words)):
    word = trainVocab.index[str(i)]
    try:
        embedding_vector = w2v.wv[word]
        embedding_matrix[i] = embedding_vector
        print("{} ".format(word))
    except:
        continue

venture 
marry 
is 
recurring 
also 
get 
my 
couple 
blamed 
gross 
surface 
bush 
listed 
concerts 
appearance 
gala 
husband 
baby 
dedicated 
cries 
raise 
ballot 
asked 
encourage 
addressed 
recipients 
spend 
recordings 
proves 
aggressive 
museums 
latest 
rush 
pulse 
services 
shoe 
disaster 
initially 
miss 
guest 
related 
surviving 
painted 
portraits 
intimate 
report 
delicate 
commented 
moment 
crushed 
officially 
identified 
pupil 
attendance 
road 
displaying 
think 
affection 
recover 
calm 
soprano 
invitation 
inability 
showed 
dozen 
essay 
diagnosis 
certificate 
descriptive 
scholarly 
flexible 
straightforward 
formidable 
increases 
modes 
disregard 
lucky 
comfortable 
peers 
publisher 
seventeen 
barely 
themselves 
mentioned 
care 
strongly 
dominate 
replace 
workshops 
recovery 
raids 
organic 
priest 
trips 
embassy 
nominal 
belonged 
amplifier 
pulled 
stereo 
cars 
computer 
lapse 
infringement 
behalf 
72 
relating 
somewhat 
wind 
analogous 
inte

In [18]:

class Encoder(nn.Module):
    
    def __init__(self, input_size, hidden_size, n_layers, dropout = 0.1):
        
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.n_layers = n_layers
                
        # self.hidden = torch.zeros(n_layers, 1, hidden_size)
        # self.cell = torch.zeros(n_layers, 1, hidden_size)
        # self.embedding provides a vector representation of the inputs to our model
        # EXTRA CREDIT: Load your pretrained embeddings into the LSTM unit -- how?
        self.embedding = nn.Embedding.from_pretrained(torch.Tensor(embedding_matrix))
        self.embedding_dim = self.embedding.weight.shape[1]
        # self.lstm, accepts the vectorized input and passes a hidden state
        self.lstm = nn.LSTM(self.embedding_dim, self.hidden_size, n_layers, dropout = dropout)
    
    def forward(self, i, hidden, cell):
        
        '''
        Inputs: i, the src vector
        Outputs: o, the encoder outputs
                h, the hidden state
                c, the cell state
        '''
        i = i.to(torch.int)
        lstm_in = self.embedding(i)
        # print(i)
        o, (h, c) = self.lstm(lstm_in, (hidden, cell))
        return o, h, c
    

class Decoder(nn.Module):
      
    def __init__(self, hidden_size, output_size, n_layers, dropout_p):
        
        super(Decoder, self).__init__()
        
        self.hidden_size = hidden_size
        self.output_size = output_size
        # self.embedding provides a vector representation of the target to our model
        self.embedding = nn.Embedding.from_pretrained(torch.Tensor(embedding_matrix))
        self.embedding_size = self.embedding.weight.shape[1]
        # self.lstm, accepts the embeddings and outputs a hidden state
        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, n_layers, dropout = dropout_p)
        # self.ouput, predicts on the hidden state via a linear output layer     
        self.output = nn.Linear(self.hidden_size, self.output_size)
        
        # self.hidden = torch.zeros(n_layers, 1, hidden_size)
        # self.cell = torch.zeros(n_layers, 1, hidden_size)


    def forward(self, i, h, c):
        
        '''
        Inputs: i, the target vector
        Outputs: o, the prediction
                h, the hidden state
        '''
        # i dim: [batch size]
        # h dim: [n_layers, batch_size, hidden_size]
        # print("i shape: ", i.shape)
        i = i.to(torch.int)

        input = i.unsqueeze(0)
        embedded = self.embedding(input)
        lstm_out, (hidden, cell) = self.lstm(embedded, (h, c))
        o = self.output(lstm_out)
        return o, hidden, cell
        
        

class Seq2Seq(nn.Module):
    
    def __init__(self, encoder_input_size, encoder_hidden_size, decoder_hidden_size, decoder_output_size):
        
        super(Seq2Seq, self).__init__()
        self.n_layers = 2
        self.encoder = Encoder(encoder_input_size, encoder_hidden_size, self.n_layers, dropout = 0.1)
        self.decoder = Decoder(decoder_hidden_size, decoder_output_size, self.n_layers, dropout_p = 0.1)
        if (torch.cuda.is_available()):
            print("cuda available")
            self.device = torch.device('cuda')
        else:
            print("cuda unavailable")
            self.device = torch.device('cpu')
    
    
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):      
        batch_size = trg.shape[1]
        # print("trg shape: ", trg.shape)
        # print("batch size: ", batch_size)
        trg_len = trg.shape[0]
        trg_vocab_size = self.decoder.output_size

        # tensor to store decoder outputs
        o = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)

        # last hidden state of the encoder serves as the initial hidden state of the decoder
        encoder_hidden = torch.zeros(self.n_layers, batch_size, self.encoder.hidden_size).to(self.device)
        encoder_cell = torch.zeros(self.n_layers, batch_size, self.encoder.hidden_size).to(self.device)

        out, hidden, cell = self.encoder(src, encoder_hidden, encoder_cell)
        # first input to the decoder is <sos> tokens
        # print("encoder passed")
        input = trg[0,:]

        for t in range(1, trg_len):
            output, hidden, cell = self.decoder(input, hidden, cell)
            # print("decoder passed {} times among {}".format(t, trg_len))
            o[t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(2)
            # print("output shape: ", output.shape)
            # print("top1: ", top1)
            
            if (teacher_force):
                input = trg[t]
            else:
                input = top1
                input = input.squeeze(0)

        return o

    



In [19]:
input_size = len(trainVocab.words) + 1
output_size = len(trainVocab.words) + 1
hidden_dim = 512
n_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5

seq = Seq2Seq(input_size, hidden_dim, hidden_dim, output_size)
emb_dim = seq.encoder.embedding_dim
seq = seq.to(seq.device)
print("model moved to {}".format(seq.device))

def init_weights(m):
    for name, param in m.named_parameters():
        if (name == "embedding.weight"):
            continue
        
        nn.init.uniform_(param.data, -0.08, 0.08)

seq.apply(init_weights)

cuda available
model moved to cuda


Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(2865, 100)
    (lstm): LSTM(100, 512, num_layers=2, dropout=0.1)
  )
  (decoder): Decoder(
    (embedding): Embedding(2865, 100)
    (lstm): LSTM(100, 512, num_layers=2, dropout=0.1)
    (output): Linear(in_features=512, out_features=2864, bias=True)
  )
)

In [20]:
import torch.optim as optim
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
optimizer = optim.Adam(seq.parameters())
# trg_pad_idx = [trainVocab.words['sos'], trainVocab.words['eos']]
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter(log_dir='./tensorboard/')

def train(model, iterator, optimizer, critierion, clip):
    model.train()
    epoch_loss = 0
    for i, batch in enumerate(tqdm(iterator)):
        src = batch['question']
        trg = batch['answer']

        # print("src shape: ", src.shape)

        optimizer.zero_grad()

        output = model(src, trg)

        output_dim = output.shape[-1]

        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)

        loss = criterion(output, trg)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()
        epoch_loss += loss.item()

    return epoch_loss / len(iterator)

In [21]:
def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for i, batch in enumerate(iterator):
        
            src = batch['question']
            trg = batch['answer']
            # print(src.shape)
            src = src.to(model.device)
            trg = src.to(model.device)

            output = model(src, trg, 0) # turn off teacher forcing

            output_dim = output.shape[-1]

            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)

            loss = criterion(output, trg)

            epoch_loss += loss.item()

        

    return epoch_loss / len(iterator)

In [22]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [23]:
import time
import math

N_EPOCHS = 100
CLIP = 1

best_valid_loss = float('inf')
train_dataset, validation_dataset, test_dataset = train_test_split(dataset)
print(f"Training Data Size : {len(train_dataset)}")
print(f"Validation Data Size : {len(validation_dataset)}")
print(f"Testing Data Size : {len(test_dataset)}")

def make_batch(samples):
    max_batch_data_size = 30
    questions = [sample['question'] for sample in samples]
    # questions.append(torch.Tensor(np.zeros(30)))
    answers = [sample['answer'] for sample in samples]
    # answers.append(torch.Tensor(np.zeros(30)))

    padded_questions = torch.nn.utils.rnn.pad_sequence(questions, batch_first=False)
    # padded_questions = padded_questions[:,0:16]
    padded_answers = torch.nn.utils.rnn.pad_sequence(answers, batch_first=False)
    # padded_answers = padded_questions[:,0:16]
    # print("padded questions shape: ", padded_questions.shape)
    return {'question': padded_questions.contiguous().to("cuda"),
            'answer': padded_answers.contiguous().to("cuda")}
 # temporary, hard coded to use cuda

train_dataloader = DataLoader(train_dataset, batch_size = 64, shuffle=True, drop_last=True, collate_fn=make_batch)
validation_dataloader = DataLoader(validation_dataset, batch_size = 64, shuffle=True, drop_last=True, collate_fn=make_batch)
test_dataloader = DataLoader(test_dataset, batch_size = 64, shuffle=True, drop_last=True, collate_fn=make_batch)



print(f"Training Data num Batches : {len(train_dataloader)}")
print(f"Validation Data num Batches : {len(validation_dataloader)}")
print(f"Testing Data num Batches : {len(test_dataloader)}")


for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(seq, train_dataloader, optimizer, criterion, CLIP)
    valid_loss = evaluate(seq, validation_dataloader, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(seq.state_dict(), 'model64_th1e-1_' + str(epoch) + '.pt')
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} ')
    writer.add_scalar("Loss/train", train_loss, epoch)
    writer.add_scalar("Loss/valid", valid_loss, epoch)

    print(f'\t Val. Loss: {valid_loss:.3f} ')

writer.close()

  0%|          | 0/1085 [00:00<?, ?it/s]

Training Data Size : 69456
Validation Data Size : 8682
Testing Data Size : 8683
Training Data num Batches : 1085
Validation Data num Batches : 135
Testing Data num Batches : 135


100%|██████████| 1085/1085 [02:04<00:00,  8.68it/s]
  0%|          | 1/1085 [00:00<02:02,  8.86it/s]

Epoch: 01 | Time: 2m 9s
	Train Loss: 0.345 
	 Val. Loss: 1.282 


100%|██████████| 1085/1085 [02:05<00:00,  8.66it/s]
  0%|          | 1/1085 [00:00<02:22,  7.60it/s]

Epoch: 02 | Time: 2m 9s
	Train Loss: 0.284 
	 Val. Loss: 1.366 


100%|██████████| 1085/1085 [02:04<00:00,  8.71it/s]
  0%|          | 1/1085 [00:00<01:50,  9.79it/s]

Epoch: 03 | Time: 2m 9s
	Train Loss: 0.286 
	 Val. Loss: 2.034 


100%|██████████| 1085/1085 [02:05<00:00,  8.67it/s]
  0%|          | 1/1085 [00:00<02:33,  7.08it/s]

Epoch: 04 | Time: 2m 9s
	Train Loss: 0.282 
	 Val. Loss: 1.256 


100%|██████████| 1085/1085 [02:04<00:00,  8.72it/s]
  0%|          | 1/1085 [00:00<02:18,  7.81it/s]

Epoch: 05 | Time: 2m 8s
	Train Loss: 0.282 
	 Val. Loss: 2.012 


100%|██████████| 1085/1085 [02:05<00:00,  8.68it/s]
  0%|          | 1/1085 [00:00<02:04,  8.71it/s]

Epoch: 06 | Time: 2m 9s
	Train Loss: 0.279 
	 Val. Loss: 1.880 


100%|██████████| 1085/1085 [02:05<00:00,  8.67it/s]
  0%|          | 0/1085 [00:00<?, ?it/s]

Epoch: 07 | Time: 2m 9s
	Train Loss: 0.277 
	 Val. Loss: 1.304 


100%|██████████| 1085/1085 [02:04<00:00,  8.71it/s]
  0%|          | 0/1085 [00:00<?, ?it/s]

Epoch: 08 | Time: 2m 9s
	Train Loss: 0.276 
	 Val. Loss: 1.874 


100%|██████████| 1085/1085 [02:04<00:00,  8.71it/s]
  0%|          | 0/1085 [00:00<?, ?it/s]

Epoch: 09 | Time: 2m 9s
	Train Loss: 0.279 
	 Val. Loss: 1.384 


100%|██████████| 1085/1085 [02:04<00:00,  8.69it/s]
  0%|          | 0/1085 [00:00<?, ?it/s]

Epoch: 10 | Time: 2m 9s
	Train Loss: 0.279 
	 Val. Loss: 1.436 


100%|██████████| 1085/1085 [02:04<00:00,  8.71it/s]
  0%|          | 1/1085 [00:00<02:08,  8.44it/s]

Epoch: 11 | Time: 2m 8s
	Train Loss: 0.276 
	 Val. Loss: 1.351 


100%|██████████| 1085/1085 [02:05<00:00,  8.66it/s]
  0%|          | 1/1085 [00:00<02:23,  7.56it/s]

Epoch: 12 | Time: 2m 9s
	Train Loss: 0.273 
	 Val. Loss: 1.401 


 52%|█████▏    | 563/1085 [01:04<01:03,  8.19it/s]

KeyboardInterrupt: 

In [24]:
seq.load_state_dict(torch.load('model64_th1e-1_3.pt'))

test_loss = evaluate(seq, test_dataloader, criterion)

print(f'| Test Loss: {test_loss:.3f}')

| Test Loss: 1.272


In [26]:
import sys

input = "what is your name"
input = prepare_text(input)
inputInt = []
inputInt.append(trainVocab.words['sos'])
for t in input:
    try:
        inputInt.append(trainVocab.words[t.lower()])
    except:
        inputInt.append(len(trainVocab.words))

inputInt.append(trainVocab.words['eos'])
npFakeInput = np.zeros((len(inputInt), 64))
max_answer_len = 30
fake_batch = torch.Tensor(npFakeInput).to(torch.int).to(seq.device)
npFakeAns = np.zeros((max_answer_len, 64))
fake_ans = torch.Tensor(npFakeAns).to(torch.int).to(seq.device)
# fake_ans = torch.cat((fake_batch, torch.Tensor( np.zeros((1, max_answer_len - len(inputInt))) ) ), dim=1)
answer = seq(fake_batch, fake_ans, 0)
output_dim = answer.shape[-1]
answer = answer[:,0,:].view(-1, output_dim)
answerIdx = answer.argmax(dim=1)
answerIdxList = answerIdx.tolist()
print(answerIdxList)
ansText = ""

for idx in answerIdxList:
    if idx >= trainVocab.count:
        ansText = ansText + " None"
        continue
    ansText = ansText + " " + trainVocab.index[str(idx)]
print("Chatbot: " + ansText)

[0, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863, 2863]
Chatbot:  grammy None None None None None None None None None None None None None None None None None None None None None None None None None None None None None
