In [81]:
import bisect
import time
import math
import random
import os
from itertools import zip_longest

import numpy as np

import pickle as pkl
import gzip

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torch.nn.functional as F

from torch import optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from utils import asMinutes, timeSince, load_zipped_pickle, corpus_bleu, directories
from langUtils import loadLangPairs, langDataset, langCollateFn, initHybridEmbeddings, tensorToList

import matplotlib.pyplot as plt
plt.switch_backend('agg')
import seaborn as sns; sns.set()
sns.set_style("darkgrid")
sns.set_context("paper")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

In [59]:
class SortedList(list):
    def insort(self, x):
        bisect.insort(self, x)

In [60]:
data_dir, em_dir = directories()

SPECIAL_SYMBOLS_ID = PAD_ID, UNK_ID, SOS_ID, EOS_ID = 0, 1, 2, 3
NUM_SPECIAL = len(SPECIAL_SYMBOLS_ID)

vi, en = loadLangPairs("vi")

In [93]:
BATCH_SIZE = 64

train_dataset = langDataset([(vi.train_num[i], en.train_num[i]) for i in range(len(vi.train_num)) if (len(vi.train[i]) < vi.max_length) & (len(en.train[i]) < en.max_length)])
overfit_dataset = langDataset([(vi.train_num[i], en.train_num[i]) for i in range(2)])
train_loader = torch.utils.data.DataLoader(dataset=overfit_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=langCollateFn,
                                           shuffle=False)
dev_dataset = langDataset([(vi.dev_num[i], en.dev_num[i]) for i in range(len(vi.dev_num)) if (len(vi.dev[i]) < vi.max_length) & (len(en.dev[i]) < en.max_length)])
dev_loader = torch.utils.data.DataLoader(dataset=dev_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=langCollateFn,
                                           shuffle=True)

In [90]:
class EncoderRNN(nn.Module):
    def __init__(self, params, raw_emb, learn_ids):
        super(EncoderRNN, self).__init__()
        
        self.hidden_size = params['hidden_size']
        self.num_layers = params['num_layers']
        
        self.embedding = initHybridEmbeddings(raw_emb, learn_ids)
        self.gru = nn.GRU(self.embedding.embedding_dim, params['hidden_size'], self.num_layers, batch_first=True, bidirectional=True)
        
    def forward(self, inp, inp_lens, hidden=None):
        embedded = self.embedding(inp)
        packed = pack_padded_sequence(embedded, inp_lens)
        
        output, self.hidden = self.gru(packed, hidden)
        output, _ = pad_packed_sequence(output)
        output = output[:, :, :self.hidden_size] + output[:, : ,self.hidden_size:]
        return output, self.hidden
    
class DecoderRNN(nn.Module):
    def __init__(self, params, raw_emb, learn_ids):
        super(DecoderRNN, self).__init__()
        self.hidden_size = params['hidden_size']
        self.num_layers = params['num_layers']
        self.output_size = params['output_size']

        self.embedding = initHybridEmbeddings(raw_emb, learn_ids)
        self.gru = nn.GRU(self.embedding.embedding_dim, params['hidden_size'], self.num_layers, batch_first=True, bidirectional=True)
        self.out = nn.Linear(self.hidden_size, self.output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, inp, hidden, encoder_output=None):
        embedded = self.embedding(inp)
        output = F.relu(embedded)
        
        output, self.hidden = self.gru(output, hidden)
        orig = output
        output = output[:, :, :self.hidden_size] + output[:, : ,self.hidden_size:]
        output = torch.exp(self.softmax(self.out(output))).squeeze(0)
        return output, hidden, None

def maskNLLLoss(inp, target, mask):
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1).to(device)))
    loss = crossEntropy.masked_select(mask.to(device)).mean()
    loss = loss.to(device)
    return loss, nTotal.item()

In [108]:
class Model():
    def __init__(self, encoder, decoder, encoder_optim, decoder_optim, beam_size):
        
        self.encoder = encoder
        self.decoder = decoder
        
        self.encoder_optim = encoder_optim
        self.decoder_optim = decoder_optim
        
        self.beam_size = beam_size
        
    def fit(self, train_data, dev_data, teacher_forcing_ratio, n_epoch, print_every, n_grams):
        start = time.time()
        
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.n_epoch = n_epoch
        
        
        print("Initializing...")
        start_epoch = 1
        print_loss_total = 0 
        plot_loss_total = 0
        plot_losses = []
        plot_train_scores = []
        plot_dev_scores = []
        
        for epoch in range(start_epoch, n_epoch):
            for i, (inp, inp_lens, output, out_mask, out_max) in enumerate(train_loader):
                loss = self.trainEpoch(inp, inp_lens, output, out_mask, out_max)
                print_loss_total += loss
                plot_loss_total += loss

                if epoch  % print_every == 0:
                    plot_loss_avg = plot_loss_total / print_every
                    plot_losses.append(plot_loss_avg)
                    plot_loss_total = 0       

                    print_loss_avg = print_loss_total / print_every
                    print_loss_total = 0
                    print("{} ({} {}) Iter: {}/{} | Loss:{:.4}".format(timeSince(start, epoch/n_epoch), 
                                                                                            epoch, 
                                                                                            epoch/n_epoch*100, 
                                                                                            i,
                                                                                            len(train_loader),
                                                                                            print_loss_avg))    
            
        train_score = self.bleuScore(train_loader, n_grams)
        dev_score = self.bleuScore(dev_loader, n_grams)
        plot_train_scores.append(train_score)
        plot_dev_scores.append(dev_score)
        print("EPOCH : {} | Train Score: {} | Dev Score: {}".format(epoch, train_score, dev_score))
        self.plot_losses = plot_losses
        self.plot_train_scores = plot_train_scores
        self.plot_dev_scores = plot_dev_scores
        return "Training Complete!"            
            
    def trainEpoch(self, inp, inp_lens, output, out_mask, out_max):
        self.encoder_optim.zero_grad()
        self.decoder_optim.zero_grad()

        loss, print_losses, n_totals = 0, [], 0
        
        encoder_output, encoder_hidden = self.encoder(inp, inp_lens)
        
        decoder_input = torch.LongTensor([[SOS_ID for _ in range(inp.size(1))]]).to(device)
        decoder_hidden = encoder_hidden[:,-1:,:].contiguous()

        if random.random() < self.teacher_forcing_ratio:
            for t in range(out_max):
                decoder_output, decoder_hidden, _ = self.decoder(decoder_input, decoder_hidden, encoder_output)
                decoder_input = output[t].view(1, -1)
                
                mask_loss, nTotal = maskNLLLoss(decoder_output, output[t], out_mask[t])
                loss += mask_loss
                print_losses.append(mask_loss.item() * nTotal)
                n_totals += nTotal
        else:
            for t in range(out_max):
                decoder_output, decoder_hidden, _ = self.decoder(decoder_input, decoder_hidden, encoder_output)
                _, topi = decoder_output.topk(1)
                decoder_input = torch.LongTensor([[topi[i][0] for i in range(inp.size(1))]]).to(device).detach()
                
                mask_loss, nTotal = maskNLLLoss(decoder_output, output[t], out_mask[t])
                loss += mask_loss
                print_losses.append(mask_loss.item() * nTotal)
                n_totals += nTotal
                
        loss.backward()

        self.encoder_optim.step()
        self.decoder_optim.step()

        return sum(print_losses) / n_totals
    
    def bleuScore(self, data_loader, n_grams):
        with torch.no_grad():

            true_outputs = []
            decoder_outputs = []

            for i, (inp, inp_lens, output, out_mask, out_max) in enumerate(data_loader):
                if i * BATCH_SIZE >= 10000:
                    break
                if i > 0:
                    break
                print(' '.join([vi.id2word[int(i)] for i in inp[:,0]]))
                print(' '.join([en.id2word[int(i)] for i in output[:,0]]))
                true_outputs += tensorToList(output)

                encoder_output, encoder_hidden = self.encoder(inp, inp_lens)

                decoder_input = torch.LongTensor([[SOS_ID for _ in range(inp.size(1))]]) # appends <SOS> to beginning of decoded text
                decoder_hidden = encoder_hidden[:,-1:,:].contiguous()

                # BEAM SEARCH BELOW (size self.beam_size)
                # candidates are stored as : (curr_scores, curr_sequences, decoder_hidden, decoder_input)
                beam_size = 1
                candidates = [([0 for _ in range(inp.size(1))], [[str(SOS_ID)] for _ in range(inp.size(1))], decoder_hidden, decoder_input) for _ in range(beam_size)]
                for t in range(out_max):
                    next_candidates = []
                    next_candidate_scores = [SortedList() for _ in range(inp.size(1))] # list of sorted lists of candidate scores for each sentence
                    next_candidate_inputs = [[] for _ in range(inp.size(1))] # dict from total curr score to next candidate token for each sentence
                    next_candidate_seqs = [[] for _ in range(inp.size(1))] # dict from total curr score to next candidate sequence
                    for curr_scores, curr_seqs, decoder_hidden, decoder_input in candidates:
                        # generate output + next hidden state given input and current hidden state of the candidate
                        decoder_output, decoder_hidden, _ = self.decoder(decoder_input, decoder_hidden, encoder_output)
                        topv, topi = decoder_output.topk(beam_size)
                        for k in range(beam_size): 
                            # calculate for each sentence the next `beam_size` best possible next tokens
                            for i in range(len(topi)):
                                if len(topi[i]) != beam_size:
                                    print('uhh', i, len(topi[i]))
                            decoder_input = torch.LongTensor([[topi[i][k] for i in range(inp.size(1))]]).to(device)
                            for i in range(inp.size(1)):
                                # for sentence `i`, add `topi[i][k]` as a candidate if the new score for the seq is better than the other candidates
                                if (curr_seqs[i][-1] == EOS_ID): #don't do anything with current sequence if already is EOS
                                    continue 
                                curr_score = curr_scores[i] + topv[i][k].item()
                                if (len(next_candidate_scores[i]) < beam_size or curr_score < next_candidate_scores[i][beam_size - 1]) and curr_score not in next_candidate_scores[i]:
                                    if len(next_candidate_scores[i]) == beam_size:
                                        next_candidate_inputs[i] = [candidate_input for candidate_input in next_candidate_inputs[i] if candidate_input[0] != next_candidate_scores[i][beam_size - 1]] # delete candidate associated with score
                                        next_candidate_seqs[i] = [candidate_seq for candidate_seq in next_candidate_seqs[i] if candidate_seq[0] != next_candidate_scores[i][beam_size - 1]] # delete candidate associated with score
                                        del next_candidate_scores[i][beam_size - 1] # delete associated score
                                    next_candidate_scores[i].insort(curr_score) # insert new score in sorted order to scores lists for the i'th sentence
                                    next_candidate_inputs[i].append((curr_score, topi[i][k].item())) # insert new token value for score key for the i'th sentence
                                    next_candidate_seqs[i].append((curr_score, curr_seqs[i] + [str(topi[i][k].item())]))
                    next_candidate_scores = [[score for score in next_candidate_scores[i]] for i in range(inp.size(1))]
                    next_candidate_seqs = [[candidate_seq[1] for candidate_seq in sorted(next_candidate_seqs[i])] for i in range(inp.size(1))]
                    next_candidate_inputs = [[candidate_input[1] for candidate_input in sorted(next_candidate_inputs[i])] for i in range(inp.size(1))]
    #                 now that each best 3 sequences for each sentence is selected, create new candidates.
                    for k in range(min(len(next_candidate_inputs[0]), len(next_candidate_scores[0]), len(next_candidate_seqs[0]))):
                        decoder_input = torch.LongTensor([[next_candidate_inputs[i][k] for i in range(inp.size(1))]])
                        next_scores = [next_candidate_scores[i][k] for i in range(inp.size(1))]
                        next_seqs = [next_candidate_seqs[i][k] for i in range(inp.size(1))]
                        next_candidates.append((next_scores, next_seqs, decoder_hidden, decoder_input))                            
                    candidates = next_candidates
                pred_outputs = [pred_out + [str(EOS_ID)] for pred_out in candidates[0][1]]
                decoder_outputs += pred_outputs
                print(' '.join([en.id2word[int(i)] for i in pred_outputs[0]])) 
                print(candidates[0][0])

        return corpus_bleu(decoder_outputs, true_outputs, n_grams)

    def showLoss(self):
        plt.figure()
        fig = plt.figure(figsize=(10,6))
        fig_plt = sns.lineplot(x=np.arange(0, self.n_epoch, int(self.n_epoch/len(self.plot_losses))), y=self.plot_losses)
        fig_plt.set_title("Loss Over Time")
        fig_plt.set_ylabel("Loss")
        fig_plt.set_xlabel("Epochs")
        return fig_plt.get_figure()
    
    def showScore(self):
        df = pd.concat([pd.DataFrame({'X':np.arange(0, self.n_epoch, int(self.n_epoch/len(self.plot_losses))), 'Y':self.plot_train_scores, 'Score':'Train'}), 
                        pd.DataFrame({'X':np.arange(0, self.n_epoch, int(self.n_epoch/len(self.plot_losses))), 'Y':self.plot_dev_scores, 'Score':'Dev'})], axis=0)
    
        plt.figure()
        pp = sns.lineplot(data=df, x = 'X', y = 'Y', hue='Score', style="Score", legend= "brief")
        fig_plt.set_title("Score Over Time")
        fig_plt.set_ylabel("Score")
        fig_plt.set_xlabel("Epoch")
        return fig_plt.get_figure()

In [110]:
LEARNING_RATE = 0.001

encoder_params = {'hidden_size':256, 'num_layers':1}
decoder_params = {'hidden_size':encoder_params['hidden_size'], 'num_layers':1, 'output_size':en.n_words}

encoder = EncoderRNN(encoder_params, vi.emb, vi.learn_ids).to(device)
encoder_optim = optim.Adam(encoder.parameters(), lr=LEARNING_RATE)

decoder = DecoderRNN(decoder_params, en.emb, en.learn_ids).to(device)
decoder_optim = optim.Adam(decoder.parameters(), lr=0.001)

model = Model(encoder, decoder, encoder_optim, decoder_optim, 3)
model.fit(train_loader, dev_loader, teacher_forcing_ratio=1.0, n_epoch=1000, print_every=25, n_grams=4)

Initializing...
0m 12s (- 7m 54s) (25 2.5) Iter: 0/1 | Loss:0.3015
0m 24s (- 7m 42s) (50 5.0) Iter: 0/1 | Loss:0.05357
0m 36s (- 7m 30s) (75 7.5) Iter: 0/1 | Loss:0.02527
0m 48s (- 7m 18s) (100 10.0) Iter: 0/1 | Loss:0.02292
1m 0s (- 7m 5s) (125 12.5) Iter: 0/1 | Loss:0.02252
1m 13s (- 6m 53s) (150 15.0) Iter: 0/1 | Loss:0.02236
1m 25s (- 6m 41s) (175 17.5) Iter: 0/1 | Loss:0.02227
1m 37s (- 6m 29s) (200 20.0) Iter: 0/1 | Loss:0.02221
1m 49s (- 6m 17s) (225 22.5) Iter: 0/1 | Loss:0.02217
2m 1s (- 6m 5s) (250 25.0) Iter: 0/1 | Loss:0.02214
2m 13s (- 5m 52s) (275 27.500000000000004) Iter: 0/1 | Loss:0.02212
2m 26s (- 5m 40s) (300 30.0) Iter: 0/1 | Loss:0.0221
2m 38s (- 5m 28s) (325 32.5) Iter: 0/1 | Loss:0.02209
2m 50s (- 5m 16s) (350 35.0) Iter: 0/1 | Loss:0.02208
3m 2s (- 5m 4s) (375 37.5) Iter: 0/1 | Loss:0.02207
3m 14s (- 4m 52s) (400 40.0) Iter: 0/1 | Loss:0.02206
3m 26s (- 4m 39s) (425 42.5) Iter: 0/1 | Loss:0.02206
3m 39s (- 4m 27s) (450 45.0) Iter: 0/1 | Loss:0.02205
3m 51s (- 4m

'Training Complete!'

In [48]:
model = Model(encoder, decoder, encoder_optim, decoder_optim, 3)
model.fit(train_loader, dev_loader, teacher_forcing_ratio=1.0, n_epoch=10, print_every=200, n_grams=4)

Initializing...
2m 54s (- 26m 13s) (1 10.0) Iter: 200/1713 | Loss:3.682
5m 48s (- 52m 19s) (1 10.0) Iter: 400/1713 | Loss:3.344
8m 41s (- 78m 13s) (1 10.0) Iter: 600/1713 | Loss:3.251
11m 34s (- 104m 9s) (1 10.0) Iter: 800/1713 | Loss:3.201
14m 25s (- 129m 51s) (1 10.0) Iter: 1000/1713 | Loss:3.176
17m 20s (- 156m 1s) (1 10.0) Iter: 1200/1713 | Loss:3.16
20m 13s (- 181m 59s) (1 10.0) Iter: 1400/1713 | Loss:3.132
23m 8s (- 208m 15s) (1 10.0) Iter: 1600/1713 | Loss:3.118
EPOCH : 1 | Train Score: 1.094366405687311 | Dev Score: 2.4735624511076146
27m 43s (- 110m 54s) (2 20.0) Iter: 200/1713 | Loss:4.849
30m 37s (- 122m 29s) (2 20.0) Iter: 400/1713 | Loss:3.072
33m 31s (- 134m 4s) (2 20.0) Iter: 600/1713 | Loss:3.066
36m 24s (- 145m 39s) (2 20.0) Iter: 800/1713 | Loss:3.048
39m 16s (- 157m 7s) (2 20.0) Iter: 1000/1713 | Loss:3.047
42m 11s (- 168m 45s) (2 20.0) Iter: 1200/1713 | Loss:3.044
45m 4s (- 180m 18s) (2 20.0) Iter: 1400/1713 | Loss:3.026
47m 58s (- 191m 55s) (2 20.0) Iter: 1600/1713

KeyboardInterrupt: 

In [None]:
model = Model(encoder, decoder, encoder_optim, decoder_optim, 3)
model.fit(train_loader, dev_loader, teacher_forcing_ratio=1.0, n_epoch=10, print_every=400, n_grams=4)

Initializing...
5m 49s (- 52m 24s) (1 10.0) Iter: 400/1713 | Loss:3.485
11m 37s (- 104m 39s) (1 10.0) Iter: 800/1713 | Loss:3.188
17m 27s (- 157m 3s) (1 10.0) Iter: 1200/1713 | Loss:3.129
23m 15s (- 209m 16s) (1 10.0) Iter: 1600/1713 | Loss:3.099
EPOCH : 1 | Train Score: 0.016542484000263192 | Dev Score: 0.03486065754955129
39m 15s (- 157m 1s) (2 20.0) Iter: 400/1713 | Loss:3.901
45m 4s (- 180m 16s) (2 20.0) Iter: 800/1713 | Loss:3.025
50m 53s (- 203m 33s) (2 20.0) Iter: 1200/1713 | Loss:3.021
56m 42s (- 226m 49s) (2 20.0) Iter: 1600/1713 | Loss:3.012
EPOCH : 2 | Train Score: 0.004849132239198011 | Dev Score: 0.018492984527430117
72m 48s (- 169m 53s) (3 30.0) Iter: 400/1713 | Loss:3.8
78m 37s (- 183m 28s) (3 30.0) Iter: 800/1713 | Loss:2.961
84m 26s (- 197m 2s) (3 30.0) Iter: 1200/1713 | Loss:2.96
90m 15s (- 210m 36s) (3 30.0) Iter: 1600/1713 | Loss:2.96
EPOCH : 3 | Train Score: 0.0040597454846406445 | Dev Score: 0.012296856172842112
106m 24s (- 159m 37s) (4 40.0) Iter: 400/1713 | Loss

In [None]:
print('dun')

In [83]:
model = Model(encoder, decoder, encoder_optim, decoder_optim)
model.fit(train_loader, dev_loader, teacher_forcing_ratio=1.0, n_epoch=10, print_every=400, n_grams=4)

Initializing...
0m 15s (- 2m 15s) (1 10.0) Iter: 0/1713 | Loss:0.01039 | TrainScore:9.301821611466174 | DevScore:10.144865992263254
6m 18s (- 56m 42s) (1 10.0) Iter: 400/1713 | Loss:3.481 | TrainScore:3.9491317896699902 | DevScore:3.4373696280375103
12m 21s (- 111m 10s) (1 10.0) Iter: 800/1713 | Loss:3.195 | TrainScore:2.9839270337976247 | DevScore:2.741219284529995
18m 25s (- 165m 49s) (1 10.0) Iter: 1200/1713 | Loss:3.132 | TrainScore:1.402114597097759 | DevScore:1.476456197084048
24m 30s (- 220m 37s) (1 10.0) Iter: 1600/1713 | Loss:3.099 | TrainScore:1.0247680801474728 | DevScore:0.937463584188316
26m 21s (- 105m 27s) (2 20.0) Iter: 0/1713 | Loss:0.8663 | TrainScore:0.7355134314590411 | DevScore:0.7061211883251046
32m 25s (- 129m 42s) (2 20.0) Iter: 400/1713 | Loss:3.031 | TrainScore:0.6045048829654783 | DevScore:0.8465479049353456
38m 29s (- 153m 59s) (2 20.0) Iter: 800/1713 | Loss:3.027 | TrainScore:0.430588790903775 | DevScore:0.4009448010837915
44m 34s (- 178m 18s) (2 20.0) Iter

'Training Complete!'

In [83]:
def testBleuScore(data_loader, n_grams, model):
    with torch.no_grad():

        true_outputs = []
        decoder_outputs = []

        for i, (inp, inp_lens, output, out_mask, out_max) in enumerate(data_loader):
            if i * BATCH_SIZE >= 10000:
                break
            if i > 0:
                break
            print(' '.join([vi.id2word[int(i)] for i in inp[:,0]]))
            print(' '.join([en.id2word[int(i)] for i in output[:,0]]))
            true_outputs += tensorToList(output)

            encoder_output, encoder_hidden = model.encoder(inp, inp_lens)

            decoder_input = torch.LongTensor([[SOS_ID for _ in range(inp.size(1))]]) # appends <SOS> to beginning of decoded text
            decoder_hidden = encoder_hidden[:,-1:,:].contiguous()

            # BEAM SEARCH BELOW (size self.beam_size)
            # candidates are stored as : (curr_scores, curr_sequences, decoder_hidden, decoder_input)
            beam_size = 1
            candidates = [([0 for _ in range(inp.size(1))], [[str(SOS_ID)] for _ in range(inp.size(1))], decoder_hidden, decoder_input) for _ in range(beam_size)]
            for t in range(out_max):
                next_candidates = []
                next_candidate_scores = [SortedList() for _ in range(inp.size(1))] # list of sorted lists of candidate scores for each sentence
                next_candidate_inputs = [[] for _ in range(inp.size(1))] # dict from total curr score to next candidate token for each sentence
                next_candidate_seqs = [[] for _ in range(inp.size(1))] # dict from total curr score to next candidate sequence
                for curr_scores, curr_seqs, decoder_hidden, decoder_input in candidates:
                    # generate output + next hidden state given input and current hidden state of the candidate
                    decoder_output, decoder_hidden, _ = model.decoder(decoder_input, decoder_hidden, encoder_output)
                    topv, topi = decoder_output.topk(beam_size)
                    for k in range(beam_size): 
                        # calculate for each sentence the next `beam_size` best possible next tokens
                        decoder_input = torch.LongTensor([[topi[i][k] for i in range(inp.size(1))]]).to(device)
                        for i in range(inp.size(1)):
                            # for sentence `i`, add `topi[i][k]` as a candidate if the new score for the seq is better than the other candidates
                            if (curr_seqs[i][-1] == EOS_ID): #don't do anything with current sequence if already is EOS
                                continue 
                            curr_score = curr_scores[i] + topv[i][k].item()
                            if (len(next_candidate_scores[i]) < beam_size or curr_score < next_candidate_scores[i][beam_size - 1]) and curr_score not in next_candidate_scores[i]:
                                if len(next_candidate_scores[i]) == beam_size:
                                    next_candidate_inputs[i] = [candidate_input for candidate_input in next_candidate_inputs[i] if candidate_input[0] != next_candidate_scores[i][beam_size - 1]] # delete candidate associated with score
                                    next_candidate_seqs[i] = [candidate_seq for candidate_seq in next_candidate_seqs[i] if candidate_seq[0] != next_candidate_scores[i][beam_size - 1]] # delete candidate associated with score
                                    del next_candidate_scores[i][beam_size - 1] # delete associated score
                                next_candidate_scores[i].insort(curr_score) # insert new score in sorted order to scores lists for the i'th sentence
                                next_candidate_inputs[i].append((curr_score, topi[i][k].item())) # insert new token value for score key for the i'th sentence
                                next_candidate_seqs[i].append((curr_score, curr_seqs[i] + [str(topi[i][k].item())]))
                next_candidate_scores = [[score for score in next_candidate_scores[i]] for i in range(inp.size(1))]
                next_candidate_seqs = [[candidate_seq[1] for candidate_seq in sorted(next_candidate_seqs[i])] for i in range(inp.size(1))]
                next_candidate_inputs = [[candidate_input[1] for candidate_input in sorted(next_candidate_inputs[i])] for i in range(inp.size(1))]
#                 now that each best 3 sequences for each sentence is selected, create new candidates.
                for k in range(min(len(next_candidate_inputs[0]), len(next_candidate_scores[0]), len(next_candidate_seqs[0]))):
                    decoder_input = torch.LongTensor([[next_candidate_inputs[i][k] for i in range(inp.size(1))]])
                    next_scores = [next_candidate_scores[i][k] for i in range(inp.size(1))]
                    next_seqs = [next_candidate_seqs[i][k] for i in range(inp.size(1))]
                    next_candidates.append((next_scores, next_seqs, decoder_hidden, decoder_input))                            
                candidates = next_candidates
            pred_outputs = [pred_out + [str(EOS_ID)] for pred_out in candidates[0][1]]
            decoder_outputs += pred_outputs
            print(' '.join([en.id2word[int(i)] for i in pred_outputs[0]])) 
            print(candidates[0][0])
            
    return corpus_bleu(decoder_outputs, true_outputs, n_grams)


In [84]:
start = time.time()
print(testBleuScore(train_loader, 4, model))
end = time.time()
print(end - start)

<sos> tôi muốn cho các bạn biết về sự to lớn của những nỗ lực khoa học đã góp phần làm nên các dòng tít bạn thường thấy trên báo . <eos>
<sos> i d like to talk to you today about the scale of the scientific effort that goes into making the headlines you see in the paper . <eos>
<sos> thank you re unaware effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects effects <eos>
[28.83336627483368, 28.869981050491333]
0.8967218816691838
0.17956876754760742


In [11]:
encoder.embedding(inp)

RuntimeError: cuda runtime error (59) : device-side assert triggered at /opt/conda/conda-bld/pytorch_1532582123400/work/aten/src/THC/generic/THCTensorCopy.cpp:70