In [16]:
import time
import math
import random
import os
from itertools import zip_longest

import numpy as np
import pandas as pd

import pickle as pkl
import gzip

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torch.nn.functional as F

from torch import optim

from utils import asMinutes, timeSince, load_zipped_pickle, corpus_bleu, directories
from langUtils import loadLangPairs, langDataset, langCollateFn, initHybridEmbeddings, EncoderRNN, DecoderRNN
from trainUtils import train, fit, bleuEval

import matplotlib.pyplot as plt
# Render figures with the non-interactive 'agg' backend.
plt.switch_backend('agg')

# Seaborn defaults first, then the dark grid style and the "paper" context.
import seaborn as sns
sns.set()
sns.set_style("darkgrid")
sns.set_context("paper")

# Prefer CUDA when torch reports it as available, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
torch.cuda.empty_cache()

In [2]:
# Project directories as returned by utils.directories().
data_dir, em_dir, fig_dir = directories()

# Reserved vocabulary ids for the special symbols, plus the tuple of all of them.
PAD_ID, UNK_ID, SOS_ID, EOS_ID = 0, 1, 2, 3
SPECIAL_SYMBOLS_ID = (PAD_ID, UNK_ID, SOS_ID, EOS_ID)
NUM_SPECIAL = len(SPECIAL_SYMBOLS_ID)

# Language pair objects for the "vi" pair (presumably Vietnamese source and
# English target -- confirm against langUtils.loadLangPairs).
vi, en = loadLangPairs("vi")

In [3]:
BATCH_SIZE = 64


def _within_max_length(src_sentence, tgt_sentence):
    """Return whether both sentences are strictly shorter than their language's max_length."""
    src_ok = len(src_sentence) < vi.max_length
    tgt_ok = len(tgt_sentence) < en.max_length
    return src_ok and tgt_ok


# Full training set: keep only pairs where both sides pass the length filter.
train_dataset = langDataset([
    (vi.train_num[i], en.train_num[i])
    for i in range(len(vi.train_num))
    if _within_max_length(vi.train[i], en.train[i])
])

# Tiny unfiltered subset (two full batches plus one extra pair).
overfit_dataset = langDataset([
    (vi.train_num[i], en.train_num[i])
    for i in range(2 * BATCH_SIZE + 1)
])

# NOTE(review): train_loader wraps overfit_dataset, not train_dataset --
# looks like a deliberate overfitting sanity check; confirm before a real run.
train_loader = torch.utils.data.DataLoader(
    dataset=overfit_dataset,
    batch_size=BATCH_SIZE,
    collate_fn=langCollateFn,
    shuffle=False,
)

# Dev set uses the same length filter as the training set.
dev_dataset = langDataset([
    (vi.dev_num[i], en.dev_num[i])
    for i in range(len(vi.dev_num))
    if _within_max_length(vi.dev[i], en.dev[i])
])

# NOTE(review): the dev loader shuffles; confirm this is intended for evaluation.
dev_loader = torch.utils.data.DataLoader(
    dataset=dev_dataset,
    batch_size=BATCH_SIZE,
    collate_fn=langCollateFn,
    shuffle=True,
)

In [4]:
class LocalAttn(torch.nn.Module):
    """Dot-product attention producing normalized weights over encoder outputs.

    The score for each encoder position is the dot product (over the last
    axis) between the decoder state and that position's encoder output; the
    scores are then softmax-normalized across positions.
    """

    def __init__(self, hidden_size):
        """Store ``hidden_size``; the module itself has no learnable parameters."""
        super(LocalAttn, self).__init__()

        self.hidden_size = hidden_size

    def dot_score(self, hidden, encoder_output):
        """Return the elementwise product of the inputs summed over axis 2.

        Both inputs must be broadcastable against each other and 3-D.
        Assumes ``hidden`` is (1, batch, hidden) and ``encoder_output`` is
        (seq_len, batch, hidden), giving (seq_len, batch) -- TODO confirm
        against the encoder's output layout.
        """
        return torch.sum(hidden * encoder_output, dim=2)

    def forward(self, hidden, encoder_outputs):
        """Compute attention weights for one decoder step.

        Args:
            hidden: decoder state, broadcastable against ``encoder_outputs``.
            encoder_outputs: 3-D tensor of encoder outputs.

        Returns:
            Tensor of softmax weights; with the assumed layouts above its
            shape is (batch, 1, seq_len) and each row sums to 1.
        """
        # Fix: the original body referenced the undefined name `encoder_output`
        # (the parameter is `encoder_outputs`), raising NameError on every call.
        attn_energies = self.dot_score(hidden, encoder_outputs).t()
        # Normalize across the transposed score matrix's second axis, then add
        # a singleton axis so the result can be used directly with bmm.
        return F.softmax(attn_energies, dim=1).unsqueeze(1)

In [5]:
class LocalAttnDecoder(nn.Module):
    """GRU decoder step that combines its output with an attention context.

    Args:
        params: dict with keys ``'attn'`` (callable/module returning attention
            weights), ``'hidden_size'``, ``'output_size'``, ``'n_layers'`` and
            ``'dropout'``.
        raw_emb: passed through to ``initHybridEmbeddings``.
        learn_ids: passed through to ``initHybridEmbeddings``.
    """

    def __init__(self, params, raw_emb, learn_ids):
        super(LocalAttnDecoder, self).__init__()

        self.attn = params['attn']
        self.hidden_size = params['hidden_size']
        self.output_size = params['output_size']
        self.n_layers = params['n_layers']
        self.dropout = params['dropout']

        # Define layers
        # Fix: was `elf.embedding`, a NameError at construction time.
        self.embedding = initHybridEmbeddings(raw_emb, learn_ids)
        self.embedding_dropout = nn.Dropout(self.dropout)
        # Fix: the bare `n_layers` was undefined here; use the stored attribute.
        # Dropout is disabled for a single-layer GRU.
        gru_dropout = 0 if self.n_layers == 1 else self.dropout
        self.gru = nn.GRU(self.embedding.embedding_dim, self.hidden_size, self.n_layers, dropout=gru_dropout)
        # Fix: was `self.hidden_szie` (typo), an AttributeError.
        self.concat = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, inp, prev_hidden, encoder_output):
        """Run one decoding step.

        Args:
            inp: token ids for the current step -- assumes shape (1, batch),
                TODO confirm against the training loop.
            prev_hidden: previous GRU hidden state.
            encoder_output: encoder outputs -- assumes (seq_len, batch, hidden),
                TODO confirm.

        Returns:
            Tuple ``(output, hidden)``: the scores produced by the final linear
            layer (last dimension ``output_size``; no softmax is applied here)
            and the new GRU hidden state.
        """
        embedded = self.embedding(inp)
        embedded = self.embedding_dropout(embedded)

        output, hidden = self.gru(embedded, prev_hidden)

        # Weighted sum of encoder outputs using the attention weights.
        attn_weights = self.attn(output, encoder_output)
        context = attn_weights.bmm(encoder_output.transpose(0, 1)).squeeze(1)
        output = output.squeeze(0)

        # Merge GRU output and context, then project back to hidden_size.
        concat_input = torch.cat((output, context), 1)
        concat_output = torch.tanh(self.concat(concat_input))

        output = self.out(concat_output)
        return output, hidden

In [6]:
LEARNING_RATE = 0.01

# Encoder and decoder share the same hidden size; the decoder's output layer
# spans en.n_words entries.
encoder_params = dict(hidden_size=256, n_layers=1)
decoder_params = dict(
    hidden_size=encoder_params['hidden_size'],
    n_layers=1,
    output_size=en.n_words,
)

# Each model gets its own Adam optimizer at the shared learning rate.
encoder = EncoderRNN(encoder_params, vi.emb, vi.learn_ids).to(device)
encoder_optim = optim.Adam(encoder.parameters(), lr=LEARNING_RATE)

decoder = DecoderRNN(decoder_params, en.emb, en.learn_ids).to(device)
decoder_optim = optim.Adam(decoder.parameters(), lr=LEARNING_RATE)

# Targets equal to PAD_ID are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=PAD_ID).to(device)

In [8]:
# Train the encoder/decoder pair and collect the loss and score histories.
# NOTE(review): the positional 5 and 1 are defined by trainUtils.fit --
# presumably epoch count and reporting interval; confirm against its signature.
losses, train_scores, dev_scores = fit(
    train_loader,
    dev_loader,
    encoder,
    decoder,
    encoder_optim,
    decoder_optim,
    criterion,
    5,
    1,
    "vi",
)

Initializing
*************************************************
Epoch: 0
True Translation: ['<sos>', 'in', '4', 'minutes', ',', 'atmospheric', 'chemist', '<unk>', 'pike', 'provides', 'a', 'glimpse', 'of', 'the', 'massive', 'scientific', 'effort', 'behind', 'the', 'bold', 'headlines', 'on', 'climate', 'change', ',', 'with', 'her', 'team', '--', 'one', 'of', 'thousands', 'who', 'contributed', '--', 'taking', 'a', 'risky', 'flight', 'over', 'the', 'rainforest', 'in', 'pursuit', 'of', 'data', 'on', 'a', 'key', 'molecule', '.', '<eos>']
Predicted Translation: ['<sos>', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '<eos>']
Time Elapsed: 0m 13s | Loss: 5.0
Train Score: 0.34 | Dev Score: 0.1488
*************************************************

*************************************************
Epoch: 0
True Translation: ['<sos>', 'in', '4', 'minutes', ',', 'atmospheric', 'chemist', '<unk>', 'pike', 'provides', 'a', 'glimpse', 'of', 'the', 'massive', 'scientific', 'effort', '

In [None]:
# Build the attention module and an attention-based decoder that reuses the
# encoder's hidden size.
attn = LocalAttn(encoder_params['hidden_size'])
attn_decoder_params = {
    'hidden_size': encoder_params['hidden_size'],
    'n_layers': 1,
    'output_size': en.n_words,
    'dropout': 0.1,
    'attn': attn,
}
attn_decoder = LocalAttnDecoder(attn_decoder_params, en.emb, en.learn_ids).to(device)

# Fix: the optimizer was built from `decoder.parameters()`, so the new
# attention decoder's weights would never have been updated. The
# `decoder_optim` name is kept so later cells that pass it on still work.
decoder_optim = optim.Adam(attn_decoder.parameters(), lr=LEARNING_RATE)