In [1]:
import BasicModel
import pickle as pkl
import numpy as np
import gzip
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import time

from utils import asMinutes, timeSince, load_zipped_pickle, corpus_bleu, directories
from langUtils import loadLangPairs, langDataset, langCollateFn, initHybridEmbeddings, tensorToList

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

zh, en = loadLangPairs("zh")
BATCH_SIZE = 32
train_dataset = langDataset([(zh.train_num[i], en.train_num[i]) for i in range(len(zh.train_num)) if (len(zh.train[i]) < zh.max_length) & (len(en.train[i]) < en.max_length)])
# overfit_dataset = langDataset([(zh.train_num[i], en.train_num[i]) for i in range(32)])
# overfit_loader = torch.utils.data.DataLoader(dataset=overfit_dataset,
#                                            batch_size=BATCH_SIZE,
#                                            collate_fn=langCollateFn,
#                                            shuffle=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=langCollateFn,
                                           shuffle=True)
dev_dataset = langDataset([(zh.dev_num[i], en.dev_num[i]) for i in range(len(zh.dev_num)) if (len(zh.dev[i]) < zh.max_length) & (len(en.dev[i]) < en.max_length)])
dev_loader = torch.utils.data.DataLoader(dataset=dev_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=langCollateFn,
                                           shuffle=True)

In [3]:
HIDDEN_SIZE = 128
learning_rate = 0.01

## Add ignore index
zh_criterion = nn.CrossEntropyLoss(ignore_index=0).to(device)

zh_encoder = BasicModel.EncoderRNN(input_size = zh.n_words, hidden_size = HIDDEN_SIZE, num_layers = 1, batch_size = BATCH_SIZE, raw_emb=zh.emb, learn_ids=zh.learn_ids).to(device)
zh_decoder = BasicModel.DecoderRNN(hidden_size = HIDDEN_SIZE, output_size = en.n_words, num_layers = 1, batch_size = BATCH_SIZE, raw_emb=en.emb, learn_ids=en.learn_ids).to(device)

zh_encoder_optimizer = optim.Adam(zh_encoder.parameters(), lr=learning_rate)
zh_decoder_optimizer = optim.Adam(zh_decoder.parameters(), lr=learning_rate)

In [5]:
def fit(train_loader, dev_loader, encoder, decoder, encoder_opt, decoder_opt, criterion, batch_size, epochs, print_every, hidden_size):
    start = time.time()
    print('Initializing Model Training + Eval...')
    losses = []
    train_scores = []
    dev_scores = []
    for epoch in range(epochs):
        loss = 0
        for i, (inp, inp_lens, output, out_mask, out_max) in enumerate(train_loader):
            if (len(inp[0]) != batch_size):
                continue
            inp.transpose_(0,1)
            output.transpose_(0,1)
            inp = inp.to(device)
            output = output.to(device)
            loss += BasicModel.train(inp, output, out_max, encoder, decoder, encoder_opt, decoder_opt, criterion, batch_size, hidden_size)
            if i % print_every == 0 and i > 0:
                losses.append(loss/i)
                print("Time Elapsed: {} | Loss: {:.4}".format(asMinutes(time.time() - start),
                                                                                loss/i))
        train_score = BasicModel.bleuEval(encoder, decoder, train_loader, batch_size, hidden_size)
        dev_score = BasicModel.bleuEval(encoder, decoder, dev_loader, batch_size, hidden_size)
        train_scores.append(train_score)
        dev_scores.append(dev_score)
        print("Epoch: {} | Time Elapsed: {} | Loss: {:.4} | Train BLEU: {:.4} | Dev BLEU: {:.4}".format(epoch + 1 + 10, 
                                                                                                        asMinutes(time.time() - start),
                                                                                                        loss/len(train_loader), 
                                                                                                        train_score, 
                                                                                                        dev_score))

In [None]:
fit(train_loader, dev_loader, zh_encoder, zh_decoder, zh_encoder_optimizer, zh_decoder_optimizer, zh_criterion, BATCH_SIZE, 15, 300, HIDDEN_SIZE)

Initializing Model Training + Eval...
Time Elapsed: 3m 3s | Loss: 5.481
Time Elapsed: 5m 32s | Loss: 5.095
Time Elapsed: 7m 27s | Loss: 4.896
Time Elapsed: 9m 23s | Loss: 4.769
Time Elapsed: 11m 44s | Loss: 4.679
Time Elapsed: 14m 50s | Loss: 4.609
Time Elapsed: 17m 56s | Loss: 4.552
Time Elapsed: 21m 0s | Loss: 4.508
Time Elapsed: 24m 6s | Loss: 4.468
Time Elapsed: 27m 11s | Loss: 4.435
Time Elapsed: 30m 16s | Loss: 4.408
Time Elapsed: 33m 22s | Loss: 4.384
Time Elapsed: 36m 25s | Loss: 4.362
Time Elapsed: 39m 31s | Loss: 4.342
Time Elapsed: 42m 36s | Loss: 4.323
Time Elapsed: 45m 41s | Loss: 4.306
Time Elapsed: 48m 47s | Loss: 4.291
Time Elapsed: 51m 51s | Loss: 4.277
Epoch: 11 | Time Elapsed: 59m 46s | Loss: 4.274 | Train BLEU: 2.297 | Dev BLEU: 2.004
Time Elapsed: 62m 52s | Loss: 3.9
Time Elapsed: 65m 57s | Loss: 3.883
Time Elapsed: 69m 2s | Loss: 3.886
Time Elapsed: 72m 8s | Loss: 3.892
Time Elapsed: 74m 49s | Loss: 3.892
Time Elapsed: 76m 46s | Loss: 3.893
Time Elapsed: 78m 41s |