In [1]:
import BasicModel
import pickle as pkl
import numpy as np
import gzip
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import time

from utils import asMinutes, timeSince, load_zipped_pickle, corpus_bleu, directories
from langUtils import loadLangPairs, langDataset, langCollateFn, initHybridEmbeddings, tensorToList

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

zh, en = loadLangPairs("zh")
BATCH_SIZE = 32
train_dataset = langDataset([(zh.train_num[i], en.train_num[i]) for i in range(len(zh.train_num)) if (len(zh.train[i]) < zh.max_length) & (len(en.train[i]) < en.max_length)])
# overfit_dataset = langDataset([(zh.train_num[i], en.train_num[i]) for i in range(32)])
# overfit_loader = torch.utils.data.DataLoader(dataset=overfit_dataset,
#                                            batch_size=BATCH_SIZE,
#                                            collate_fn=langCollateFn,
#                                            shuffle=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=langCollateFn,
                                           shuffle=True)
dev_dataset = langDataset([(zh.dev_num[i], en.dev_num[i]) for i in range(len(zh.dev_num)) if (len(zh.dev[i]) < zh.max_length) & (len(en.dev[i]) < en.max_length)])
dev_loader = torch.utils.data.DataLoader(dataset=dev_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=langCollateFn,
                                           shuffle=True)

In [2]:
HIDDEN_SIZE = 128
learning_rate = 0.01

## Add ignore index
zh_criterion = nn.CrossEntropyLoss(ignore_index=0).to(device)

zh_encoder = BasicModel.EncoderRNN(input_size = zh.n_words, hidden_size = HIDDEN_SIZE, num_layers = 1, batch_size = BATCH_SIZE, raw_emb=zh.emb, learn_ids=zh.learn_ids).to(device)
zh_decoder = BasicModel.DecoderRNN(hidden_size = HIDDEN_SIZE, output_size = en.n_words, num_layers = 1, batch_size = BATCH_SIZE, raw_emb=en.emb, learn_ids=en.learn_ids).to(device)

zh_encoder_optimizer = optim.Adam(zh_encoder.parameters(), lr=learning_rate)
zh_decoder_optimizer = optim.Adam(zh_decoder.parameters(), lr=learning_rate)

In [1]:
SPECIAL_SYMBOLS_ID = PAD_ID, UNK_ID, SOS_ID, EOS_ID = 0, 1, 2, 3

In [3]:
def fit(train_loader, dev_loader, encoder, decoder, encoder_opt, decoder_opt, criterion, batch_size, epochs, print_every, hidden_size):
    start = time.time()
    print('Initializing Model Training + Eval...')
    losses = []
    train_scores = []
    dev_scores = []
    for epoch in range(epochs):
        loss = 0
        for i, (inp, inp_lens, output, out_mask, out_max) in enumerate(train_loader):
            if (len(inp[0]) != batch_size):
                continue
            inp.transpose_(0,1)
            output.transpose_(0,1)
            inp = inp.to(device)
            output = output.to(device)
            loss += BasicModel.train(inp, output, out_max, encoder, decoder, encoder_opt, decoder_opt, criterion, batch_size, hidden_size)
            if i % print_every == 0 and i > 0:
                losses.append(loss/i)
                print("Time Elapsed: {} | Loss: {:.4}".format(asMinutes(time.time() - start),
                                                                                loss/i))
                pkl.dump(encoder, open("./zh-g-base-encoder-adam0.01.p", "wb"))
                pkl.dump(decoder, open("./zh-g-base-decoder-adam0.01.p", "wb"))
        train_score = BasicModel.bleuEval(encoder, decoder, train_loader, batch_size, hidden_size)
        train_scores.append(train_score)
        print("Epoch: {} | Time Elapsed: {} | Loss: {:.4} | Train BLEU: {:.4}".format(epoch + 1, 
                                                                                                        asMinutes(time.time() - start),
                                                                                                        loss/len(train_loader), 
                                                                                                        train_score))

In [None]:
fit(train_loader, dev_loader, zh_encoder, zh_decoder, zh_encoder_optimizer, zh_decoder_optimizer, zh_criterion, BATCH_SIZE, 15, 300, HIDDEN_SIZE)

Initializing Model Training + Eval...
Time Elapsed: 4m 39s | Loss: 5.744
Time Elapsed: 9m 18s | Loss: 5.521
Time Elapsed: 13m 58s | Loss: 5.418
Time Elapsed: 18m 40s | Loss: 5.279
Time Elapsed: 23m 20s | Loss: 5.179
Time Elapsed: 28m 0s | Loss: 5.101
Time Elapsed: 32m 39s | Loss: 5.05
Time Elapsed: 37m 19s | Loss: 5.003
Time Elapsed: 42m 3s | Loss: 4.962
Time Elapsed: 46m 45s | Loss: 4.932
Time Elapsed: 51m 26s | Loss: 4.902
Time Elapsed: 56m 4s | Loss: 4.875
Time Elapsed: 60m 45s | Loss: 4.846
Time Elapsed: 65m 27s | Loss: 4.821
Time Elapsed: 70m 6s | Loss: 4.8
Time Elapsed: 74m 47s | Loss: 4.781
Time Elapsed: 79m 24s | Loss: 4.761
Time Elapsed: 83m 57s | Loss: 4.742
Epoch: 1 | Time Elapsed: 90m 59s | Loss: 4.738 | Train BLEU: 1.116
Time Elapsed: 94m 15s | Loss: 4.287
Time Elapsed: 97m 34s | Loss: 4.276
Time Elapsed: 100m 50s | Loss: 4.276
Time Elapsed: 104m 53s | Loss: 4.277
Time Elapsed: 109m 32s | Loss: 4.268
Time Elapsed: 114m 11s | Loss: 4.256
Time Elapsed: 118m 52s | Loss: 4.25


In [5]:
decoder

NameError: name 'decoder' is not defined

In [None]:
# # ^output got cut off due to accidentally exiting jupyter notebook