In [1]:
import numpy as np 
import pandas as pd 
from data_processing import generate_vocab, process_data, create_dataloaders
from model import get_pretrained_emb, EncoderRNN, DecoderRNN, DecoderAttnRNN, EncoderDecoder, EncoderDecoderAttn
from train_eval import train_and_eval, count_parameters, summarize_results, plot_single_learning_curve, load_experiment_log
import pickle as pkl 
import torch
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
# Settings needed to build the data loaders.

# language pair: source -> target
SRC_LANG, TARG_LANG = 'zh', 'en'

# per-side limits; source and target currently share the same values
SRC_MAX_SENTENCE_LEN = TARG_MAX_SENTENCE_LEN = 10
SRC_VOCAB_SIZE = TARG_VOCAB_SIZE = 30000

# batch size handed to create_dataloaders
BATCH_SIZE = 64

In [3]:
# One-off step: generating the vocab takes a long time, so run it once and save the result to a pickle that later runs can reload.
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [4]:
# Reload the cached vocabulary from its pickle file.
# NOTE(security): unpickling can execute arbitrary code -- only load vocab files you generated yourself.
vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
with open(vocab_filename, "rb") as vocab_file:  # context manager closes the handle even if unpickling fails
    vocab = pkl.load(vocab_file)

# Process the corpus three times with identical settings except for `sample_limit`:
#   data            - no sample_limit passed
#   data_minibatch  - sample_limit=BATCH_SIZE
#   data_minitrain  - sample_limit=1000
# NOTE(review): presumably sample_limit caps the number of sentence pairs kept -- confirm in data_processing.
data = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, filter_long=False)
data_minibatch = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, sample_limit=BATCH_SIZE, filter_long=False)
data_minitrain = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, sample_limit=1000, filter_long=False)

In [5]:
# Build one set of dataloaders per processed dataset (full, single-minibatch, mini-train),
# all with the same sentence-length limits and batch size. The generator is consumed in
# order by the unpacking, so create_dataloaders is called for data, then data_minibatch,
# then data_minitrain.
loaders_full, loaders_minibatch, loaders_minitrain = (
    create_dataloaders(dataset, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, BATCH_SIZE)
    for dataset in (data, data_minibatch, data_minitrain)
)

In [6]:
# model architecture params
NETWORK_TYPE = 'rnn'
RNN_CELL_TYPE = 'gru'
NUM_LAYERS = 2
ENC_HIDDEN_DIM = 512
# NOTE(review): presumably doubled because the encoder is bidirectional -- confirm in model.py
DEC_HIDDEN_DIM = 2 * ENC_HIDDEN_DIM
TEACHER_FORCING_RATIO = 1
CLIP_GRAD_MAX_NORM = 1
ENC_DROPOUT = 0  # alternative value: 0.2
DEC_DROPOUT = 0  # alternative value: 0.2
ATTENTION_TYPE = 'additive'

# training params
NUM_EPOCHS = 1  # alternative value: 5
LR = 0.0003  # alternative value: 0.0005
OPTIMIZER = 'Adam'
LAZY_TRAIN = True

# name the model and experiment
EXPERIMENT_NAME = 'hyperparameter_tuning_dropout'
if NETWORK_TYPE == 'rnn':
    MODEL_NAME = '{}-rnn-{}-attn'.format(SRC_LANG, ATTENTION_TYPE)
elif NETWORK_TYPE == 'cnn':
    MODEL_NAME = '{}-cnn'.format(SRC_LANG)
else:
    # Fail here rather than leaving MODEL_NAME undefined (NameError in a later cell)
    # or silently reusing a stale value left in the kernel by an earlier run.
    raise ValueError(
        "Unsupported NETWORK_TYPE {!r}; expected 'rnn' or 'cnn'".format(NETWORK_TYPE))

In [7]:
# Collect every run setting in a single dict so it can be saved with the results later.
params = dict(
    experiment_name=EXPERIMENT_NAME,
    model_name=MODEL_NAME,
    src_lang=SRC_LANG,
    targ_lang=TARG_LANG,
    rnn_cell_type=RNN_CELL_TYPE,
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
    src_vocab_size=SRC_VOCAB_SIZE,
    targ_vocab_size=TARG_VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    enc_hidden_dim=ENC_HIDDEN_DIM,
    dec_hidden_dim=DEC_HIDDEN_DIM,
    teacher_forcing_ratio=TEACHER_FORCING_RATIO,
    clip_grad_max_norm=CLIP_GRAD_MAX_NORM,
    enc_dropout=ENC_DROPOUT,
    dec_dropout=DEC_DROPOUT,
    attention_type=ATTENTION_TYPE,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    learning_rate=LR,
    optimizer=OPTIMIZER,
    lazy_train=LAZY_TRAIN,
)

In [10]:
# Dropout sweep: train and evaluate one fresh attention model per candidate rate.
DROPOUT_CANDIDATES = [0, 0.2, 0.5]  # applied to both encoder and decoder
SEED = 42  # fixed so candidates are compared from the same RNG starting state

for candidate in DROPOUT_CANDIDATES:

    print("Training with dropout = {}".format(candidate))

    # Reseed before building each model so runs are repeatable as far as the torch and
    # numpy RNGs are concerned, and dropout is the only thing that differs between runs.
    # NOTE(review): if the project code also draws from Python's `random`, seed that too.
    torch.manual_seed(SEED)
    np.random.seed(SEED)

    # Build a per-run copy instead of overwriting keys in the shared `params` dict:
    # the base config stays intact, so re-running this cell (or any cell that reads
    # `params` afterwards) does not see values left over from the last candidate.
    run_params = dict(
        params,
        enc_dropout=candidate,
        dec_dropout=candidate,
        model_name='{}-rnn-{}-attn-{}-dropout'.format(SRC_LANG, ATTENTION_TYPE, candidate),
    )

    # instantiate model (embeddings are rebuilt every iteration, so no objects are
    # carried over from a previous candidate's run)
    encoder = EncoderRNN(rnn_cell_type=RNN_CELL_TYPE, enc_hidden_dim=ENC_HIDDEN_DIM, num_layers=NUM_LAYERS,
                         src_max_sentence_len=SRC_MAX_SENTENCE_LEN, enc_dropout=candidate,
                         pretrained_word2vec=get_pretrained_emb(vocab[SRC_LANG]['word2vec'], vocab[SRC_LANG]['token2id']))
    decoder = DecoderAttnRNN(rnn_cell_type=RNN_CELL_TYPE, dec_hidden_dim=DEC_HIDDEN_DIM, enc_hidden_dim=ENC_HIDDEN_DIM,
                             num_layers=NUM_LAYERS, targ_vocab_size=TARG_VOCAB_SIZE,
                             src_max_sentence_len=SRC_MAX_SENTENCE_LEN, targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
                             dec_dropout=candidate, attention_type=ATTENTION_TYPE,
                             pretrained_word2vec=get_pretrained_emb(vocab[TARG_LANG]['word2vec'],
                                                                    vocab[TARG_LANG]['token2id']))
    model = EncoderDecoderAttn(encoder, decoder, vocab[TARG_LANG]['token2id']).to(device)

    # train and eval; checkpoints and the experiment log are written by train_and_eval
    # (save_checkpoint=True, save_to_log=True)
    model, results = train_and_eval(
        model=model, loaders_full=loaders_full, loaders_minibatch=loaders_minibatch, loaders_minitrain=loaders_minitrain,
        params=run_params, vocab=vocab, print_intermediate=100, save_checkpoint=True, save_to_log=True,
        lazy_eval=False, print_attn=True, inspect_samples=1)

Training with dropout = 0
Epoch: 0.00, Train Loss: 9.97, Val Loss: 10.12, Train BLEU: 0.17, Val BLEU: 0.14, Minutes Elapsed: 0.18
Sampling from training predictions...
Source: 它们 有 的 会 贴近 潜水 潜水艇 它们 的 眼睛
Reference: they come right up to the submarine -- they
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[0.1539, 0.1332, 0.1155, 0.1023, 0.0942, 0.0897, 0.0889, 0.0859, 0.0741,
         0.0624],
        [0.1544, 0.1334, 0.1155, 0.1022, 0.0941, 0.0896, 0.0888, 0.0858, 0.0740,
         0.0623],
        [0.1544, 0.1334, 0.1155, 0.1022, 0.0941, 0.0896, 0.0888, 0.0858, 0.0740,
         0.0623],
        [0.1544, 0.1334, 0.1155, 0.1022, 0.0941, 0.0896, 0.0888, 0.0858, 0.0740,
         0.0623],
        [0.1543, 0.1334, 0.1156, 0.1022, 0.0941, 0.0896, 0.0888, 0.0858, 0.0740,
         0.0623],
        [0.1543, 0.1334, 0.1156, 0.1022, 0.0941, 0.0896, 0.0888, 0.0858, 0.0740,
         0.0622],
        [0.1543, 0.1334, 0.1156, 0.1022, 0.0941, 0.0896, 0.0888, 0.0858, 0.0740,

In [16]:
# Load every logged run for this experiment and show only the columns relevant to the dropout sweep.
experiment_results = load_experiment_log(experiment_name=EXPERIMENT_NAME)
results_summary = summarize_results(experiment_results)
report_columns = ['enc_dropout', 'dec_dropout', 'best_val_loss', 'best_val_bleu', 'runtime']
results_summary[report_columns]

Unnamed: 0,enc_dropout,dec_dropout,best_val_loss,best_val_bleu,runtime
2,0.5,0.5,10.148959,0.141454,0.185415
1,0.2,0.2,10.134708,0.14206,0.187911
0,0.0,0.0,10.119202,0.144099,0.188242


In [None]:
# TODO: plot the learning curves for the dropout runs