In [1]:
import numpy as np 
import pandas as pd 
from data_processing import generate_vocab, process_data, create_dataloaders 
from model import get_pretrained_emb, EncoderDecoder, EncoderRNN, DecoderRNN, DecoderSimpleRNN, EncoderSimpleRNN, \
    Attention, DecoderAttnRNN, DecoderRNNV2, EncoderDecoderAttention, EncoderSimpleRNN_Test, DecoderAttnRNN_Test
from train_eval import count_parameters, summarize_results, \
    plot_single_learning_curve, load_experiment_log
from train_eval import train_and_eval, train_and_eval_attn 
import importlib
import pickle as pkl 
import torch

In [2]:
# --- model identification ---
MODEL_NAME = 'zh-seq2seq-rnn-attention'
SRC_LANG = 'zh'
TARG_LANG = 'en'

# --- data processing ---
SRC_MAX_SENTENCE_LEN = 10
TARG_MAX_SENTENCE_LEN = 10
SRC_VOCAB_SIZE = 30000
TARG_VOCAB_SIZE = 30000

# --- model architecture ---
RNN_CELL_TYPE = 'gru'
NUM_LAYERS = 2
ENC_HIDDEN_DIM = 256  # 512 is noted as an alternative setting
DEC_HIDDEN_DIM = ENC_HIDDEN_DIM * 2  # decoder hidden size is tied to twice the encoder's
TEACHER_FORCING_RATIO = 1
CLIP_GRAD_MAX_NORM = 1
ENC_DROPOUT = 0.2  # TODO: was marked "to actually implement" -- confirm the encoder applies it
DEC_DROPOUT = 0.2  # TODO: was marked "to actually implement" -- confirm the decoder applies it

# --- training ---
BATCH_SIZE = 32
NUM_EPOCHS = 200
LR = 5e-4
OPTIMIZER = 'Adam'
LAZY_TRAIN = True

In [3]:
# Snapshot of every hyperparameter above, keyed by lower-case name, so the
# configuration can be passed to training and stored alongside the results.
params = {
    # identification
    'model_name': MODEL_NAME,
    'src_lang': SRC_LANG,
    'targ_lang': TARG_LANG,
    'rnn_cell_type': RNN_CELL_TYPE,
    # data processing
    'src_max_sentence_len': SRC_MAX_SENTENCE_LEN,
    'targ_max_sentence_len': TARG_MAX_SENTENCE_LEN,
    'src_vocab_size': SRC_VOCAB_SIZE,
    'targ_vocab_size': TARG_VOCAB_SIZE,
    # architecture
    'num_layers': NUM_LAYERS,
    'enc_hidden_dim': ENC_HIDDEN_DIM,
    'dec_hidden_dim': DEC_HIDDEN_DIM,
    'teacher_forcing_ratio': TEACHER_FORCING_RATIO,
    'clip_grad_max_norm': CLIP_GRAD_MAX_NORM,
    'enc_dropout': ENC_DROPOUT,
    'dec_dropout': DEC_DROPOUT,
    # training
    'batch_size': BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'learning_rate': LR,
    'optimizer': OPTIMIZER,
    'lazy_train': LAZY_TRAIN,
}

In [4]:
#vocab_test = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)

In [5]:
#vocab['zh']['id2token'][987]

In [6]:
#vocab['zh']['token2id']['森林']

In [7]:
#vocab['en']['token2id']['activity']

In [8]:
#vocab['en']['id2token'][987]

In [9]:
# # takes a long time to process, save to pickle for reimport in future 
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [10]:
# Reload the vocabulary that was previously built with generate_vocab and pickled
# (see the commented-out cell above), then build the datasets from it.
vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# NOTE: pickle.load can execute arbitrary code -- only load vocab files produced by this project.
# The context manager closes the file handle, which a bare open() inside pkl.load() never did.
with open(vocab_filename, "rb") as vocab_file:
    vocab = pkl.load(vocab_file)

# Full dataset, plus two reduced variants capped via sample_limit
# (presumably for quick overfitting / smoke-test runs -- confirm in data_processing).
data = process_data(SRC_LANG, TARG_LANG, vocab)
data_minibatch = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=BATCH_SIZE)
data_minitrain = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=1000)

In [11]:
# # takes a long time to process, save to pickle for reimport in future 
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab-fake.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [12]:
# vocab_filename = "{}-{}-vocab-fake.p".format(SRC_LANG, TARG_LANG)
# vocab = pkl.load(open(vocab_filename, "rb"))
# data = process_data(SRC_LANG, TARG_LANG, vocab)
# limited_data = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=BATCH_SIZE) 

In [13]:
# Build one set of dataloaders per dataset variant; all three share the same
# sentence-length limits and batch size. The generator is consumed in order,
# so the calls happen in the sequence full -> minibatch -> minitrain.
loaders_full, loaders_minibatch, loaders_minitrain = (
    create_dataloaders(dataset, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, BATCH_SIZE)
    for dataset in (data, data_minibatch, data_minitrain)
)

In [14]:
# define model

# NOTE: earlier experiments built the encoder with EncoderRNN / EncoderSimpleRNN and the
# decoder with DecoderRNN / DecoderRNNV2 / DecoderSimpleRNN / DecoderAttnRNN. Those classes
# are still imported at the top of the notebook; only the *_Test variants are active here.

# Encoder: takes the cell type, dropout and embeddings derived from the source vocabulary.
encoder = EncoderSimpleRNN_Test(
    rnn_cell_type=RNN_CELL_TYPE,
    enc_hidden_dim=ENC_HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    enc_dropout=ENC_DROPOUT,
    pretrained_word2vec=get_pretrained_emb(
        vocab[SRC_LANG]['word2vec'], vocab[SRC_LANG]['token2id']
    ),
)

# Decoder with attention: needs both hidden sizes, both max sentence lengths and
# embeddings derived from the target vocabulary.
decoder = DecoderAttnRNN_Test(
    rnn_cell_type=RNN_CELL_TYPE,
    dec_hidden_dim=DEC_HIDDEN_DIM,
    enc_hidden_dim=ENC_HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    targ_vocab_size=TARG_VOCAB_SIZE,
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
    dec_dropout=DEC_DROPOUT,
    pretrained_word2vec=get_pretrained_emb(
        vocab[TARG_LANG]['word2vec'], vocab[TARG_LANG]['token2id']
    ),
)

# Wrap encoder and decoder; the target token -> id mapping is passed as the third argument.
model = EncoderDecoderAttention(encoder, decoder, vocab[TARG_LANG]['token2id'])

In [None]:
# Train and evaluate the attention model. The call returns a model and a results
# object; progress lines (loss / BLEU) and a sampled translation with its attention
# weights are printed below the cell.
# NOTE(review): `model` is rebound to the returned value, so re-running this cell
# without re-running the model-definition cell may continue from the trained model -- confirm.
model, results = train_and_eval_attn(
    model=model,
    loaders_full=loaders_full,
    loaders_minibatch=loaders_minibatch,
    loaders_minitrain=loaders_minitrain,
    params=params,
    vocab=vocab,
    print_intermediate=True,
    save_checkpoint=True,
    save_to_log=True,
    lazy_eval=False,
    inspect_samples=1,
)

Epoch: 0.00, Train Loss: 10.03, Val Loss: 10.20, Train BLEU: 3.69, Val BLEU: 0.27
Sampling from training predictions...
Source: 大卫 <UNK> 这位 是 比尔 <UNK> 我 是 大卫 <UNK>
Reference: this is bill lange . i &apos;m dave gallo
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.1047, 0.1023, 0.1051, 0.1015, 0.0921, 0.0978, 0.1031, 0.1021, 0.0990,
         0.0923],
        [0.1047, 0.1023, 0.1051, 0.1015, 0.0921, 0.0978, 0.1031, 0.1021, 0.0990,
         0.0922],
        [0.1047, 0.1023, 0.1052, 0.1016, 0.0921, 0.0978, 0.1031, 0.1021, 0.0990,
         0.0922],
        [0.1047, 0.1023, 0.1052, 0.1016, 0.0921, 0.0978, 0.1031, 0.1021, 0.0990,
         0.0922],
        [0.1048, 0.1023, 0.1052, 0.1016, 0.0921, 0.0978, 0.1031, 0.1021, 0.0990,
         0.0922],
        [0.1048, 0.1023, 0.1052, 0.1016, 0.0921, 0.0978, 0.1031, 0.1021, 0.0990,
         0.0922],
        [0.1048, 0.1024, 0.1052, 0.1015, 0.0921, 0.0978, 0.1031, 0.1020, 0.0990,
         0.0922],
        [0.1048, 0.1024

Epoch: 4.00, Train Loss: 8.28, Val Loss: 9.52, Train BLEU: 0.31, Val BLEU: 0.22
Sampling from training predictions...
Source: 海洋 里 生物 的 多样 多样性 和 密度 要 比
Reference: the biodiversity and the <UNK> in the ocean is
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[0.0303, 0.0925, 0.1301, 0.1602, 0.1318, 0.0753, 0.1421, 0.1232, 0.0865,
         0.0280],
        [0.0295, 0.0922, 0.1301, 0.1609, 0.1322, 0.0751, 0.1426, 0.1236, 0.0863,
         0.0274],
        [0.0293, 0.0920, 0.1301, 0.1611, 0.1323, 0.0751, 0.1427, 0.1236, 0.0863,
         0.0274],
        [0.0293, 0.0919, 0.1300, 0.1611, 0.1323, 0.0752, 0.1426, 0.1236, 0.0865,
         0.0275],
        [0.0294, 0.0919, 0.1298, 0.1609, 0.1323, 0.0753, 0.1425, 0.1236, 0.0866,
         0.0277],
        [0.0295, 0.0920, 0.1298, 0.1607, 0.1322, 0.0754, 0.1424, 0.1236, 0.0867,
         0.0278],
        [0.0295, 0.0920, 0.1297, 0.1605, 0.1321, 0.0754, 0.1423, 0.1236, 0.0868,
         0.0279],
        [0.0296, 0.0920, 0.12

Epoch: 7.00, Train Loss: 6.83, Val Loss: 8.99, Train BLEU: 0.28, Val BLEU: 0.19
Sampling from training predictions...
Source: 还有 这些 摇晃 着 旋转 转着 的 触角 <EOS> <PAD>
Reference: it &apos;s got tentacles dangling , swirling around like
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[1.6944e-04, 1.0960e-03, 2.2571e-03, 2.9221e-03, 2.1334e-03, 8.2567e-04,
         1.1863e-03, 2.7416e-04, 9.2408e-06, 9.8913e-01],
        [8.6609e-05, 6.2077e-04, 1.3401e-03, 1.7676e-03, 1.2643e-03, 4.6205e-04,
         6.7492e-04, 1.4291e-04, 4.2833e-06, 9.9364e-01],
        [6.3650e-05, 4.6150e-04, 1.0049e-03, 1.3304e-03, 9.4725e-04, 3.4294e-04,
         5.0301e-04, 1.0542e-04, 3.2076e-06, 9.9524e-01],
        [6.2673e-05, 4.4583e-04, 9.6111e-04, 1.2662e-03, 9.0338e-04, 3.3097e-04,
         4.8395e-04, 1.0289e-04, 3.2284e-06, 9.9544e-01],
        [6.7743e-05, 4.6894e-04, 9.9683e-04, 1.3049e-03, 9.3467e-04, 3.4796e-04,
         5.0627e-04, 1.0977e-04, 3.5571e-06, 9.9526e-01],
        [

Epoch: 10.00, Train Loss: 5.63, Val Loss: 8.65, Train BLEU: 0.28, Val BLEU: 0.19
Sampling from training predictions...
Source: <UNK> 塞尔 <UNK> <UNK> 斯特 说 过 真正 的 探索
Reference: marcel proust said , &quot; the true voyage of
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[1.2411e-05, 1.3072e-04, 5.2154e-04, 2.9466e-03, 3.0546e-02, 2.1471e-01,
         3.0808e-01, 2.7389e-01, 1.5530e-01, 1.3860e-02],
        [6.5165e-06, 6.0912e-05, 2.4477e-04, 1.5056e-03, 2.0291e-02, 2.1071e-01,
         3.3230e-01, 2.8619e-01, 1.4068e-01, 8.0105e-03],
        [7.1135e-06, 6.0205e-05, 2.3172e-04, 1.3865e-03, 1.8785e-02, 2.0872e-01,
         3.3696e-01, 2.8859e-01, 1.3787e-01, 7.3790e-03],
        [7.3811e-06, 6.1221e-05, 2.3318e-04, 1.3840e-03, 1.8677e-02, 2.0848e-01,
         3.3735e-01, 2.8881e-01, 1.3763e-01, 7.3656e-03],
        [7.4708e-06, 6.1810e-05, 2.3506e-04, 1.3927e-03, 1.8744e-02, 2.0855e-01,
         3.3717e-01, 2.8871e-01, 1.3772e-01, 7.4108e-03],
        [7.5047e

Epoch: 14.00, Train Loss: 4.59, Val Loss: 8.64, Train BLEU: 0.35, Val BLEU: 0.22
Sampling from training predictions...
Source: 大家 想想 海洋 占 了 地球 球面 面积 的 75
Reference: when you think about it , the oceans are
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.0239, 0.0999, 0.1325, 0.1466, 0.1482, 0.1431, 0.1347, 0.1123, 0.0573,
         0.0015],
        [0.0099, 0.0884, 0.1384, 0.1626, 0.1652, 0.1556, 0.1406, 0.1039, 0.0352,
         0.0002],
        [0.0083, 0.0853, 0.1388, 0.1657, 0.1687, 0.1580, 0.1414, 0.1019, 0.0319,
         0.0002],
        [0.0081, 0.0852, 0.1388, 0.1660, 0.1690, 0.1582, 0.1414, 0.1016, 0.0315,
         0.0002],
        [0.0082, 0.0853, 0.1389, 0.1660, 0.1689, 0.1581, 0.1414, 0.1015, 0.0315,
         0.0002],
        [0.0082, 0.0855, 0.1390, 0.1659, 0.1688, 0.1581, 0.1413, 0.1015, 0.0315,
         0.0002],
        [0.0083, 0.0856, 0.1390, 0.1659, 0.1688, 0.1580, 0.1413, 0.1015, 0.0316,
         0.0002],
        [0.0083, 0.0857, 0.1390, 0

Epoch: 18.00, Train Loss: 4.13, Val Loss: 9.16, Train BLEU: 3.87, Val BLEU: 0.31
Sampling from training predictions...
Source: 大家 想想 海洋 占 了 地球 球面 面积 的 75
Reference: when you think about it , the oceans are
Model: <SOS> it the the the the the , , ,
Attention Weights: tensor([[0.0317, 0.1112, 0.1334, 0.1417, 0.1412, 0.1365, 0.1287, 0.1102, 0.0634,
         0.0021],
        [0.0198, 0.1077, 0.1387, 0.1509, 0.1504, 0.1435, 0.1323, 0.1065, 0.0496,
         0.0006],
        [0.0169, 0.1059, 0.1399, 0.1536, 0.1532, 0.1456, 0.1333, 0.1052, 0.0459,
         0.0004],
        [0.0159, 0.1053, 0.1403, 0.1546, 0.1543, 0.1465, 0.1336, 0.1046, 0.0445,
         0.0004],
        [0.0157, 0.1053, 0.1405, 0.1548, 0.1545, 0.1466, 0.1337, 0.1045, 0.0441,
         0.0004],
        [0.0157, 0.1054, 0.1405, 0.1549, 0.1545, 0.1466, 0.1337, 0.1044, 0.0440,
         0.0004],
        [0.0158, 0.1055, 0.1405, 0.1548, 0.1545, 0.1466, 0.1336, 0.1044, 0.0440,
         0.0004],
        [0.0158, 0.1056, 0.1405, 0.1548,

Epoch: 22.00, Train Loss: 3.94, Val Loss: 9.74, Train BLEU: 3.80, Val BLEU: 0.32
Sampling from training predictions...
Source: 还有 前面 的 这个 是 推进 引擎 它 一会 一会儿
Reference: and it &apos;s got these jet thrusters up in
Model: <SOS> it the the the the the , , ,
Attention Weights: tensor([[0.0410, 0.1239, 0.1423, 0.1457, 0.1464, 0.1410, 0.1294, 0.1028, 0.0266,
         0.0009],
        [0.0317, 0.1231, 0.1472, 0.1518, 0.1527, 0.1457, 0.1308, 0.0980, 0.0187,
         0.0004],
        [0.0287, 0.1224, 0.1487, 0.1539, 0.1549, 0.1473, 0.1311, 0.0962, 0.0165,
         0.0003],
        [0.0269, 0.1220, 0.1496, 0.1552, 0.1562, 0.1483, 0.1314, 0.0950, 0.0151,
         0.0002],
        [0.0264, 0.1221, 0.1499, 0.1555, 0.1566, 0.1486, 0.1314, 0.0946, 0.0147,
         0.0002],
        [0.0263, 0.1222, 0.1500, 0.1556, 0.1567, 0.1486, 0.1314, 0.0944, 0.0145,
         0.0002],
        [0.0263, 0.1222, 0.1500, 0.1556, 0.1567, 0.1487, 0.1314, 0.0944, 0.0144,
         0.0002],
        [0.0263, 0.1223, 0.1500, 0.

Epoch: 26.00, Train Loss: 3.82, Val Loss: 10.25, Train BLEU: 3.80, Val BLEU: 0.34
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> it &apos;s the the . . . <EOS> <EOS>
Attention Weights: tensor([[1.5734e-03, 3.7128e-03, 4.1322e-03, 4.2524e-03, 4.1891e-03, 3.9472e-03,
         3.4992e-03, 2.3120e-03, 1.2193e-04, 9.7226e-01],
        [1.0340e-04, 3.1791e-04, 3.6921e-04, 3.8391e-04, 3.7604e-04, 3.4749e-04,
         2.9564e-04, 1.7040e-04, 4.4887e-06, 9.9763e-01],
        [5.9713e-05, 1.9892e-04, 2.3344e-04, 2.4316e-04, 2.3775e-04, 2.1863e-04,
         1.8403e-04, 1.0228e-04, 2.1710e-06, 9.9852e-01],
        [4.0569e-05, 1.3986e-04, 1.6434e-04, 1.7121e-04, 1.6730e-04, 1.5357e-04,
         1.2877e-04, 7.0232e-05, 1.2760e-06, 9.9896e-01],
        [3.3658e-05, 1.1556e-04, 1.3547e-04, 1.4109e-04, 1.3788e-04, 1.2659e-04,
         1.0618e-04, 5.7709e-05, 9.9983e-07, 9.9914e-01],
        [3.0951

Epoch: 30.00, Train Loss: 3.73, Val Loss: 10.65, Train BLEU: 4.04, Val BLEU: 0.26
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> and the the the the the the the the
Attention Weights: tensor([[0.0547, 0.1098, 0.1234, 0.1290, 0.1278, 0.1264, 0.1198, 0.1065, 0.0812,
         0.0214],
        [0.0418, 0.1087, 0.1289, 0.1374, 0.1359, 0.1335, 0.1239, 0.1053, 0.0721,
         0.0123],
        [0.0384, 0.1080, 0.1303, 0.1396, 0.1380, 0.1354, 0.1250, 0.1050, 0.0699,
         0.0106],
        [0.0368, 0.1076, 0.1308, 0.1407, 0.1390, 0.1363, 0.1255, 0.1048, 0.0688,
         0.0097],
        [0.0364, 0.1078, 0.1313, 0.1412, 0.1394, 0.1366, 0.1256, 0.1045, 0.0680,
         0.0093],
        [0.0362, 0.1080, 0.1315, 0.1414, 0.1395, 0.1367, 0.1256, 0.1043, 0.0677,
         0.0091],
        [0.0362, 0.1081, 0.1316, 0.1415, 0.1396, 0.1367, 0.1256, 0.1043, 0.0675,
         0.0090],
        [0.0362, 0.1081, 0.1316, 

Epoch: 34.00, Train Loss: 3.65, Val Loss: 10.96, Train BLEU: 4.03, Val BLEU: 0.24
Sampling from training predictions...
Source: <UNK> 塞尔 <UNK> <UNK> 斯特 说 过 真正 的 探索
Reference: marcel proust said , &quot; the true voyage of
Model: <SOS> marcel the the , , , , , ,
Attention Weights: tensor([[1.3275e-09, 3.7548e-09, 1.4468e-08, 4.1622e-07, 1.2279e-02, 2.4698e-01,
         2.4501e-01, 2.3232e-01, 1.9166e-01, 7.1744e-02],
        [1.7130e-09, 3.4024e-09, 8.6186e-09, 1.0374e-07, 2.8515e-03, 2.6091e-01,
         2.7042e-01, 2.4939e-01, 1.8042e-01, 3.6015e-02],
        [3.8837e-09, 6.9180e-09, 1.5222e-08, 1.3095e-07, 2.0443e-03, 2.6130e-01,
         2.7667e-01, 2.5320e-01, 1.7694e-01, 2.9849e-02],
        [5.4565e-09, 9.3809e-09, 1.9785e-08, 1.5053e-07, 1.8672e-03, 2.6153e-01,
         2.7847e-01, 2.5408e-01, 1.7574e-01, 2.8314e-02],
        [6.1261e-09, 1.0436e-08, 2.1814e-08, 1.6092e-07, 1.8418e-03, 2.6131e-01,
         2.7907e-01, 2.5438e-01, 1.7543e-01, 2.7971e-02],
        [6.5492e-09, 1.1

In [None]:
# Display the first rows of the summarized experiment log, restricted to a few columns.
summary_columns = ['dt_created', 'num_epochs', 'learning_rate', 'clip_grad_max_norm', 'val_loss']
experiment_summary = summarize_results(load_experiment_log())
experiment_summary[summary_columns].head()

In [None]:
# Plot the learning curve from `results` (the second value returned by train_and_eval_attn).
plot_single_learning_curve(results)

In [None]:
# Metrics noted from an earlier run (presumably its final epoch, given NUM_EPOCHS = 200):
# Epoch: 199.00, Train Loss: 0.32, Val Loss: 13.19, Train BLEU: 98.94, Val BLEU: 0.27
# i.e. near-perfect fit on the training samples while validation BLEU stayed below 1.
plot_single_learning_curve(results)

In [None]:
# Metrics noted from an earlier run of a variant in which the attention energies were computed
# with the operand order switched (presumably inside the attention module -- confirm in model.py):
#     energies = v_broadcast.bmm(torch.tanh(self.attn(concat)).transpose(1, 2))
# Epoch: 199.00, Train Loss: 0.63, Val Loss: 12.82, Train BLEU: 92.05, Val BLEU: 0.38
plot_single_learning_curve(results)

In [None]:
# Peek at the first 20 entries of the source-language 'id2token' collection.
# zip() against range(20) stops after 20 pairs instead of walking the whole vocabulary
# and filtering with `if i < 20`; the printed lines are the same.
for i, token in zip(range(20), vocab[SRC_LANG]['id2token']):
    print("{}: {}".format(i, token))

In [None]:
# Peek at the first 20 entries of the target-language 'id2token' collection.
# zip() against range(20) stops after 20 pairs instead of walking the whole vocabulary
# and filtering with `if i < 20`; the printed lines are the same.
for i, token in zip(range(20), vocab[TARG_LANG]['id2token']):
    print("{}: {}".format(i, token))

In [None]:
# Scratch check: drop the first index along dim 0 of a (3, 5, 10) tensor, then
# flatten what remains into rows of 10. torch is already imported in the first cell.
x = torch.arange(3 * 5 * 10).view(3, 5, 10)
print(x)
y = x[1:]  # same as x[1:, :, :]
print(y)
z = y.view(-1, 10)
print(z)

In [None]:
# Scratch check: flattening a (5, 2) tensor directly versus flattening its transpose.
t = torch.arange(10).view(5, 2)
print(t)
u = t.contiguous().view(-1)  # row-major order of t: 0, 1, 2, ...
print(u)
v = t.transpose(0, 1)  # swaps the two dims, same result as permute(1, 0)
print(v)
w = v.contiguous().view(-1)  # row-major order of the transposed tensor: 0, 2, 4, ...
print(w)

In [None]:
# Scratch check: a flat tensor of 600 elements viewed as (-1, 1, 300) becomes (2, 1, 300).
a = torch.arange(2 * 1 * 300)
print(a)
b = a.view(-1, 1, 300)
print(b.size())

In [None]:
# Sanity check: turn the source-index tensor of the first training batch back into
# space-joined token strings (assumes src_idxs is (batch, seq_len) -- confirm in the loader).
id2token = vocab[SRC_LANG]['id2token']


def to_token(l):
    """Look up every id in `l` in the source `id2token` mapping and join the tokens with spaces."""
    return ' '.join([id2token[idx] for idx in l])


# The loaders created earlier in the notebook are bound to `loaders_full`; the name
# `full_loaders` is not defined anywhere and raised NameError on a fresh kernel.
for i, (src_idxs, targ_idxs, src_lens, targ_lens) in enumerate(loaders_full['train']):
    test_tensor = src_idxs
    list_of_lists = test_tensor.numpy().astype(int).tolist()
    list_of_lists_tokens = [to_token(l) for l in list_of_lists]
    break  # only the first batch is inspected