In [1]:
import numpy as np 
import pandas as pd 
from data_processing import generate_vocab, process_data, create_dataloaders 
from model import get_pretrained_emb, EncoderDecoder, EncoderRNN, DecoderRNN, DecoderSimpleRNN, EncoderSimpleRNN, \
    Attention, DecoderAttnRNN, DecoderRNNV2, EncoderDecoderAttention
from train_eval import count_parameters, summarize_results, \
    plot_single_learning_curve, load_experiment_log
from train_eval import train_and_eval, train_and_eval_attn 
import importlib
import pickle as pkl 
import torch

In [2]:
# --- model identification ---
SRC_LANG, TARG_LANG = 'zh', 'en'
MODEL_NAME = 'zh-seq2seq-rnn-attention'

# --- data processing params ---
# Source and target currently share the same sentence-length cap and vocabulary size.
SRC_MAX_SENTENCE_LEN = TARG_MAX_SENTENCE_LEN = 10
SRC_VOCAB_SIZE = TARG_VOCAB_SIZE = 30000

# --- model architecture params ---
NUM_LAYERS = 2
ENC_HIDDEN_DIM = 300
DEC_HIDDEN_DIM = ENC_HIDDEN_DIM  # alternative noted previously: 2 * ENC_HIDDEN_DIM
TEACHER_FORCING_RATIO = 1
CLIP_GRAD_MAX_NORM = 1
ENC_DROPOUT = DEC_DROPOUT = 0  # TODO: dropout is still to be actually implemented

# --- training params ---
BATCH_SIZE = 32
NUM_EPOCHS = 200
LR = 5e-4
OPTIMIZER = 'Adam'
LAZY_TRAIN = True

In [3]:
# Collect every hyperparameter into one dict so it can be saved alongside the results later.
params = dict(
    # model identification
    model_name=MODEL_NAME,
    src_lang=SRC_LANG,
    targ_lang=TARG_LANG,
    # data processing
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
    src_vocab_size=SRC_VOCAB_SIZE,
    targ_vocab_size=TARG_VOCAB_SIZE,
    # model architecture
    num_layers=NUM_LAYERS,
    enc_hidden_dim=ENC_HIDDEN_DIM,
    dec_hidden_dim=DEC_HIDDEN_DIM,
    teacher_forcing_ratio=TEACHER_FORCING_RATIO,
    clip_grad_max_norm=CLIP_GRAD_MAX_NORM,
    enc_dropout=ENC_DROPOUT,
    dec_dropout=DEC_DROPOUT,
    # training
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    learning_rate=LR,
    optimizer=OPTIMIZER,
    lazy_train=LAZY_TRAIN,
)

In [4]:
#vocab_test = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)

In [5]:
#vocab['zh']['id2token'][987]

In [6]:
#vocab['zh']['token2id']['森林']

In [7]:
#vocab['en']['token2id']['activity']

In [8]:
#vocab['en']['id2token'][987]

In [9]:
# # takes a long time to process, save to pickle for reimport in future 
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [10]:
# Reload the vocabulary from its pickle file instead of regenerating it.
vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only load
# vocab files that were produced locally by this project.
# The `with` block guarantees the file handle is closed (the bare open() call leaked it).
with open(vocab_filename, "rb") as vocab_file:
    vocab = pkl.load(vocab_file)

# Three datasets built from the same vocabulary: the unrestricted one, plus two
# restricted via `sample_limit` (presumably a cap on the number of sentence pairs -
# confirm in data_processing.process_data).
data = process_data(SRC_LANG, TARG_LANG, vocab)
data_minibatch = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=BATCH_SIZE)
data_minitrain = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=1000)

In [11]:
# # takes a long time to process, save to pickle for reimport in future 
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab-fake.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [12]:
# vocab_filename = "{}-{}-vocab-fake.p".format(SRC_LANG, TARG_LANG)
# vocab = pkl.load(open(vocab_filename, "rb"))
# data = process_data(SRC_LANG, TARG_LANG, vocab)
# limited_data = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=BATCH_SIZE) 

In [13]:
# Build one set of dataloaders per dataset (full, single-minibatch, mini-train);
# all three share the same sentence-length caps and batch size.
loaders_full, loaders_minibatch, loaders_minitrain = (
    create_dataloaders(dataset, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, BATCH_SIZE)
    for dataset in (data, data_minibatch, data_minitrain)
)

In [14]:
# define model

# Source side: build the pretrained embedding, then the encoder that consumes it.
# EncoderRNN was the alternative tried here earlier; it was called with the same
# keyword arguments as EncoderSimpleRNN.
src_pretrained_emb = get_pretrained_emb(vocab[SRC_LANG]['word2vec'], vocab[SRC_LANG]['token2id'])
encoder = EncoderSimpleRNN(
    enc_hidden_dim=ENC_HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    pretrained_word2vec=src_pretrained_emb,
)

# Target side: attention decoder. The decoders tried earlier (DecoderRNN, DecoderRNNV2,
# DecoderSimpleRNN) were called with the same keyword arguments minus src_max_sentence_len.
targ_pretrained_emb = get_pretrained_emb(vocab[TARG_LANG]['word2vec'], vocab[TARG_LANG]['token2id'])
decoder = DecoderAttnRNN(
    dec_hidden_dim=DEC_HIDDEN_DIM,
    enc_hidden_dim=ENC_HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    targ_vocab_size=TARG_VOCAB_SIZE,
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
    pretrained_word2vec=targ_pretrained_emb,
)

# Wrap encoder and decoder together with the target-language token -> id mapping.
targ_token2id = vocab[TARG_LANG]['token2id']
model = EncoderDecoderAttention(encoder, decoder, targ_token2id)

In [15]:
# Reporting / persistence switches passed through to the training routine.
reporting_options = dict(
    print_intermediate=True,
    save_checkpoint=True,
    save_to_log=True,
    lazy_eval=False,
    inspect_samples=1,
)

# Train and evaluate the attention model; returns the model and a results object
# that the plotting cells below consume.
model, results = train_and_eval_attn(
    model=model,
    loaders_full=loaders_full,
    loaders_minibatch=loaders_minibatch,
    loaders_minitrain=loaders_minitrain,
    params=params,
    vocab=vocab,
    **reporting_options
)

Epoch: 0.00, Train Loss: 10.13, Val Loss: 10.23, Train BLEU: 0.32, Val BLEU: 0.18
Sampling from training predictions...
Source: 但 我 想 告诉 你 的 是 当 你 站
Reference: but when you &apos;re standing at the beach ,
Model: <SOS> numbered the the the the the the the the
Attention Weights: tensor([[0.0859, 0.0922, 0.0969, 0.0986, 0.1013, 0.1034, 0.1047, 0.1058, 0.1066,
         0.1046],
        [0.0855, 0.0921, 0.0969, 0.0986, 0.1014, 0.1035, 0.1048, 0.1059, 0.1067,
         0.1045],
        [0.0855, 0.0921, 0.0969, 0.0987, 0.1014, 0.1036, 0.1048, 0.1059, 0.1067,
         0.1045],
        [0.0854, 0.0920, 0.0969, 0.0987, 0.1014, 0.1036, 0.1048, 0.1059, 0.1068,
         0.1046],
        [0.0853, 0.0920, 0.0969, 0.0986, 0.1014, 0.1036, 0.1048, 0.1059, 0.1068,
         0.1046],
        [0.0853, 0.0920, 0.0969, 0.0986, 0.1014, 0.1036, 0.1049, 0.1060, 0.1068,
         0.1046],
        [0.0853, 0.0920, 0.0969, 0.0986, 0.1014, 0.1036, 0.1049, 0.1060, 0.1068,
         0.1046],
        [0.0853, 0.0920, 0.0

Epoch: 4.00, Train Loss: 9.01, Val Loss: 9.72, Train BLEU: 0.32, Val BLEU: 0.21
Sampling from training predictions...
Source: 但 我 想 告诉 你 的 是 当 你 站
Reference: but when you &apos;re standing at the beach ,
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[0.0775, 0.0951, 0.1034, 0.1060, 0.1099, 0.1101, 0.1096, 0.1076, 0.1000,
         0.0807],
        [0.0749, 0.0944, 0.1040, 0.1071, 0.1116, 0.1117, 0.1110, 0.1082, 0.0994,
         0.0777],
        [0.0738, 0.0943, 0.1044, 0.1077, 0.1124, 0.1125, 0.1115, 0.1083, 0.0988,
         0.0764],
        [0.0731, 0.0941, 0.1046, 0.1080, 0.1128, 0.1129, 0.1118, 0.1083, 0.0986,
         0.0759],
        [0.0728, 0.0939, 0.1046, 0.1081, 0.1130, 0.1131, 0.1119, 0.1084, 0.0985,
         0.0756],
        [0.0726, 0.0938, 0.1046, 0.1082, 0.1131, 0.1132, 0.1120, 0.1084, 0.0985,
         0.0756],
        [0.0726, 0.0938, 0.1046, 0.1082, 0.1131, 0.1132, 0.1120, 0.1084, 0.0985,
         0.0755],
        [0.0726, 0.0937, 0.1046, 0.

Epoch: 8.00, Train Loss: 7.51, Val Loss: 9.11, Train BLEU: 0.32, Val BLEU: 0.22
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[0.0934, 0.1151, 0.1204, 0.1191, 0.1164, 0.1175, 0.1135, 0.0903, 0.0511,
         0.0632],
        [0.0629, 0.0836, 0.0900, 0.0891, 0.0867, 0.0883, 0.0832, 0.0612, 0.0308,
         0.3242],
        [0.0459, 0.0627, 0.0682, 0.0676, 0.0656, 0.0670, 0.0625, 0.0448, 0.0219,
         0.4936],
        [0.0371, 0.0514, 0.0563, 0.0559, 0.0542, 0.0553, 0.0512, 0.0361, 0.0174,
         0.5852],
        [0.0332, 0.0464, 0.0509, 0.0505, 0.0490, 0.0500, 0.0461, 0.0323, 0.0155,
         0.6259],
        [0.0318, 0.0445, 0.0488, 0.0484, 0.0470, 0.0479, 0.0441, 0.0309, 0.0148,
         0.6418],
        [0.0313, 0.0438, 0.0480, 0.0476, 0.0463, 0.0472, 0.0434, 0.0304, 0.0145,
         0.6475],
        [0.0312, 0.0436,

Epoch: 12.00, Train Loss: 5.97, Val Loss: 8.65, Train BLEU: 0.29, Val BLEU: 0.21
Sampling from training predictions...
Source: 这 是 一只 水母 <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: here &apos;s a jelly . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> the the the the the the the the .
Attention Weights: tensor([[0.1487, 0.1719, 0.1648, 0.1429, 0.0767, 0.0590, 0.0590, 0.0590, 0.0590,
         0.0590],
        [0.0445, 0.0568, 0.0551, 0.0448, 0.0197, 0.1558, 0.1558, 0.1558, 0.1558,
         0.1558],
        [0.0223, 0.0295, 0.0288, 0.0228, 0.0094, 0.1774, 0.1774, 0.1774, 0.1774,
         0.1774],
        [0.0146, 0.0197, 0.0193, 0.0150, 0.0060, 0.1851, 0.1851, 0.1851, 0.1851,
         0.1851],
        [0.0113, 0.0153, 0.0150, 0.0116, 0.0046, 0.1884, 0.1884, 0.1884, 0.1884,
         0.1884],
        [0.0096, 0.0132, 0.0130, 0.0099, 0.0039, 0.1901, 0.1901, 0.1901, 0.1901,
         0.1901],
        [0.0088, 0.0120, 0.0118, 0.0090, 0.0036, 0.1910, 0.1910, 0.1910, 0.1910,
         0.1910],
        [0

Epoch: 16.00, Train Loss: 4.89, Val Loss: 8.68, Train BLEU: 0.36, Val BLEU: 0.24
Sampling from training predictions...
Source: 我们 这 有 不少 精彩 的 泰坦 泰坦尼克 坦尼 尼克
Reference: we &apos;ve got some of the most incredible video
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.1029, 0.1100, 0.1047, 0.1032, 0.1024, 0.1009, 0.0975, 0.0912, 0.0932,
         0.0940],
        [0.0934, 0.1101, 0.1078, 0.1072, 0.1067, 0.1050, 0.1008, 0.0915, 0.0918,
         0.0855],
        [0.0906, 0.1109, 0.1096, 0.1090, 0.1084, 0.1066, 0.1018, 0.0911, 0.0904,
         0.0818],
        [0.0886, 0.1107, 0.1102, 0.1098, 0.1094, 0.1075, 0.1026, 0.0915, 0.0900,
         0.0797],
        [0.0871, 0.1104, 0.1105, 0.1103, 0.1100, 0.1082, 0.1033, 0.0919, 0.0899,
         0.0785],
        [0.0860, 0.1102, 0.1107, 0.1107, 0.1104, 0.1086, 0.1038, 0.0922, 0.0898,
         0.0776],
        [0.0852, 0.1100, 0.1109, 0.1110, 0.1107, 0.1090, 0.1041, 0.0924, 0.0897,
         0.0770],
        [0.0845, 0.1098

Epoch: 20.00, Train Loss: 4.35, Val Loss: 9.04, Train BLEU: 3.89, Val BLEU: 0.22
Sampling from training predictions...
Source: 大多 大多数 多数 地震 和 火山 喷发 也 都 发生
Reference: most of the earthquakes and volcanoes are in the
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.1016, 0.1059, 0.1010, 0.0996, 0.0990, 0.0989, 0.0987, 0.0988, 0.0992,
         0.0974],
        [0.0898, 0.1024, 0.1022, 0.1025, 0.1026, 0.1030, 0.1029, 0.1030, 0.1022,
         0.0894],
        [0.0866, 0.1018, 0.1032, 0.1038, 0.1041, 0.1044, 0.1042, 0.1042, 0.1025,
         0.0852],
        [0.0842, 0.1012, 0.1037, 0.1046, 0.1052, 0.1054, 0.1052, 0.1052, 0.1028,
         0.0824],
        [0.0824, 0.1007, 0.1041, 0.1052, 0.1059, 0.1062, 0.1060, 0.1059, 0.1030,
         0.0807],
        [0.0813, 0.1004, 0.1043, 0.1056, 0.1063, 0.1066, 0.1064, 0.1062, 0.1031,
         0.0797],
        [0.0806, 0.1002, 0.1045, 0.1059, 0.1066, 0.1069, 0.1067, 0.1065, 0.1031,
         0.0790],
        [0.0800, 0.1001, 

Epoch: 24.00, Train Loss: 4.07, Val Loss: 9.56, Train BLEU: 3.97, Val BLEU: 0.30
Sampling from training predictions...
Source: 大卫 <UNK> 这位 是 比尔 <UNK> 我 是 大卫 <UNK>
Reference: this is bill lange . i &apos;m dave gallo
Model: <SOS> it the the the the , , , ,
Attention Weights: tensor([[0.1033, 0.1068, 0.1014, 0.1004, 0.1010, 0.1041, 0.1009, 0.1004, 0.0998,
         0.0819],
        [0.0898, 0.1046, 0.1041, 0.1046, 0.1051, 0.1085, 0.1063, 0.1057, 0.1017,
         0.0697],
        [0.0864, 0.1043, 0.1056, 0.1063, 0.1062, 0.1101, 0.1086, 0.1074, 0.1010,
         0.0640],
        [0.0834, 0.1037, 0.1066, 0.1076, 0.1070, 0.1112, 0.1103, 0.1089, 0.1010,
         0.0601],
        [0.0816, 0.1035, 0.1071, 0.1083, 0.1075, 0.1117, 0.1112, 0.1098, 0.1011,
         0.0581],
        [0.0807, 0.1034, 0.1074, 0.1087, 0.1078, 0.1120, 0.1117, 0.1102, 0.1011,
         0.0570],
        [0.0801, 0.1033, 0.1077, 0.1090, 0.1080, 0.1122, 0.1120, 0.1104, 0.1011,
         0.0563],
        [0.0796, 0.1033, 0.1078,

Epoch: 28.00, Train Loss: 3.94, Val Loss: 10.08, Train BLEU: 2.91, Val BLEU: 0.22
Sampling from training predictions...
Source: 海洋 的 平均 深度 是 两英里 英里 <EOS> <PAD> <PAD>
Reference: the average depth is about two miles . <EOS>
Model: <SOS> it the the the <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0830, 0.0824, 0.0815, 0.0815, 0.0800, 0.0793, 0.0759, 0.0520, 0.1922,
         0.1922],
        [0.0715, 0.0809, 0.0827, 0.0836, 0.0828, 0.0825, 0.0773, 0.0441, 0.1973,
         0.1973],
        [0.0534, 0.0617, 0.0634, 0.0642, 0.0634, 0.0627, 0.0580, 0.0299, 0.2717,
         0.2717],
        [0.0330, 0.0396, 0.0410, 0.0416, 0.0412, 0.0405, 0.0371, 0.0171, 0.3544,
         0.3544],
        [0.0171, 0.0214, 0.0224, 0.0228, 0.0227, 0.0222, 0.0202, 0.0085, 0.4213,
         0.4213],
        [0.0090, 0.0117, 0.0123, 0.0126, 0.0125, 0.0123, 0.0111, 0.0044, 0.4571,
         0.4571],
        [0.0060, 0.0080, 0.0084, 0.0086, 0.0086, 0.0084, 0.0075, 0.0029, 0.4708,
         0.4708],
        

Epoch: 32.00, Train Loss: 3.84, Val Loss: 10.42, Train BLEU: 4.82, Val BLEU: 0.23
Sampling from training predictions...
Source: 和 我们 合作 的 人们 帮 我们 找到 了 新
Reference: people that have partnered with us have given us
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.1024, 0.0992, 0.0999, 0.0996, 0.1002, 0.0995, 0.0995, 0.0993, 0.0981,
         0.1022],
        [0.0910, 0.0986, 0.1015, 0.1019, 0.1028, 0.1022, 0.1024, 0.1020, 0.1004,
         0.0973],
        [0.0891, 0.0983, 0.1018, 0.1024, 0.1033, 0.1028, 0.1030, 0.1025, 0.1009,
         0.0960],
        [0.0873, 0.0982, 0.1020, 0.1027, 0.1038, 0.1033, 0.1035, 0.1029, 0.1013,
         0.0950],
        [0.0851, 0.0982, 0.1024, 0.1033, 0.1044, 0.1040, 0.1041, 0.1035, 0.1018,
         0.0932],
        [0.0836, 0.0982, 0.1028, 0.1038, 0.1048, 0.1045, 0.1046, 0.1039, 0.1020,
         0.0918],
        [0.0827, 0.0983, 0.1030, 0.1041, 0.1051, 0.1049, 0.1049, 0.1041, 0.1021,
         0.0908],
        [0.0821, 0.0983, 0.

Epoch: 36.00, Train Loss: 3.74, Val Loss: 10.62, Train BLEU: 7.99, Val BLEU: 1.06
Sampling from training predictions...
Source: 当 你 站 在 海滩 上 或是 当 你 看到
Reference: part of the problem , i think , is
Model: <SOS> it &apos;s the the the the the , ,
Attention Weights: tensor([[0.1014, 0.0973, 0.0985, 0.1000, 0.1012, 0.1003, 0.1009, 0.0990, 0.0981,
         0.1033],
        [0.0910, 0.0974, 0.1002, 0.1022, 0.1036, 0.1027, 0.1034, 0.1014, 0.1001,
         0.0980],
        [0.0897, 0.0975, 0.1006, 0.1027, 0.1041, 0.1033, 0.1038, 0.1016, 0.1001,
         0.0965],
        [0.0888, 0.0977, 0.1010, 0.1031, 0.1046, 0.1037, 0.1042, 0.1019, 0.1002,
         0.0949],
        [0.0869, 0.0981, 0.1017, 0.1038, 0.1053, 0.1044, 0.1048, 0.1025, 0.1004,
         0.0921],
        [0.0853, 0.0984, 0.1023, 0.1045, 0.1059, 0.1050, 0.1054, 0.1030, 0.1006,
         0.0898],
        [0.0842, 0.0985, 0.1027, 0.1049, 0.1063, 0.1054, 0.1057, 0.1033, 0.1006,
         0.0884],
        [0.0834, 0.0986, 0.1029, 0.1051, 0.

Epoch: 40.00, Train Loss: 3.64, Val Loss: 10.79, Train BLEU: 6.91, Val BLEU: 0.25
Sampling from training predictions...
Source: 但 我 想 告诉 你 的 是 当 你 站
Reference: but when you &apos;re standing at the beach ,
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.0985, 0.0974, 0.0999, 0.1021, 0.1015, 0.1018, 0.1011, 0.1000, 0.0981,
         0.0995],
        [0.0894, 0.0971, 0.1010, 0.1035, 0.1031, 0.1036, 0.1030, 0.1020, 0.1001,
         0.0971],
        [0.0883, 0.0973, 0.1015, 0.1041, 0.1037, 0.1041, 0.1033, 0.1020, 0.0997,
         0.0960],
        [0.0876, 0.0977, 0.1019, 0.1045, 0.1041, 0.1045, 0.1036, 0.1020, 0.0994,
         0.0946],
        [0.0858, 0.0981, 0.1027, 0.1053, 0.1050, 0.1052, 0.1042, 0.1025, 0.0993,
         0.0919],
        [0.0842, 0.0985, 0.1034, 0.1060, 0.1057, 0.1058, 0.1047, 0.1028, 0.0992,
         0.0897],
        [0.0831, 0.0988, 0.1038, 0.1064, 0.1061, 0.1062, 0.1050, 0.1030, 0.0992,
         0.0884],
        [0.0822, 0.0989, 0.1041, 0

Epoch: 44.00, Train Loss: 3.55, Val Loss: 10.90, Train BLEU: 7.42, Val BLEU: 0.25
Sampling from training predictions...
Source: 海洋 里 生物 的 多样 多样性 和 密度 要 比
Reference: the biodiversity and the <UNK> in the ocean is
Model: <SOS> we the the the the the the the the
Attention Weights: tensor([[0.0970, 0.0968, 0.1001, 0.1006, 0.1025, 0.1100, 0.1000, 0.0993, 0.0962,
         0.0975],
        [0.0877, 0.0960, 0.1011, 0.1021, 0.1044, 0.1108, 0.1018, 0.1016, 0.0987,
         0.0957],
        [0.0867, 0.0963, 0.1016, 0.1026, 0.1050, 0.1119, 0.1020, 0.1014, 0.0979,
         0.0946],
        [0.0863, 0.0965, 0.1019, 0.1028, 0.1053, 0.1125, 0.1021, 0.1014, 0.0976,
         0.0936],
        [0.0847, 0.0969, 0.1025, 0.1034, 0.1059, 0.1131, 0.1028, 0.1018, 0.0975,
         0.0915],
        [0.0824, 0.0974, 0.1032, 0.1043, 0.1066, 0.1134, 0.1036, 0.1023, 0.0976,
         0.0891],
        [0.0806, 0.0977, 0.1038, 0.1050, 0.1072, 0.1137, 0.1042, 0.1027, 0.0976,
         0.0876],
        [0.0795, 0.0978, 0.1

Epoch: 48.00, Train Loss: 3.44, Val Loss: 10.97, Train BLEU: 8.03, Val BLEU: 0.31
Sampling from training predictions...
Source: 大家 想想 海洋 占 了 地球 球面 面积 的 75
Reference: when you think about it , the oceans are
Model: <SOS> it &apos;s the the the , , , ,
Attention Weights: tensor([[0.1025, 0.1011, 0.1027, 0.1024, 0.1028, 0.1035, 0.1023, 0.0995, 0.1018,
         0.0814],
        [0.0927, 0.1021, 0.1062, 0.1061, 0.1069, 0.1080, 0.1069, 0.1047, 0.1039,
         0.0625],
        [0.0929, 0.1032, 0.1076, 0.1073, 0.1080, 0.1090, 0.1076, 0.1050, 0.1036,
         0.0558],
        [0.0914, 0.1043, 0.1092, 0.1090, 0.1097, 0.1105, 0.1090, 0.1060, 0.1030,
         0.0477],
        [0.0891, 0.1054, 0.1107, 0.1108, 0.1114, 0.1120, 0.1104, 0.1070, 0.1017,
         0.0415],
        [0.0867, 0.1064, 0.1120, 0.1124, 0.1129, 0.1134, 0.1115, 0.1077, 0.1001,
         0.0370],
        [0.0848, 0.1070, 0.1130, 0.1135, 0.1139, 0.1143, 0.1124, 0.1082, 0.0988,
         0.0342],
        [0.0834, 0.1074, 0.1136, 0.11

Epoch: 52.00, Train Loss: 3.34, Val Loss: 11.06, Train BLEU: 7.54, Val BLEU: 0.30
Sampling from training predictions...
Source: 其实 它们 都 是 由 单独 的 动物 结合 合在
Reference: these are all individual animals banding together to make
Model: <SOS> these the the , , , , , ,
Attention Weights: tensor([[0.1013, 0.1020, 0.1027, 0.1039, 0.1041, 0.1042, 0.1017, 0.0990, 0.0993,
         0.0817],
        [0.0936, 0.1029, 0.1052, 0.1068, 0.1072, 0.1079, 0.1055, 0.1047, 0.1049,
         0.0612],
        [0.0936, 0.1038, 0.1061, 0.1077, 0.1080, 0.1086, 0.1059, 0.1051, 0.1053,
         0.0559],
        [0.0923, 0.1053, 0.1082, 0.1098, 0.1101, 0.1105, 0.1077, 0.1061, 0.1038,
         0.0463],
        [0.0905, 0.1066, 0.1100, 0.1116, 0.1118, 0.1120, 0.1092, 0.1067, 0.1014,
         0.0401],
        [0.0887, 0.1075, 0.1113, 0.1129, 0.1132, 0.1132, 0.1103, 0.1071, 0.0996,
         0.0363],
        [0.0871, 0.1080, 0.1122, 0.1138, 0.1140, 0.1140, 0.1110, 0.1074, 0.0984,
         0.0339],
        [0.0859, 0.1083, 0

Epoch: 56.00, Train Loss: 3.24, Val Loss: 11.10, Train BLEU: 8.39, Val BLEU: 0.26
Sampling from training predictions...
Source: 我们 得用 非常 特殊 的 仪器 才能 能到 到达 那个
Reference: we have to have a very special technology to
Model: <SOS> we the the the the the the the the
Attention Weights: tensor([[0.0930, 0.1021, 0.1013, 0.1016, 0.1010, 0.1024, 0.1025, 0.1062, 0.0959,
         0.0940],
        [0.0871, 0.0995, 0.1013, 0.1024, 0.1022, 0.1040, 0.1048, 0.1065, 0.0977,
         0.0944],
        [0.0859, 0.1008, 0.1017, 0.1027, 0.1023, 0.1042, 0.1051, 0.1073, 0.0969,
         0.0931],
        [0.0850, 0.1020, 0.1025, 0.1030, 0.1024, 0.1043, 0.1052, 0.1080, 0.0962,
         0.0914],
        [0.0830, 0.1029, 0.1034, 0.1038, 0.1031, 0.1049, 0.1058, 0.1085, 0.0960,
         0.0886],
        [0.0806, 0.1034, 0.1044, 0.1048, 0.1041, 0.1057, 0.1064, 0.1084, 0.0962,
         0.0861],
        [0.0786, 0.1037, 0.1050, 0.1056, 0.1049, 0.1063, 0.1068, 0.1083, 0.0964,
         0.0844],
        [0.0771, 0.1039, 0.

Epoch: 60.00, Train Loss: 3.13, Val Loss: 11.11, Train BLEU: 8.55, Val BLEU: 0.28
Sampling from training predictions...
Source: 深海 海中 的 生命 大卫 <UNK> <EOS> <PAD> <PAD> <PAD>
Reference: life in the deep oceans <EOS> <PAD> <PAD> <PAD>
Model: <SOS> it &apos;s a . <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0585, 0.0589, 0.0577, 0.0587, 0.0594, 0.0373, 0.0360, 0.2112, 0.2112,
         0.2112],
        [0.0523, 0.0569, 0.0572, 0.0590, 0.0592, 0.0320, 0.0332, 0.2167, 0.2167,
         0.2167],
        [0.0413, 0.0461, 0.0463, 0.0482, 0.0478, 0.0211, 0.0240, 0.2417, 0.2417,
         0.2417],
        [0.0211, 0.0250, 0.0251, 0.0258, 0.0245, 0.0086, 0.0103, 0.2866, 0.2866,
         0.2866],
        [0.0096, 0.0123, 0.0125, 0.0126, 0.0114, 0.0033, 0.0039, 0.3114, 0.3114,
         0.3114],
        [0.0054, 0.0073, 0.0075, 0.0074, 0.0065, 0.0018, 0.0021, 0.3207, 0.3207,
         0.3207],
        [0.0042, 0.0058, 0.0059, 0.0058, 0.0050, 0.0014, 0.0016, 0.3234, 0.3234,
         0.3234],

Epoch: 64.00, Train Loss: 3.02, Val Loss: 11.19, Train BLEU: 9.53, Val BLEU: 0.29
Sampling from training predictions...
Source: 泰坦 泰坦尼克 泰坦尼克号 坦尼 尼克 号 是 拿 了 不少
Reference: the truth of the matter is that the titanic
Model: <SOS> we the the the the the the the the
Attention Weights: tensor([[0.0975, 0.1107, 0.1134, 0.1159, 0.1015, 0.0953, 0.0944, 0.0926, 0.0901,
         0.0886],
        [0.0944, 0.1080, 0.1128, 0.1141, 0.1011, 0.0961, 0.0953, 0.0936, 0.0919,
         0.0927],
        [0.0940, 0.1113, 0.1150, 0.1146, 0.1006, 0.0954, 0.0945, 0.0925, 0.0906,
         0.0915],
        [0.0927, 0.1120, 0.1159, 0.1162, 0.1018, 0.0957, 0.0945, 0.0922, 0.0898,
         0.0893],
        [0.0895, 0.1120, 0.1169, 0.1180, 0.1033, 0.0968, 0.0954, 0.0927, 0.0897,
         0.0857],
        [0.0859, 0.1121, 0.1181, 0.1199, 0.1044, 0.0979, 0.0963, 0.0933, 0.0898,
         0.0822],
        [0.0832, 0.1121, 0.1192, 0.1214, 0.1051, 0.0986, 0.0968, 0.0937, 0.0899,
         0.0800],
        [0.0817, 0.1121, 0

Epoch: 68.00, Train Loss: 2.91, Val Loss: 11.26, Train BLEU: 10.24, Val BLEU: 0.27
Sampling from training predictions...
Source: 我们 将 用 一些 影片 来讲 讲述 一些 深海 海里
Reference: and we &apos;re going to tell you some stories
Model: <SOS> and of the the the the the the the
Attention Weights: tensor([[0.0963, 0.0973, 0.1000, 0.1047, 0.1043, 0.1033, 0.1023, 0.1015, 0.0975,
         0.0927],
        [0.0923, 0.0965, 0.0995, 0.1048, 0.1040, 0.1031, 0.1022, 0.1017, 0.0992,
         0.0967],
        [0.0912, 0.0963, 0.0993, 0.1058, 0.1048, 0.1037, 0.1027, 0.1020, 0.0988,
         0.0956],
        [0.0899, 0.0964, 0.0997, 0.1068, 0.1056, 0.1043, 0.1031, 0.1023, 0.0984,
         0.0935],
        [0.0879, 0.0967, 0.1004, 0.1078, 0.1067, 0.1053, 0.1040, 0.1028, 0.0982,
         0.0904],
        [0.0856, 0.0973, 0.1013, 0.1086, 0.1076, 0.1061, 0.1047, 0.1032, 0.0981,
         0.0876],
        [0.0835, 0.0978, 0.1021, 0.1092, 0.1083, 0.1068, 0.1053, 0.1036, 0.0980,
         0.0854],
        [0.0816, 0.0982, 

Epoch: 72.00, Train Loss: 2.81, Val Loss: 11.32, Train BLEU: 10.85, Val BLEU: 0.26
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> it &apos;s got to is is about . .
Attention Weights: tensor([[0.0905, 0.0914, 0.0930, 0.0993, 0.1124, 0.0950, 0.0883, 0.0830, 0.0585,
         0.1884],
        [0.0837, 0.0875, 0.0901, 0.0984, 0.1121, 0.0921, 0.0869, 0.0843, 0.0587,
         0.2061],
        [0.0761, 0.0800, 0.0828, 0.0930, 0.1086, 0.0843, 0.0793, 0.0784, 0.0531,
         0.2644],
        [0.0631, 0.0677, 0.0706, 0.0807, 0.0969, 0.0720, 0.0670, 0.0663, 0.0408,
         0.3749],
        [0.0466, 0.0524, 0.0549, 0.0626, 0.0763, 0.0561, 0.0519, 0.0508, 0.0267,
         0.5217],
        [0.0311, 0.0374, 0.0394, 0.0444, 0.0543, 0.0402, 0.0369, 0.0353, 0.0158,
         0.6653],
        [0.0195, 0.0250, 0.0265, 0.0296, 0.0364, 0.0270, 0.0246, 0.0230, 0.0090,
         0.7793],
        [0.0130, 0.0174

Epoch: 76.00, Train Loss: 2.70, Val Loss: 11.37, Train BLEU: 12.52, Val BLEU: 0.27
Sampling from training predictions...
Source: 我们 用 的 是 深海 潜水 潜水艇 <UNK> 号 和
Reference: we use the submarine alvin and we use cameras
Model: <SOS> we the the the the the the the the
Attention Weights: tensor([[0.0976, 0.1008, 0.1033, 0.1045, 0.1098, 0.1109, 0.0860, 0.0839, 0.1038,
         0.0994],
        [0.0939, 0.0981, 0.1017, 0.1063, 0.1148, 0.1176, 0.0862, 0.0837, 0.0998,
         0.0979],
        [0.0931, 0.0971, 0.1016, 0.1081, 0.1213, 0.1264, 0.0836, 0.0778, 0.0965,
         0.0946],
        [0.0941, 0.0989, 0.1039, 0.1108, 0.1254, 0.1292, 0.0745, 0.0697, 0.0987,
         0.0950],
        [0.0941, 0.1010, 0.1065, 0.1136, 0.1291, 0.1307, 0.0670, 0.0623, 0.1007,
         0.0952],
        [0.0933, 0.1029, 0.1088, 0.1159, 0.1315, 0.1309, 0.0624, 0.0577, 0.1018,
         0.0948],
        [0.0922, 0.1043, 0.1104, 0.1173, 0.1323, 0.1304, 0.0603, 0.0554, 0.1027,
         0.0947],
        [0.0911, 0.1054, 

Epoch: 80.00, Train Loss: 2.60, Val Loss: 11.40, Train BLEU: 13.66, Val BLEU: 0.26
Sampling from training predictions...
Source: <UNK> 塞尔 <UNK> <UNK> 斯特 说 过 真正 的 探索
Reference: marcel proust said , &quot; the true voyage of
Model: <SOS> marcel proust said said &quot; &quot; true voyage voyage
Attention Weights: tensor([[0.0844, 0.0909, 0.0894, 0.0962, 0.1140, 0.1096, 0.1070, 0.1085, 0.1015,
         0.0985],
        [0.0669, 0.0738, 0.0734, 0.0842, 0.1237, 0.1189, 0.1167, 0.1186, 0.1117,
         0.1121],
        [0.0552, 0.0604, 0.0590, 0.0681, 0.1222, 0.1295, 0.1281, 0.1306, 0.1226,
         0.1244],
        [0.0407, 0.0449, 0.0442, 0.0510, 0.1155, 0.1445, 0.1438, 0.1466, 0.1363,
         0.1324],
        [0.0279, 0.0311, 0.0309, 0.0368, 0.1040, 0.1591, 0.1606, 0.1633, 0.1502,
         0.1361],
        [0.0236, 0.0262, 0.0262, 0.0320, 0.0988, 0.1645, 0.1675, 0.1702, 0.1554,
         0.1356],
        [0.0219, 0.0244, 0.0245, 0.0302, 0.0966, 0.1665, 0.1705, 0.1731, 0.1576,
         0.13

Epoch: 84.00, Train Loss: 2.49, Val Loss: 11.43, Train BLEU: 14.59, Val BLEU: 0.27
Sampling from training predictions...
Source: <UNK> 塞尔 <UNK> <UNK> 斯特 说 过 真正 的 探索
Reference: marcel proust said , &quot; the true voyage of
Model: <SOS> marcel proust said said &quot; true true voyage voyage
Attention Weights: tensor([[0.0915, 0.0982, 0.0968, 0.1020, 0.1120, 0.1032, 0.1016, 0.1033, 0.0971,
         0.0944],
        [0.0740, 0.0813, 0.0808, 0.0906, 0.1215, 0.1123, 0.1112, 0.1133, 0.1073,
         0.1077],
        [0.0618, 0.0674, 0.0657, 0.0744, 0.1214, 0.1228, 0.1225, 0.1253, 0.1181,
         0.1205],
        [0.0455, 0.0499, 0.0489, 0.0555, 0.1150, 0.1390, 0.1399, 0.1429, 0.1335,
         0.1298],
        [0.0309, 0.0341, 0.0338, 0.0396, 0.1033, 0.1548, 0.1585, 0.1614, 0.1490,
         0.1346],
        [0.0263, 0.0290, 0.0289, 0.0347, 0.0983, 0.1602, 0.1656, 0.1683, 0.1544,
         0.1343],
        [0.0244, 0.0270, 0.0270, 0.0328, 0.0959, 0.1622, 0.1687, 0.1714, 0.1567,
         0.1338

Epoch: 88.00, Train Loss: 2.40, Val Loss: 11.49, Train BLEU: 14.69, Val BLEU: 0.26
Sampling from training predictions...
Source: 我们 这 有 不少 精彩 的 泰坦 泰坦尼克 坦尼 尼克
Reference: we &apos;ve got some of the most incredible video
Model: <SOS> we of the the the the the the the
Attention Weights: tensor([[0.0889, 0.0939, 0.0976, 0.1046, 0.1038, 0.1017, 0.1075, 0.1057, 0.1072,
         0.0891],
        [0.0874, 0.0923, 0.0958, 0.1031, 0.1023, 0.1007, 0.1099, 0.1081, 0.1095,
         0.0908],
        [0.0852, 0.0890, 0.0927, 0.1032, 0.1019, 0.0991, 0.1121, 0.1142, 0.1142,
         0.0885],
        [0.0838, 0.0879, 0.0919, 0.1039, 0.1024, 0.0989, 0.1145, 0.1148, 0.1159,
         0.0862],
        [0.0814, 0.0866, 0.0913, 0.1045, 0.1027, 0.0991, 0.1171, 0.1168, 0.1174,
         0.0830],
        [0.0791, 0.0861, 0.0912, 0.1046, 0.1029, 0.0994, 0.1184, 0.1187, 0.1191,
         0.0806],
        [0.0770, 0.0860, 0.0913, 0.1046, 0.1031, 0.0998, 0.1191, 0.1202, 0.1204,
         0.0785],
        [0.0749, 0.086

KeyboardInterrupt: 

In [None]:
# Show the first rows of the key columns from the summarized experiment log.
summary_columns = ['dt_created', 'num_epochs', 'learning_rate', 'clip_grad_max_norm', 'val_loss']
experiment_summary = summarize_results(load_experiment_log())
experiment_summary[summary_columns].head()

In [None]:
# Plot the learning curve for `results`, which is assigned by the train_and_eval_attn cell;
# that cell must have run to completion for `results` to exist.
plot_single_learning_curve(results)

In [None]:
# Final log line noted for this run (presumably copied from the training output):
# Epoch: 199.00, Train Loss: 0.32, Val Loss: 13.19, Train BLEU: 98.94, Val BLEU: 0.27
# Train BLEU is near 99 while Val BLEU stays below 1.
plot_single_learning_curve(results)

In [None]:
# Variant noted for this run: attention energies computed with the operand order switched, i.e.
#   energies = v_broadcast.bmm(torch.tanh(self.attn(concat)).transpose(1, 2))
# Final log line noted for it (presumably copied from the training output):
# Epoch: 199.00, Train Loss: 0.63, Val Loss: 12.82, Train BLEU: 92.05, Val BLEU: 0.38
plot_single_learning_curve(results)

In [None]:
# Print the first 20 entries of the source-language id2token table.
# zip(range(20), ...) stops after 20 items instead of walking the whole vocabulary
# and discarding everything past index 19; the printed output is unchanged.
for i, token in zip(range(20), vocab[SRC_LANG]['id2token']):
    print("{}: {}".format(i, token))

In [None]:
# Print the first 20 entries of the target-language id2token table.
# zip(range(20), ...) stops after 20 items instead of walking the whole vocabulary
# and discarding everything past index 19; the printed output is unchanged.
for i, token in zip(range(20), vocab[TARG_LANG]['id2token']):
    print("{}: {}".format(i, token))

In [None]:
# Scratch check: how slicing along dim 0 followed by view() reshapes a 3-D tensor.
# (torch is already imported in the notebook's import cell, so it is not re-imported here.)
x = torch.arange(0, 3*5*10).view(3, 5, 10)
print(x)
y = x[1:, :, :]  # drop the first slice along dim 0 -> shape (2, 5, 10)
print(y)
z = y.view(-1, 10)  # merge the two leading dims -> shape (10, 10)
print(z)

In [None]:
# Scratch check: flattening a (5, 2) tensor directly versus after swapping its two dims.
t = torch.arange(10).view(5, 2)
u = t.contiguous().view(-1)   # row-major flatten of t
v = t.permute(1, 0)           # swapped dims -> shape (2, 5)
w = v.contiguous().view(-1)   # flatten after the swap (contiguous() is called before view)
print(t, u, v, w, sep='\n')

In [None]:
# Scratch check: reshape a flat run of 600 (= 2 * 1 * 300) values into (-1, 1, 300).
a = torch.arange(600)
print(a)
b = a.view(-1, 1, 300)
print(b.shape)

In [None]:
# Decode the source-side token ids of the first training batch back into text,
# as a sanity check on what the dataloader yields.
id2token = vocab[SRC_LANG]['id2token']


def to_token(ids):
    """Join the tokens looked up for each id in `ids` into one space-separated string."""
    return ' '.join([id2token[idx] for idx in ids])


# The loaders are created as `loaders_full`; the previous `full_loaders` name was never
# defined and raised NameError on a fresh kernel.
for src_idxs, targ_idxs, src_lens, targ_lens in loaders_full['train']:
    test_tensor = src_idxs
    # One list of int ids per row of the batch.
    list_of_lists = test_tensor.numpy().astype(int).tolist()
    list_of_lists_tokens = [to_token(ids) for ids in list_of_lists]
    break  # only the first batch is inspected