In [1]:
import numpy as np 
import pandas as pd 
from data_processing import generate_vocab, process_data, create_dataloaders
from model import get_pretrained_emb, EncoderRNN, DecoderRNN, DecoderAttnRNN, EncoderDecoder, EncoderDecoderAttn
from train_eval import train_and_eval, count_parameters, summarize_results, plot_single_learning_curve, load_experiment_log
import pickle as pkl 
import torch
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Settings consumed when building the datasets and dataloaders.

# Translation direction: source language -> target language.
SRC_LANG, TARG_LANG = "vi", "en"

# Sentence-length and vocabulary-size settings; source and target currently share the same values.
SRC_MAX_SENTENCE_LEN = TARG_MAX_SENTENCE_LEN = 10
SRC_VOCAB_SIZE = TARG_VOCAB_SIZE = 30000

# Batch size passed to create_dataloaders; also reused as the sample_limit of the one-batch dataset.
BATCH_SIZE = 64

In [3]:
# One-off vocabulary build, kept commented out because it takes a long time to process.
# The result is pickled to "<src>-<targ>-vocab.p" so that later runs can reload it from disk
# instead of regenerating it. Uncomment and run these lines when that pickle does not exist yet.
# NOTE: the filename does not encode the vocab sizes, so regenerate after changing
# SRC_VOCAB_SIZE / TARG_VOCAB_SIZE as well.
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [4]:
# Reload the vocabulary from its pickle (produced by the commented-out generation cell above).
# NOTE: unpickling can execute arbitrary code -- only load a vocab file that you created yourself.
vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
with open(vocab_filename, "rb") as vocab_file:
    # The context manager closes the file handle even if unpickling raises.
    vocab = pkl.load(vocab_file)

# Three datasets built with identical settings except for sample_limit
# (presumably a cap on the number of sentence pairs -- confirm in data_processing.process_data):
#   data            -- no sample_limit passed
#   data_minibatch  -- sample_limit=BATCH_SIZE (a single batch worth of samples)
#   data_minitrain  -- sample_limit=1000
data = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, filter_long=False)
data_minibatch = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, sample_limit=BATCH_SIZE, filter_long=False)
data_minitrain = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, sample_limit=1000, filter_long=False)

In [5]:
# Build one set of dataloaders per dataset (full, single-batch, 1000-sample),
# all with the same sentence-length limits and batch size.
loaders_full, loaders_minibatch, loaders_minitrain = (
    create_dataloaders(dataset, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, BATCH_SIZE)
    for dataset in (data, data_minibatch, data_minitrain)
)

In [6]:
# Architecture hyperparameters.
NETWORK_TYPE = "rnn"
RNN_CELL_TYPE = "gru"
NUM_LAYERS = 2
ENC_HIDDEN_DIM = 512
DEC_HIDDEN_DIM = ENC_HIDDEN_DIM * 2  # decoder hidden size is set to twice the encoder's
TEACHER_FORCING_RATIO = 1
CLIP_GRAD_MAX_NORM = 1
ENC_DROPOUT = DEC_DROPOUT = 0.2  # encoder and decoder start from the same dropout rate
ATTENTION_TYPE = "additive"

# Training hyperparameters.
NUM_EPOCHS = 5
LR = 3e-4  # original note lists 0.0005 here (presumably a previously tried value)
OPTIMIZER = "Adam"
LAZY_TRAIN = False

# Name the experiment and derive the model name from the network type.
EXPERIMENT_NAME = 'hyperparameter_tuning_dropout'
if NETWORK_TYPE == 'rnn':
    MODEL_NAME = '{}-rnn-{}-attn'.format(SRC_LANG, ATTENTION_TYPE)
elif NETWORK_TYPE == 'cnn':
    MODEL_NAME = '{}-cnn'.format(SRC_LANG)
else:
    # Fail loudly: otherwise MODEL_NAME would be undefined on a fresh kernel, or silently keep
    # a stale value from an earlier run of this cell and mislabel the logged experiment.
    raise ValueError("Unsupported NETWORK_TYPE: {!r} (expected 'rnn' or 'cnn')".format(NETWORK_TYPE))

In [7]:
# Collect every setting of this run in one dict so it can be saved with the results later.
params = {
    # experiment identity
    'experiment_name': EXPERIMENT_NAME,
    'model_name': MODEL_NAME,
    # data settings
    'src_lang': SRC_LANG,
    'targ_lang': TARG_LANG,
    'rnn_cell_type': RNN_CELL_TYPE,
    'src_max_sentence_len': SRC_MAX_SENTENCE_LEN,
    'targ_max_sentence_len': TARG_MAX_SENTENCE_LEN,
    'src_vocab_size': SRC_VOCAB_SIZE,
    'targ_vocab_size': TARG_VOCAB_SIZE,
    # architecture
    'num_layers': NUM_LAYERS,
    'enc_hidden_dim': ENC_HIDDEN_DIM,
    'dec_hidden_dim': DEC_HIDDEN_DIM,
    'teacher_forcing_ratio': TEACHER_FORCING_RATIO,
    'clip_grad_max_norm': CLIP_GRAD_MAX_NORM,
    'enc_dropout': ENC_DROPOUT,
    'dec_dropout': DEC_DROPOUT,
    'attention_type': ATTENTION_TYPE,
    # training
    'batch_size': BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'learning_rate': LR,
    'optimizer': OPTIMIZER,
    'lazy_train': LAZY_TRAIN,
}

In [None]:
# Dropout sweep: build, train and evaluate one attention model from scratch per candidate rate.
for candidate in (0, 0.2, 0.5):

    print("Training with dropout = {}".format(candidate))

    # Stamp this run's dropout rate and a matching model name onto the shared params dict.
    params.update({
        'enc_dropout': candidate,
        'dec_dropout': candidate,
        'model_name': '{}-rnn-{}-attn-{}-dropout'.format(SRC_LANG, ATTENTION_TYPE, candidate),
    })

    # Encoder, with embeddings derived from the source-language vocab.
    src_vocab = vocab[SRC_LANG]
    encoder = EncoderRNN(
        rnn_cell_type=RNN_CELL_TYPE,
        enc_hidden_dim=ENC_HIDDEN_DIM,
        num_layers=NUM_LAYERS,
        src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
        enc_dropout=candidate,
        pretrained_word2vec=get_pretrained_emb(src_vocab['word2vec'], src_vocab['token2id']),
    )

    # Attention decoder, with embeddings derived from the target-language vocab.
    targ_vocab = vocab[TARG_LANG]
    decoder = DecoderAttnRNN(
        rnn_cell_type=RNN_CELL_TYPE,
        dec_hidden_dim=DEC_HIDDEN_DIM,
        enc_hidden_dim=ENC_HIDDEN_DIM,
        num_layers=NUM_LAYERS,
        targ_vocab_size=TARG_VOCAB_SIZE,
        src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
        targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
        dec_dropout=candidate,
        attention_type=ATTENTION_TYPE,
        pretrained_word2vec=get_pretrained_emb(targ_vocab['word2vec'], targ_vocab['token2id']),
    )

    # Combine both halves and move the model to the selected device.
    model = EncoderDecoderAttn(encoder, decoder, targ_vocab['token2id']).to(device)

    # Train and evaluate; `model` and `results` are overwritten on every iteration,
    # so only the last candidate's objects remain in the namespace after the loop.
    model, results = train_and_eval(
        model=model,
        loaders_full=loaders_full,
        loaders_minibatch=loaders_minibatch,
        loaders_minitrain=loaders_minitrain,
        params=params,
        vocab=vocab,
        print_intermediate=100,
        save_checkpoint=True,
        save_to_log=True,
        lazy_eval=True,
        print_attn=True,
        inspect_samples=1,
    )

Training with dropout = 0
Epoch: 0.00, Train Loss: 0.00, Val Loss: 10.23, Train BLEU: 0.00, Val BLEU: 0.07, Minutes Elapsed: 0.12
Sampling from val predictions...
Source: chúng_tôi không_thể chỉnh sửa nếu ảnh không được làm sạch
Reference: we couldn &apos;t <UNK> the photo unless it was
Model: <SOS> and the the the to to to to to
Attention Weights: tensor([[0.0981, 0.0985, 0.0990, 0.0989, 0.0993, 0.1002, 0.1009, 0.1012, 0.1016,
         0.1023],
        [0.0981, 0.0985, 0.0990, 0.0989, 0.0993, 0.1002, 0.1009, 0.1012, 0.1016,
         0.1023],
        [0.0981, 0.0985, 0.0990, 0.0989, 0.0993, 0.1002, 0.1009, 0.1012, 0.1016,
         0.1023],
        [0.0981, 0.0985, 0.0990, 0.0989, 0.0993, 0.1002, 0.1009, 0.1012, 0.1016,
         0.1023],
        [0.0981, 0.0985, 0.0990, 0.0989, 0.0993, 0.1002, 0.1009, 0.1012, 0.1016,
         0.1023],
        [0.0981, 0.0985, 0.0990, 0.0989, 0.0993, 0.1002, 0.1009, 0.1012, 0.1016,
         0.1023],
        [0.0981, 0.0985, 0.0990, 0.0989, 0.0993, 0.1002



Epoch: 0.05, Train Loss: 0.00, Val Loss: 6.41, Train BLEU: 0.00, Val BLEU: 0.03, Minutes Elapsed: 2.02
Sampling from val predictions...
Source: khi trượt tay , tôi <UNK> nhớ lại người thợ_mỏ
Reference: when my hand slips , i suddenly remember a
Model: <SOS> and , , , , , , , ,
Attention Weights: tensor([[5.1922e-05, 1.5019e-02, 5.3390e-02, 8.7278e-02, 1.1048e-01, 1.3662e-01,
         1.8327e-01, 2.1258e-01, 1.7476e-01, 2.6558e-02],
        [5.8412e-06, 2.9416e-03, 2.0450e-02, 4.7121e-02, 7.3414e-02, 1.1118e-01,
         1.9768e-01, 2.8167e-01, 2.4148e-01, 2.4065e-02],
        [1.4398e-05, 3.4683e-03, 2.1164e-02, 4.7087e-02, 7.2829e-02, 1.1000e-01,
         1.9580e-01, 2.8066e-01, 2.4268e-01, 2.6297e-02],
        [1.4583e-05, 3.4601e-03, 2.1071e-02, 4.6859e-02, 7.2455e-02, 1.0947e-01,
         1.9522e-01, 2.8066e-01, 2.4398e-01, 2.6803e-02],
        [1.4359e-05, 3.4351e-03, 2.0976e-02, 4.6701e-02, 7.2252e-02, 1.0922e-01,
         1.9500e-01, 2.8073e-01, 2.4466e-01, 2.7019e-02],
        



Epoch: 0.14, Train Loss: 0.00, Val Loss: 6.23, Train BLEU: 0.00, Val BLEU: 0.61, Minutes Elapsed: 5.44
Sampling from val predictions...
Source: đó là cách duy_nhất để chúng_tôi có_thể đi học .
Reference: it was the only way we both could be
Model: <SOS> and , , , , , , , ,
Attention Weights: tensor([[0.2524, 0.2080, 0.1439, 0.0849, 0.0822, 0.0634, 0.0560, 0.0483, 0.0353,
         0.0257],
        [0.1252, 0.1335, 0.1301, 0.1127, 0.1108, 0.1020, 0.0946, 0.0833, 0.0634,
         0.0444],
        [0.0911, 0.1008, 0.1038, 0.1047, 0.1049, 0.1051, 0.1050, 0.1034, 0.0982,
         0.0831],
        [0.0783, 0.0952, 0.1017, 0.1050, 0.1051, 0.1063, 0.1066, 0.1056, 0.1028,
         0.0933],
        [0.0700, 0.0928, 0.1018, 0.1060, 0.1059, 0.1075, 0.1078, 0.1070, 0.1046,
         0.0966],
        [0.0621, 0.0906, 0.1021, 0.1073, 0.1070, 0.1089, 0.1092, 0.1084, 0.1062,
         0.0982],
        [0.0546, 0.0882, 0.1024, 0.1087, 0.1083, 0.1104, 0.1107, 0.1098, 0.1076,
         0.0993],
        [0.047



Epoch: 0.19, Train Loss: 0.00, Val Loss: 6.15, Train BLEU: 0.00, Val BLEU: 0.24, Minutes Elapsed: 7.13
Sampling from val predictions...
Source: công_nghiệp dệt may cũng thường được nghĩ đến khi nghe
Reference: the textile industry is another one we often think
Model: <SOS> so we we , , , , , ,
Attention Weights: tensor([[0.9637, 0.0306, 0.0025, 0.0011, 0.0007, 0.0005, 0.0004, 0.0003, 0.0002,
         0.0001],
        [0.3676, 0.1829, 0.0980, 0.0751, 0.0644, 0.0569, 0.0511, 0.0453, 0.0353,
         0.0233],
        [0.2265, 0.1508, 0.1039, 0.0916, 0.0853, 0.0815, 0.0765, 0.0729, 0.0632,
         0.0478],
        [0.1219, 0.1354, 0.1121, 0.1032, 0.0988, 0.0965, 0.0930, 0.0899, 0.0824,
         0.0667],
        [0.0775, 0.1231, 0.1152, 0.1090, 0.1061, 0.1043, 0.1020, 0.0983, 0.0910,
         0.0734],
        [0.0480, 0.1069, 0.1165, 0.1139, 0.1125, 0.1114, 0.1101, 0.1055, 0.0977,
         0.0775],
        [0.0303, 0.0903, 0.1162, 0.1174, 0.1178, 0.1172, 0.1168, 0.1111, 0.1030,
         0.



Epoch: 0.24, Train Loss: 0.00, Val Loss: 6.00, Train BLEU: 0.00, Val BLEU: 2.70, Minutes Elapsed: 8.84
Sampling from val predictions...
Source: tại tây phi có_một ngành buôn_bán kền_kền <UNK> kinh_khủng để
Reference: in west africa , there &apos;s a horrific trade
Model: <SOS> it &apos;s a a a , , , ,
Attention Weights: tensor([[9.8392e-01, 1.5370e-02, 5.1550e-04, 1.0617e-04, 3.7389e-05, 1.6875e-05,
         9.7165e-06, 8.6830e-06, 6.4099e-06, 4.6741e-06],
        [2.4828e-01, 2.2104e-01, 1.7124e-01, 1.3725e-01, 7.9021e-02, 4.4680e-02,
         3.0712e-02, 2.9121e-02, 2.1486e-02, 1.7167e-02],
        [6.3238e-02, 9.8611e-02, 1.3943e-01, 1.3483e-01, 1.1997e-01, 1.0765e-01,
         9.4088e-02, 8.4412e-02, 8.3641e-02, 7.4136e-02],
        [6.6530e-03, 1.4940e-02, 5.4998e-02, 7.4857e-02, 1.6067e-01, 1.7166e-01,
         1.4833e-01, 9.6149e-02, 1.4472e-01, 1.2703e-01],
        [3.1114e-03, 7.8160e-03, 5.2068e-02, 7.4738e-02, 1.7170e-01, 1.7981e-01,
         1.4923e-01, 9.1983e-02, 1.4527e-



Epoch: 0.29, Train Loss: 0.00, Val Loss: 5.86, Train BLEU: 0.00, Val BLEU: 2.29, Minutes Elapsed: 10.54
Sampling from val predictions...
Source: ước_lượng rằng có hơn 4.000 trẻ_em đang làm nô_lệ ở
Reference: it &apos;s estimated that more than 4,000 children are
Model: <SOS> but &apos;s the a the the the the the
Attention Weights: tensor([[9.1993e-01, 7.9449e-02, 6.0560e-04, 1.8425e-05, 6.0842e-07, 5.3712e-08,
         2.4291e-08, 1.6161e-08, 9.9017e-09, 5.0075e-09],
        [3.3896e-01, 2.4333e-01, 1.9856e-01, 1.1452e-01, 5.7431e-02, 1.6754e-02,
         1.2012e-02, 9.0888e-03, 5.7611e-03, 3.5818e-03],
        [1.9732e-01, 2.0352e-01, 2.4292e-01, 2.2976e-01, 9.5069e-02, 1.3301e-02,
         8.1998e-03, 5.2710e-03, 2.9728e-03, 1.6625e-03],
        [1.6540e-02, 2.6463e-02, 5.3261e-02, 2.2055e-01, 2.7285e-01, 1.2924e-01,
         1.0400e-01, 8.6820e-02, 5.4707e-02, 3.5568e-02],
        [5.6355e-04, 7.6358e-04, 1.5087e-03, 6.7336e-03, 1.0234e-01, 1.9165e-01,
         2.0994e-01, 2.0556e-0



Epoch: 0.34, Train Loss: 0.00, Val Loss: 5.71, Train BLEU: 0.00, Val BLEU: 2.36, Minutes Elapsed: 12.24
Sampling from val predictions...
Source: nhưng thay vì thế , chúng_tôi giết chết mọi thứ
Reference: instead , everything we touched we killed . <EOS>
Model: <SOS> but the , , , , we , ,
Attention Weights: tensor([[9.5112e-01, 4.8201e-02, 6.6598e-04, 1.6083e-05, 6.1296e-07, 1.3232e-07,
         4.5335e-08, 2.4618e-08, 1.3761e-08, 9.2039e-09],
        [2.0362e-01, 3.2003e-01, 3.2274e-01, 1.1500e-01, 2.0969e-02, 9.4360e-03,
         3.1868e-03, 2.2237e-03, 1.5340e-03, 1.2475e-03],
        [4.1233e-02, 9.2034e-02, 2.1158e-01, 3.6046e-01, 1.8975e-01, 5.3750e-02,
         1.8789e-02, 1.3673e-02, 1.0232e-02, 8.5050e-03],
        [3.5170e-03, 8.4938e-03, 2.7927e-02, 1.9097e-01, 4.0862e-01, 1.9175e-01,
         6.5685e-02, 4.4532e-02, 3.2579e-02, 2.5927e-02],
        [1.4598e-03, 3.0055e-03, 7.7727e-03, 5.0559e-02, 2.5979e-01, 4.0285e-01,
         1.2098e-01, 6.6304e-02, 4.7952e-02, 3.9327e-0



Epoch: 0.38, Train Loss: 0.00, Val Loss: 5.58, Train BLEU: 0.00, Val BLEU: 3.92, Minutes Elapsed: 13.97
Sampling from val predictions...
Source: phụ_nữ thắng <UNK> phần_trăm trong quốc_hội nhà_nước ở những cuộc
Reference: women won <UNK> percent of the national congress in
Model: <SOS> so i i i the the the the the
Attention Weights: tensor([[9.8251e-01, 1.7305e-02, 1.7893e-04, 8.1722e-07, 9.3665e-08, 5.4294e-08,
         3.3722e-08, 2.8628e-08, 1.8766e-08, 1.9847e-08],
        [4.3555e-01, 3.4302e-01, 1.8271e-01, 2.2261e-02, 5.8977e-03, 3.6223e-03,
         2.1788e-03, 1.9292e-03, 1.3747e-03, 1.4567e-03],
        [7.6531e-02, 1.6411e-01, 5.5999e-01, 1.3040e-01, 2.2292e-02, 1.4744e-02,
         9.5908e-03, 8.5459e-03, 6.5584e-03, 7.2276e-03],
        [9.0232e-03, 3.7056e-02, 3.2381e-01, 3.7811e-01, 8.7457e-02, 5.1439e-02,
         3.3794e-02, 3.0245e-02, 2.3490e-02, 2.5568e-02],
        [5.8037e-04, 1.2071e-03, 9.0604e-03, 1.3970e-01, 2.7345e-01, 1.8386e-01,
         1.2435e-01, 1.0854e



Epoch: 0.43, Train Loss: 0.00, Val Loss: 5.44, Train BLEU: 0.00, Val BLEU: 5.02, Minutes Elapsed: 15.79
Sampling from val predictions...
Source: cùng_nhau chúng_ta có_thể làm_cho giường ngủ của chúng_ta bàn_ăn và
Reference: together we can make our beds , our dinner
Model: <SOS> so we can to to and and and and
Attention Weights: tensor([[9.9272e-01, 7.1981e-03, 8.1266e-05, 3.8654e-07, 5.3084e-08, 3.2372e-08,
         9.9124e-09, 1.6008e-08, 5.6870e-09, 1.6889e-09],
        [2.1286e-01, 7.1437e-01, 6.1219e-02, 4.6729e-03, 2.2150e-03, 1.7101e-03,
         7.9733e-04, 1.1055e-03, 6.4607e-04, 4.0395e-04],
        [1.5577e-02, 5.9019e-01, 3.6604e-01, 1.4603e-02, 4.2023e-03, 3.1677e-03,
         1.7732e-03, 1.9892e-03, 1.4275e-03, 1.0359e-03],
        [3.6262e-03, 7.3166e-02, 5.6080e-01, 1.6386e-01, 5.6889e-02, 4.5922e-02,
         3.0607e-02, 2.2894e-02, 2.3865e-02, 1.8370e-02],
        [1.4976e-04, 7.0458e-04, 1.2876e-02, 1.7903e-01, 2.0071e-01, 2.1499e-01,
         1.3216e-01, 8.8432e-02,



Epoch: 0.48, Train Loss: 0.00, Val Loss: 5.33, Train BLEU: 0.00, Val BLEU: 4.81, Minutes Elapsed: 17.75
Sampling from val predictions...
Source: cám_ơn rất nhiều . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: thank you very much . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> it &apos;s . <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.9106, 0.0892, 0.0002, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.4299, 0.4689, 0.0854, 0.0128, 0.0029, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1224, 0.1897, 0.1911, 0.2713, 0.2256, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0714, 0.1022, 0.1585, 0.2470, 0.4209, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0582, 0.0688, 0.1048, 0.2186, 0.5496, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0810, 0.0951, 0.1177, 0.2305, 0.4757, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1233, 0.1455, 0.1286, 0.1954, 0.4071, 0.0000, 0.0000, 0.0



Epoch: 0.53, Train Loss: 0.00, Val Loss: 5.26, Train BLEU: 0.00, Val BLEU: 6.31, Minutes Elapsed: 19.45
Sampling from val predictions...
Source: tại phòng_thí_nghiệm chúng_tôi đã thử qua nhiều mẫu , và
Reference: we tested a lot of specimens there , and
Model: <SOS> so we to to of the , , ,
Attention Weights: tensor([[9.9159e-01, 8.3874e-03, 2.6217e-05, 2.1467e-07, 1.0928e-08, 4.2805e-09,
         2.4576e-09, 1.4298e-09, 8.2246e-10, 3.4698e-10],
        [3.4592e-01, 2.1866e-01, 4.1974e-01, 1.0367e-02, 2.0402e-03, 1.2582e-03,
         8.0917e-04, 5.2998e-04, 3.9679e-04, 2.7466e-04],
        [4.5531e-02, 8.2522e-02, 7.3514e-01, 1.0345e-01, 1.4262e-02, 9.1480e-03,
         4.0067e-03, 2.4804e-03, 1.8610e-03, 1.5958e-03],
        [1.5286e-03, 3.6302e-03, 6.3022e-02, 5.3352e-01, 1.2239e-01, 7.3827e-02,
         6.6645e-02, 6.3320e-02, 4.6623e-02, 2.5499e-02],
        [1.9094e-04, 3.7789e-04, 2.8091e-03, 1.9090e-01, 2.0295e-01, 1.6433e-01,
         1.5149e-01, 1.4059e-01, 9.7718e-02, 4.8642e



Epoch: 0.58, Train Loss: 0.00, Val Loss: 5.17, Train BLEU: 0.00, Val BLEU: 6.43, Minutes Elapsed: 21.16
Sampling from val predictions...
Source: 5 năm trước , tôi đã trải qua 1 chuyện
Reference: five years ago , i experienced a bit of
Model: <SOS> i years years years i i to a a
Attention Weights: tensor([[9.9723e-01, 2.7141e-03, 4.9398e-05, 5.5068e-06, 1.4708e-06, 3.8293e-07,
         1.8066e-07, 1.3277e-07, 1.3150e-07, 5.8499e-08],
        [4.5803e-01, 5.0813e-01, 2.4355e-02, 4.0235e-03, 2.1279e-03, 1.0904e-03,
         7.8051e-04, 6.3399e-04, 5.4000e-04, 2.8536e-04],
        [5.9405e-02, 8.4209e-01, 8.3836e-02, 8.5493e-03, 2.6530e-03, 1.3090e-03,
         7.8756e-04, 6.6477e-04, 5.0000e-04, 2.0564e-04],
        [5.2587e-03, 6.1494e-01, 2.9303e-01, 6.2708e-02, 1.1733e-02, 4.4485e-03,
         2.6892e-03, 2.3097e-03, 1.8109e-03, 1.0673e-03],
        [1.7330e-03, 1.7411e-01, 2.9476e-01, 3.3706e-01, 1.1436e-01, 3.4311e-02,
         1.7142e-02, 1.2263e-02, 9.2116e-03, 5.0500e-03],
       



Epoch: 0.62, Train Loss: 0.00, Val Loss: 5.08, Train BLEU: 0.00, Val BLEU: 7.39, Minutes Elapsed: 22.87
Sampling from val predictions...
Source: khi tôi viết bài phát_biểu này , tôi cảm_thấy rất
Reference: so when i was writing this talk , i
Model: <SOS> when i i i my , i i i
Attention Weights: tensor([[9.9840e-01, 1.5952e-03, 4.9896e-07, 1.2185e-08, 1.5086e-09, 7.1725e-10,
         3.7012e-10, 2.3630e-10, 9.7070e-11, 7.5952e-11],
        [1.2761e-01, 8.4127e-01, 3.0137e-02, 6.4952e-04, 1.1477e-04, 5.6201e-05,
         5.1214e-05, 4.5394e-05, 3.4391e-05, 3.2071e-05],
        [1.2729e-02, 3.8334e-02, 9.1225e-01, 2.9943e-02, 2.3783e-03, 1.2346e-03,
         9.5532e-04, 7.7065e-04, 7.2797e-04, 6.8163e-04],
        [7.6005e-04, 1.2405e-03, 7.8044e-01, 2.0397e-01, 6.8069e-03, 2.4815e-03,
         1.4503e-03, 1.0079e-03, 9.4002e-04, 9.0269e-04],
        [9.0732e-04, 1.7617e-03, 7.4739e-01, 2.2067e-01, 1.4991e-02, 4.5125e-03,
         3.2455e-03, 2.6855e-03, 2.0231e-03, 1.8211e-03],
        [



Epoch: 0.67, Train Loss: 0.00, Val Loss: 5.02, Train BLEU: 0.00, Val BLEU: 6.81, Minutes Elapsed: 24.57
Sampling from val predictions...
Source: không bao_giờ có từ " tôi , và từ "
Reference: never the word &quot; i , &quot; and the
Model: <SOS> he &apos;s a , &quot; , and and ,
Attention Weights: tensor([[9.9948e-01, 5.1529e-04, 8.3480e-06, 1.1831e-07, 2.9249e-09, 3.8749e-09,
         9.4236e-10, 2.6717e-10, 2.9807e-10, 2.3947e-10],
        [4.9437e-01, 3.5671e-01, 1.4376e-01, 4.5042e-03, 2.2653e-04, 2.6150e-04,
         7.8674e-05, 3.1972e-05, 3.5863e-05, 2.6303e-05],
        [1.3913e-01, 3.3354e-01, 4.2442e-01, 8.7109e-02, 7.9731e-03, 4.3986e-03,
         1.8729e-03, 7.5896e-04, 4.6784e-04, 3.3160e-04],
        [7.8936e-04, 3.3575e-02, 2.5529e-01, 4.0273e-01, 1.7695e-01, 8.5176e-02,
         3.0963e-02, 8.3346e-03, 4.0421e-03, 2.1588e-03],
        [2.9608e-04, 2.0698e-03, 1.9974e-02, 1.3800e-01, 4.3301e-01, 2.7376e-01,
         9.0505e-02, 2.2703e-02, 1.2370e-02, 7.3179e-03],
      



Epoch: 0.72, Train Loss: 0.00, Val Loss: 4.95, Train BLEU: 0.00, Val BLEU: 8.12, Minutes Elapsed: 26.25
Sampling from val predictions...
Source: nó thật_là kì_cục . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: that was awkward . <EOS> <PAD> <PAD> <PAD> <PAD>
Model: <SOS> it &apos;s the . <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.9996, 0.0004, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.2595, 0.7223, 0.0178, 0.0003, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0938, 0.7159, 0.1851, 0.0046, 0.0005, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0156, 0.2083, 0.6771, 0.0809, 0.0181, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0069, 0.0294, 0.7008, 0.1652, 0.0977, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0278, 0.0590, 0.2885, 0.2692, 0.3554, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1558, 0.1886, 0.2522, 0.1654, 0.2380, 0.0000, 0.0000, 0



Epoch: 0.77, Train Loss: 0.00, Val Loss: 4.92, Train BLEU: 0.00, Val BLEU: 7.98, Minutes Elapsed: 27.95
Sampling from val predictions...
Source: tôi chắc rằng bạn trong số những người đang nghe
Reference: i promise you there are several people listening to
Model: <SOS> i &apos;m that you in the you you you
Attention Weights: tensor([[9.9996e-01, 3.9105e-05, 5.3797e-08, 4.0193e-09, 8.1217e-10, 4.2523e-10,
         2.5031e-10, 2.0006e-10, 1.4880e-10, 1.1661e-10],
        [4.9606e-02, 9.3736e-01, 1.1298e-02, 1.0061e-03, 2.3135e-04, 1.7527e-04,
         1.1040e-04, 8.1684e-05, 7.1943e-05, 5.7011e-05],
        [1.3542e-02, 8.2655e-01, 1.4933e-01, 7.2143e-03, 1.9168e-03, 7.3743e-04,
         2.8431e-04, 1.8751e-04, 1.3694e-04, 9.1794e-05],
        [1.2479e-04, 8.1035e-02, 7.0371e-01, 1.2723e-01, 5.8795e-02, 1.6877e-02,
         4.6711e-03, 3.3775e-03, 2.1807e-03, 2.0055e-03],
        [8.0127e-06, 3.7272e-04, 1.0276e-02, 2.8234e-01, 3.9118e-01, 2.2172e-01,
         4.7461e-02, 2.4306e-02, 1.3



Epoch: 0.82, Train Loss: 0.00, Val Loss: 4.87, Train BLEU: 0.00, Val BLEU: 8.70, Minutes Elapsed: 29.66
Sampling from val predictions...
Source: chúng_tôi chụp_ảnh liên_tục . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: we take photos constantly . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> we &apos;re the . <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.9984, 0.0016, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0387, 0.9423, 0.0185, 0.0005, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0058, 0.7729, 0.2089, 0.0115, 0.0009, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0003, 0.0355, 0.5941, 0.2952, 0.0749, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0018, 0.0431, 0.1952, 0.4375, 0.3224, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0129, 0.1275, 0.1525, 0.3347, 0.3724, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0237, 0.2521, 0.1619, 0.2715, 0.2908, 0.0



Epoch: 0.86, Train Loss: 0.00, Val Loss: 4.81, Train BLEU: 0.00, Val BLEU: 8.80, Minutes Elapsed: 31.40
Sampling from val predictions...
Source: và tôi có được một công_tắc để có_thể tắt và
Reference: and i got a switch where i can switch
Model: <SOS> and i had a a to to and to
Attention Weights: tensor([[3.1774e-02, 9.6633e-01, 1.8888e-03, 4.3983e-06, 1.4235e-07, 2.5741e-08,
         5.7088e-09, 1.8459e-09, 2.7150e-10, 3.7006e-11],
        [5.0049e-03, 9.7801e-01, 1.6602e-02, 3.0684e-04, 4.0418e-05, 1.5832e-05,
         1.0747e-05, 5.2071e-06, 2.0523e-06, 7.1309e-07],
        [5.3961e-04, 1.9048e-02, 7.8966e-01, 1.7794e-01, 9.4362e-03, 2.1946e-03,
         8.0309e-04, 2.5937e-04, 9.1126e-05, 2.5521e-05],
        [1.7195e-04, 1.8023e-03, 1.6105e-01, 4.7726e-01, 2.7552e-01, 6.7399e-02,
         1.4540e-02, 1.7791e-03, 3.7992e-04, 9.2603e-05],
        [5.9657e-05, 4.1242e-05, 1.4126e-03, 1.1897e-01, 3.8275e-01, 3.4883e-01,
         1.1408e-01, 1.7336e-02, 1.3328e-02, 3.1926e-03],
       



Epoch: 0.91, Train Loss: 0.00, Val Loss: 4.76, Train BLEU: 0.00, Val BLEU: 8.32, Minutes Elapsed: 33.12
Sampling from val predictions...
Source: đối_với tôi , afghanistan là một đất_nước của hy_vọng ,
Reference: to me , afghanistan is a country of hope
Model: <SOS> my i , a was a a , ,
Attention Weights: tensor([[8.1936e-01, 1.8006e-01, 5.7759e-04, 8.1275e-08, 2.0820e-10, 2.0479e-11,
         2.8208e-12, 1.3780e-12, 1.0480e-12, 9.8937e-13],
        [2.6197e-01, 4.7730e-01, 2.0980e-01, 5.0012e-02, 6.4569e-04, 1.4947e-04,
         4.4590e-05, 3.1158e-05, 2.1478e-05, 2.1117e-05],
        [1.7142e-02, 1.2415e-02, 3.2420e-01, 6.3276e-01, 1.1710e-02, 1.1752e-03,
         3.3891e-04, 1.3714e-04, 7.5942e-05, 4.4673e-05],
        [9.6100e-04, 3.2500e-04, 4.3948e-02, 8.9632e-01, 4.8454e-02, 5.9782e-03,
         2.3187e-03, 9.9087e-04, 4.5377e-04, 2.4578e-04],
        [2.2881e-04, 2.0450e-04, 2.9011e-03, 5.8958e-01, 3.1518e-01, 5.2358e-02,
         2.3102e-02, 1.0211e-02, 4.2499e-03, 1.9763e-03],



Epoch: 0.96, Train Loss: 0.00, Val Loss: 4.73, Train BLEU: 0.00, Val BLEU: 8.79, Minutes Elapsed: 34.86
Sampling from val predictions...
Source: nên tối đó , tôi truy_cập facebook và hỏi một_vài
Reference: so that evening , i just reached out on
Model: <SOS> so i i i i i to and and
Attention Weights: tensor([[7.8656e-01, 1.6597e-01, 4.7456e-02, 1.0817e-05, 6.9755e-07, 6.6376e-09,
         2.5788e-09, 1.3112e-10, 1.0312e-10, 5.6021e-11],
        [1.1658e-01, 6.2211e-01, 2.5818e-01, 1.9542e-03, 9.0341e-04, 1.4782e-04,
         7.7283e-05, 1.4553e-05, 1.6466e-05, 1.4547e-05],
        [8.8900e-02, 3.1238e-01, 4.6973e-01, 7.4753e-02, 4.5816e-02, 5.8667e-03,
         2.0043e-03, 1.8098e-04, 1.8012e-04, 1.8494e-04],
        [5.2350e-02, 1.4253e-01, 2.0572e-01, 2.5832e-01, 2.1103e-01, 1.1496e-01,
         1.2573e-02, 6.9845e-04, 9.2463e-04, 8.9406e-04],
        [2.7977e-03, 9.3068e-03, 2.5580e-02, 2.2905e-01, 4.8225e-01, 2.1811e-01,
         2.8844e-02, 1.5119e-03, 1.3137e-03, 1.2322e-03],
   



Epoch: 1.00, Train Loss: 0.00, Val Loss: 4.73, Train BLEU: 0.00, Val BLEU: 8.69, Minutes Elapsed: 36.31
Sampling from val predictions...
Source: chúng_tôi có những quả cà_chua rất tuyệt_vời . tại ý
Reference: we had these magnificent tomatoes . in italy ,
Model: <SOS> we have have the the . <EOS> &apos;s .
Attention Weights: tensor([[9.9973e-01, 2.6689e-04, 1.8986e-06, 5.5424e-08, 6.4761e-09, 1.1885e-08,
         3.2351e-09, 1.4462e-09, 2.1355e-09, 1.1785e-09],
        [1.0173e-01, 8.3139e-01, 6.3989e-02, 1.7140e-03, 3.4219e-04, 4.5520e-04,
         1.5410e-04, 6.1604e-05, 9.1998e-05, 6.8583e-05],
        [2.1887e-02, 4.2946e-01, 5.2638e-01, 1.9926e-02, 1.0747e-03, 8.9445e-04,
         1.7310e-04, 5.5470e-05, 8.2002e-05, 6.7698e-05],
        [4.2320e-05, 1.0800e-03, 2.6902e-01, 6.2635e-01, 6.5786e-02, 2.4393e-02,
         9.6589e-03, 1.7205e-03, 1.3648e-03, 5.8277e-04],
        [4.1402e-06, 9.2299e-05, 5.5873e-02, 7.0833e-01, 1.3066e-01, 7.7672e-02,
         1.8664e-02, 3.8474e-03, 3.3



Epoch: 1.05, Train Loss: 0.00, Val Loss: 4.68, Train BLEU: 0.00, Val BLEU: 9.85, Minutes Elapsed: 37.98
Sampling from val predictions...
Source: bỗng_nhiên , tôi chẳng có một đất_nước nào để có_thể
Reference: suddenly , there was no country i could proudly
Model: <SOS> now , i i i to to i to
Attention Weights: tensor([[9.9975e-01, 2.4688e-04, 4.4651e-06, 1.2501e-07, 5.1281e-09, 5.9680e-10,
         1.2138e-10, 1.3881e-10, 9.4156e-11, 4.2250e-11],
        [7.9726e-01, 1.1631e-01, 5.5580e-02, 2.7228e-02, 2.3831e-03, 6.0683e-04,
         2.0120e-04, 1.9412e-04, 1.5171e-04, 8.5230e-05],
        [2.5232e-01, 1.9219e-01, 3.5139e-01, 1.9118e-01, 8.9511e-03, 1.8191e-03,
         6.5606e-04, 6.3848e-04, 5.3311e-04, 3.1185e-04],
        [1.7393e-02, 4.4153e-02, 1.8755e-01, 7.2399e-01, 2.2126e-02, 2.6139e-03,
         7.6137e-04, 6.5636e-04, 4.7746e-04, 2.7820e-04],
        [4.6725e-03, 8.0790e-03, 2.6900e-02, 8.4621e-01, 9.8718e-02, 1.0692e-02,
         2.3910e-03, 1.4459e-03, 5.7303e-04, 3.1689



Epoch: 1.10, Train Loss: 0.00, Val Loss: 4.64, Train BLEU: 0.00, Val BLEU: 10.31, Minutes Elapsed: 40.33
Sampling from val predictions...
Source: và cái chúng_tôi làm là trở_thành bạn của nhau ,
Reference: and what we do , we become friends ,
Model: <SOS> and what we do do is is a ,
Attention Weights: tensor([[1.0570e-02, 9.7885e-01, 1.0519e-02, 5.9779e-05, 1.0222e-07, 2.3582e-09,
         8.0455e-10, 1.0500e-10, 9.2477e-11, 2.7676e-11],
        [8.7811e-04, 9.8549e-01, 1.3498e-02, 1.2821e-04, 4.4830e-06, 1.0154e-06,
         6.3463e-07, 1.1761e-07, 8.0349e-08, 4.1168e-08],
        [1.6430e-03, 2.6242e-01, 5.1566e-01, 2.1487e-01, 4.5464e-03, 5.3959e-04,
         2.2192e-04, 4.7355e-05, 3.8731e-05, 1.4452e-05],
        [9.7185e-05, 4.8491e-03, 4.8789e-02, 7.6769e-01, 1.5848e-01, 1.6398e-02,
         2.9962e-03, 3.2994e-04, 2.7863e-04, 8.5104e-05],
        [1.0125e-04, 1.1918e-03, 5.1137e-03, 2.6883e-01, 4.2261e-01, 2.4069e-01,
         5.4087e-02, 4.5921e-03, 2.2886e-03, 4.9117e-04],
  



Epoch: 1.14, Train Loss: 0.00, Val Loss: 4.66, Train BLEU: 0.00, Val BLEU: 9.90, Minutes Elapsed: 42.74
Sampling from val predictions...
Source: chúng_tôi bắt_đầu chỉnh sửa những tấm ảnh này . <EOS>
Reference: so we started <UNK> photos . <EOS> <PAD> <PAD>
Model: <SOS> we started we to the the <EOS> . the
Attention Weights: tensor([[9.9977e-01, 2.2551e-04, 2.8294e-07, 8.5282e-09, 1.4492e-09, 6.4891e-10,
         4.7019e-10, 2.5082e-10, 1.4052e-10, 8.0801e-11],
        [1.7462e-01, 7.8866e-01, 3.3711e-02, 1.8840e-03, 5.7379e-04, 2.5076e-04,
         1.7664e-04, 7.8109e-05, 2.8769e-05, 1.7072e-05],
        [3.4526e-02, 4.5386e-01, 4.5539e-01, 5.2631e-02, 2.4006e-03, 5.5762e-04,
         4.2324e-04, 1.2343e-04, 5.8982e-05, 2.7927e-05],
        [5.8575e-02, 4.3746e-01, 4.2218e-01, 7.3898e-02, 5.8659e-03, 1.0560e-03,
         6.6635e-04, 1.8844e-04, 7.2501e-05, 3.2176e-05],
        [8.2292e-05, 6.5622e-03, 3.1905e-01, 4.7354e-01, 1.5262e-01, 3.1656e-02,
         1.3071e-02, 1.8888e-03, 1.06



Epoch: 1.19, Train Loss: 0.00, Val Loss: 4.61, Train BLEU: 0.00, Val BLEU: 10.03, Minutes Elapsed: 45.09
Sampling from val predictions...
Source: chúng_tôi nói với những người zambia , " chúa ơi
Reference: and we said to the <UNK> , &quot; my
Model: <SOS> we say we about the , , , ,
Attention Weights: tensor([[9.9993e-01, 7.1876e-05, 1.3267e-06, 7.1259e-08, 2.2601e-08, 8.6704e-09,
         3.3331e-09, 2.7924e-09, 3.2221e-09, 1.8688e-09],
        [1.0640e-01, 8.2811e-01, 5.1420e-02, 7.6415e-03, 2.8403e-03, 1.2388e-03,
         5.4922e-04, 4.2748e-04, 8.8009e-04, 4.9531e-04],
        [1.3423e-02, 3.4853e-01, 5.6585e-01, 4.6273e-02, 1.5036e-02, 7.5709e-03,
         1.2734e-03, 7.0257e-04, 8.1744e-04, 5.2459e-04],
        [5.9565e-02, 3.6290e-01, 5.2264e-01, 3.8479e-02, 9.4773e-03, 5.1059e-03,
         7.0924e-04, 3.3031e-04, 5.0928e-04, 2.7971e-04],
        [1.4986e-05, 3.0623e-03, 3.2783e-01, 2.9837e-01, 2.1020e-01, 1.2540e-01,
         2.2728e-02, 5.0331e-03, 4.4092e-03, 2.9523e-03],
  



Epoch: 1.24, Train Loss: 0.00, Val Loss: 4.56, Train BLEU: 0.00, Val BLEU: 10.55, Minutes Elapsed: 47.63
Sampling from val predictions...
Source: khi một cái gì đó trở_nên cực rẻ , nó
Reference: when something becomes ultra-low cost , it becomes massively
Model: <SOS> as a was the , , it it it
Attention Weights: tensor([[9.9374e-01, 5.0528e-03, 1.1606e-03, 4.3043e-05, 7.0949e-06, 9.6905e-08,
         1.0774e-08, 4.1916e-09, 5.4010e-10, 3.1842e-10],
        [1.8897e-02, 8.3300e-01, 1.3029e-01, 9.6462e-03, 7.3328e-03, 6.0909e-04,
         1.3911e-04, 5.4435e-05, 1.7362e-05, 1.6818e-05],
        [1.2320e-02, 8.7958e-02, 3.3402e-01, 1.1668e-01, 2.1421e-01, 1.9034e-01,
         3.5147e-02, 8.6831e-03, 3.2313e-04, 3.2053e-04],
        [1.1143e-03, 6.9106e-03, 5.7495e-02, 4.4069e-02, 1.5627e-01, 4.4006e-01,
         1.9575e-01, 9.6406e-02, 1.1395e-03, 7.8547e-04],
        [1.5839e-05, 2.3020e-04, 1.6625e-03, 4.6137e-03, 2.6330e-02, 2.7249e-01,
         4.0015e-01, 2.9036e-01, 2.6166e-03, 1.53



Epoch: 1.29, Train Loss: 0.00, Val Loss: 4.57, Train BLEU: 0.00, Val BLEU: 10.40, Minutes Elapsed: 50.14
Sampling from val predictions...
Source: ít_hơn 6 % phụ_nữ tuổi_tôi được học sau trung_học phổ_thông
Reference: fewer than six percent of women my age have
Model: <SOS> in of of percent of the are the in
Attention Weights: tensor([[9.9993e-01, 6.5739e-05, 1.1610e-06, 4.0498e-09, 5.6860e-10, 1.1610e-10,
         5.6005e-11, 2.8073e-11, 1.8143e-11, 1.6844e-11],
        [8.3034e-02, 4.1110e-01, 4.9410e-01, 7.3477e-03, 2.6752e-03, 7.5058e-04,
         4.1087e-04, 2.9269e-04, 1.5444e-04, 1.3497e-04],
        [1.4659e-02, 7.3205e-02, 8.3992e-01, 4.9737e-02, 1.4576e-02, 4.0121e-03,
         1.7126e-03, 1.2677e-03, 4.6135e-04, 4.5005e-04],
        [5.7597e-04, 4.3994e-03, 6.3649e-01, 1.9317e-01, 1.0737e-01, 3.2220e-02,
         1.3525e-02, 8.9031e-03, 1.6639e-03, 1.6885e-03],
        [3.1599e-04, 7.6687e-04, 1.5861e-01, 2.1164e-01, 2.7811e-01, 2.0983e-01,
         8.7639e-02, 4.2982e-02, 5.



Epoch: 1.34, Train Loss: 0.00, Val Loss: 4.52, Train BLEU: 0.00, Val BLEU: 10.48, Minutes Elapsed: 52.64
Sampling from val predictions...
Source: vì_thế chúng_tôi có_thể nhận sự trợ_giúp từ cộng_đồng quốc_tế cho
Reference: so we can benefit from the international community for
Model: <SOS> so we we make from from from from from
Attention Weights: tensor([[9.9709e-01, 2.9074e-03, 3.5503e-06, 4.9036e-08, 6.8215e-10, 9.5937e-11,
         1.4066e-11, 2.3131e-12, 8.5408e-13, 7.6203e-13],
        [1.9496e-01, 7.2370e-01, 7.5623e-02, 5.1407e-03, 3.8938e-04, 1.2411e-04,
         3.7519e-05, 1.9700e-05, 8.2929e-06, 5.0193e-06],
        [2.3567e-02, 1.1315e-01, 6.2469e-01, 2.2295e-01, 1.1915e-02, 2.4551e-03,
         6.0333e-04, 3.6883e-04, 1.6288e-04, 1.3260e-04],
        [3.7029e-03, 8.3690e-03, 1.4669e-01, 7.0520e-01, 1.2239e-01, 1.1966e-02,
         9.4730e-04, 4.6197e-04, 1.8171e-04, 9.1703e-05],
        [6.0100e-04, 2.3213e-04, 1.0260e-03, 1.5116e-01, 5.4650e-01, 2.8831e-01,
         8.056



Epoch: 1.38, Train Loss: 0.00, Val Loss: 4.49, Train BLEU: 0.00, Val BLEU: 11.07, Minutes Elapsed: 55.14
Sampling from val predictions...
Source: một slide như thế này , không_những nhàm_chán mà_còn phụ_thuộc
Reference: a slide like this is not only boring ,
Model: <SOS> a a , this , , , , ,
Attention Weights: tensor([[9.9522e-01, 4.3422e-03, 4.3032e-04, 5.7515e-06, 8.1584e-07, 1.0456e-07,
         8.1676e-08, 2.5157e-08, 6.5550e-09, 2.9150e-09],
        [4.0790e-01, 4.0326e-01, 1.5570e-01, 2.7007e-02, 2.4025e-03, 9.9934e-04,
         1.3769e-03, 7.4195e-04, 4.0112e-04, 2.1225e-04],
        [1.8455e-01, 1.8515e-01, 3.9971e-01, 1.8804e-01, 2.5199e-02, 5.9351e-03,
         6.9473e-03, 2.6037e-03, 1.3239e-03, 5.3556e-04],
        [1.1747e-02, 3.5753e-02, 3.3470e-01, 4.1784e-01, 7.8275e-02, 6.7028e-02,
         3.6518e-02, 1.0160e-02, 5.8902e-03, 2.0877e-03],
        [3.7411e-04, 1.7051e-03, 2.1985e-02, 1.6834e-01, 6.9596e-02, 2.9965e-01,
         3.4571e-01, 6.8005e-02, 2.0075e-02, 4.5598



Epoch: 1.43, Train Loss: 0.00, Val Loss: 4.46, Train BLEU: 0.00, Val BLEU: 11.16, Minutes Elapsed: 57.64
Sampling from val predictions...
Source: cám_ơn . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: thank you . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Model: <SOS> thank . . <EOS> . <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.9998, 0.0002, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.9543, 0.0392, 0.0065, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.6570, 0.1559, 0.1871, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.4738, 0.1213, 0.4049, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.5679, 0.1512, 0.2809, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.7235, 0.1722, 0.1044, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.6817, 0.1993, 0.1190, 0.0000, 0.0000, 0.0000, 0.0000, 0.000



Epoch: 1.48, Train Loss: 0.00, Val Loss: 4.44, Train BLEU: 0.00, Val BLEU: 11.02, Minutes Elapsed: 60.12
Sampling from val predictions...
Source: tuy_nhiên , sau khi chúng_tôi qua được biên_giới , gia_đình
Reference: but even after we got past the border ,
Model: <SOS> however , , when we when , , ,
Attention Weights: tensor([[9.9859e-01, 1.4093e-03, 8.0289e-07, 2.0302e-08, 1.1705e-09, 2.5788e-10,
         5.5442e-11, 2.9751e-11, 5.9946e-12, 3.4534e-12],
        [4.8279e-01, 4.4383e-01, 6.3945e-02, 6.1303e-03, 1.7545e-03, 7.4494e-04,
         3.8457e-04, 2.4313e-04, 9.4576e-05, 8.0722e-05],
        [2.2330e-01, 3.3036e-01, 3.3713e-01, 9.2573e-02, 8.7723e-03, 4.5201e-03,
         2.0207e-03, 8.1031e-04, 2.6007e-04, 2.6393e-04],
        [6.5365e-03, 4.9148e-02, 3.5103e-01, 4.2441e-01, 1.2442e-01, 2.6643e-02,
         9.8913e-03, 3.8870e-03, 1.8439e-03, 2.1818e-03],
        [1.8729e-03, 4.3023e-03, 3.0964e-02, 2.1997e-01, 3.5890e-01, 2.9077e-01,
         6.4104e-02, 1.7136e-02, 6.0290e-03



Epoch: 1.53, Train Loss: 0.00, Val Loss: 4.45, Train BLEU: 0.00, Val BLEU: 11.29, Minutes Elapsed: 62.62
Sampling from val predictions...
Source: tôi đã bị sốc . <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: i was so shocked . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> i was still . . <EOS> . <EOS> <EOS>
Attention Weights: tensor([[0.9819, 0.0170, 0.0010, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0071, 0.3391, 0.6224, 0.0306, 0.0007, 0.0001, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0031, 0.0802, 0.6689, 0.2405, 0.0069, 0.0005, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0006, 0.0041, 0.1314, 0.7947, 0.0413, 0.0280, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0016, 0.0065, 0.1165, 0.3631, 0.1962, 0.3160, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0031, 0.0489, 0.2273, 0.2104, 0.0810, 0.4293, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0115, 0.0903, 0.3893, 0.2270, 0.0564, 0.2255, 0.0000, 0.0000, 0.0000,
         0.



Epoch: 1.58, Train Loss: 0.00, Val Loss: 4.40, Train BLEU: 0.00, Val BLEU: 11.69, Minutes Elapsed: 65.07
Sampling from val predictions...
Source: tất_cả những điều đó hoàn_toàn có_thể chỉ với chiếc điện_thoại_di_động
Reference: all this is possible with your mobile phone .
Model: <SOS> all of that all to to to to to
Attention Weights: tensor([[4.6944e-01, 3.7211e-01, 1.5693e-01, 1.5048e-03, 2.0223e-05, 3.2773e-07,
         3.1045e-08, 6.1928e-09, 2.2108e-09, 1.3880e-09],
        [6.7837e-02, 5.1354e-01, 3.6263e-01, 3.9730e-02, 1.5146e-02, 5.7818e-04,
         3.5798e-04, 1.0559e-04, 4.3536e-05, 2.9426e-05],
        [3.2121e-02, 3.8652e-01, 2.1117e-01, 1.3108e-01, 2.2063e-01, 1.1209e-02,
         5.2995e-03, 1.3128e-03, 4.0486e-04, 2.4301e-04],
        [3.9306e-03, 4.7715e-02, 3.3299e-02, 9.1246e-02, 6.5966e-01, 6.6258e-02,
         8.4978e-02, 1.0015e-02, 1.9685e-03, 9.3447e-04],
        [6.7521e-04, 7.0094e-03, 9.9645e-03, 2.1527e-02, 4.5303e-01, 9.1135e-02,
         2.8971e-01, 1.074



Epoch: 1.62, Train Loss: 0.00, Val Loss: 4.36, Train BLEU: 0.00, Val BLEU: 12.22, Minutes Elapsed: 67.55
Sampling from val predictions...
Source: không có đạo_diễn nghệ_thuật , không có nhà tạo mẫu
Reference: there were no art directors , no <UNK> ,
Model: <SOS> there &apos;s no , , no no &apos;s no
Attention Weights: tensor([[9.9924e-01, 7.3552e-04, 2.7496e-05, 1.5152e-06, 1.4876e-07, 8.1551e-08,
         3.4223e-08, 1.8255e-08, 1.0682e-08, 1.1487e-08],
        [4.0242e-01, 1.9920e-01, 3.2816e-01, 6.3303e-02, 1.9295e-03, 2.3042e-03,
         7.6198e-04, 8.4073e-04, 5.9900e-04, 4.7696e-04],
        [2.5574e-01, 1.0452e-01, 4.4169e-01, 1.6885e-01, 9.7285e-03, 1.2906e-02,
         2.8376e-03, 1.7940e-03, 1.1116e-03, 8.2146e-04],
        [2.1976e-02, 3.3590e-02, 6.4399e-01, 2.7184e-01, 1.2712e-02, 8.4235e-03,
         3.1707e-03, 2.1536e-03, 1.2227e-03, 9.2028e-04],
        [3.0774e-03, 6.3533e-03, 2.4833e-01, 5.3265e-01, 1.2282e-01, 5.2327e-02,
         7.9131e-03, 1.2418e-02, 7.4897e-03



Epoch: 1.67, Train Loss: 0.00, Val Loss: 4.37, Train BLEU: 0.00, Val BLEU: 11.43, Minutes Elapsed: 70.04
Sampling from val predictions...
Source: và cuối_cùng , tôi đã có một thoả_thuận với họ
Reference: so at the end , i had a settlement
Model: <SOS> and finally , , , i had a a
Attention Weights: tensor([[2.0793e-02, 9.7721e-01, 1.9746e-03, 2.2227e-05, 3.2071e-06, 3.6737e-07,
         1.3984e-08, 1.0584e-09, 5.3013e-09, 1.1298e-09],
        [3.2284e-03, 9.8454e-01, 1.1101e-02, 7.9759e-04, 2.4356e-04, 7.3196e-05,
         1.0422e-05, 3.3624e-06, 2.7502e-06, 1.4081e-06],
        [9.5506e-03, 7.4993e-01, 1.9630e-01, 1.7733e-02, 1.5063e-02, 8.9483e-03,
         1.7433e-03, 3.6707e-04, 2.6143e-04, 1.0971e-04],
        [5.7078e-03, 8.2584e-02, 2.0385e-01, 2.3488e-01, 3.2442e-01, 1.2894e-01,
         1.3861e-02, 2.7580e-03, 1.9197e-03, 1.0743e-03],
        [3.1338e-03, 1.8165e-02, 4.7098e-01, 2.4799e-01, 1.5908e-01, 7.3851e-02,
         1.6335e-02, 4.0443e-03, 3.8091e-03, 2.6084e-03],
      

In [None]:
# Disabled results-summary cell: every line below is commented out, so running this cell does nothing.
# If uncommented, it would call load_experiment_log for EXPERIMENT_NAME, pass the result to
# summarize_results, and display only the columns listed in the selection below.
# NOTE(review): EXPERIMENT_NAME is not defined in this cell; confirm an earlier cell sets it before enabling.
# experiment_results = load_experiment_log(experiment_name=EXPERIMENT_NAME)
# summarize_results(experiment_results)[['model_name', 'enc_dropout', 'dec_dropout', 'best_val_loss', 'best_val_bleu', 
#                                        'total_params', 'trainable_params', 'runtime']]