In [1]:
import numpy as np 
import pandas as pd 
import torch
from data_processing import generate_vocab, process_data, create_dataloaders
from model import get_pretrained_emb, EncoderRNN, DecoderRNN, DecoderAttnRNN, EncoderDecoder, EncoderDecoderAttn
from train_eval import train_and_eval, count_parameters, summarize_results, plot_single_learning_curve, load_experiment_log
import pickle as pkl 
from datetime import datetime
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Settings consumed when building the vocab, the processed data and the data loaders.

# translation direction: source language -> target language
SRC_LANG, TARG_LANG = 'vi', 'en'

# sentence-length and vocabulary-size settings (same value on both sides)
SRC_MAX_SENTENCE_LEN = TARG_MAX_SENTENCE_LEN = 10
SRC_VOCAB_SIZE = TARG_VOCAB_SIZE = 30000

# number of sentence pairs per batch
BATCH_SIZE = 64

In [3]:
# Generating the vocab takes a long time: run the lines below once (uncommented) and save the result to a pickle so it can be re-imported in future runs.
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [4]:
# Reload the cached vocab from its pickle, then build three datasets from it:
# the full data, a single-batch subset and a 1000-sample subset.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only load
# vocab files that were generated locally by the vocab-generation cell.
vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# Use a context manager so the file handle is closed even if unpickling fails.
with open(vocab_filename, "rb") as vocab_file:
    vocab = pkl.load(vocab_file)

# full dataset (no sample limit)
data = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, filter_long=False)
# subset capped at exactly one batch worth of samples
data_minibatch = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, sample_limit=BATCH_SIZE, filter_long=False)
# subset capped at 1000 samples
data_minitrain = process_data(SRC_LANG, TARG_LANG, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, vocab, sample_limit=1000, filter_long=False)

In [5]:
# Build one set of dataloaders per dataset (full, single-batch, mini-train),
# all with the same sentence-length limits and batch size.
loaders_full, loaders_minibatch, loaders_minitrain = (
    create_dataloaders(dataset, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, BATCH_SIZE)
    for dataset in (data, data_minibatch, data_minitrain)
)

In [6]:
# model architecture params
NETWORK_TYPE = 'rnn'                  # selects the experiment naming scheme below: 'rnn' or 'cnn'
RNN_CELL_TYPE = 'gru'
NUM_LAYERS = 2
ENC_HIDDEN_DIM = 512
DEC_HIDDEN_DIM = 2 * ENC_HIDDEN_DIM   # decoder hidden size is tied to twice the encoder's
TEACHER_FORCING_RATIO = 1
CLIP_GRAD_MAX_NORM = 1
ENC_DROPOUT = 0.2
DEC_DROPOUT = 0.2
ATTENTION_TYPE = 'additive'           # 'without' selects the attention-free decoder in the model cell

# training params
NUM_EPOCHS = 15
LR = 0.0001
OPTIMIZER = 'Adam'
LAZY_TRAIN = False

# name the model and experiment
if NETWORK_TYPE == 'rnn':
    EXPERIMENT_NAME = '{}-rnn-{}-attn'.format(SRC_LANG, ATTENTION_TYPE)
elif NETWORK_TYPE == 'cnn':
    EXPERIMENT_NAME = '{}-cnn'.format(SRC_LANG)
else:
    # Fail loudly: otherwise EXPERIMENT_NAME is either undefined (NameError below)
    # or silently reused from an earlier run left in the kernel.
    raise ValueError("Unsupported NETWORK_TYPE {!r}; expected 'rnn' or 'cnn'".format(NETWORK_TYPE))
# the timestamp suffix makes every run's model name distinct
MODEL_NAME = '{}-{}'.format(EXPERIMENT_NAME, datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

In [7]:
# Collect every setting of this run in one dict so it can be saved with the results later.
params = {
    # run identity
    'experiment_name': EXPERIMENT_NAME,
    'model_name': MODEL_NAME,
    'src_lang': SRC_LANG,
    'targ_lang': TARG_LANG,
    # data and architecture
    'rnn_cell_type': RNN_CELL_TYPE,
    'src_max_sentence_len': SRC_MAX_SENTENCE_LEN,
    'targ_max_sentence_len': TARG_MAX_SENTENCE_LEN,
    'src_vocab_size': SRC_VOCAB_SIZE,
    'targ_vocab_size': TARG_VOCAB_SIZE,
    'num_layers': NUM_LAYERS,
    'enc_hidden_dim': ENC_HIDDEN_DIM,
    'dec_hidden_dim': DEC_HIDDEN_DIM,
    'teacher_forcing_ratio': TEACHER_FORCING_RATIO,
    'clip_grad_max_norm': CLIP_GRAD_MAX_NORM,
    'enc_dropout': ENC_DROPOUT,
    'dec_dropout': DEC_DROPOUT,
    'attention_type': ATTENTION_TYPE,
    # training
    'batch_size': BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'learning_rate': LR,
    'optimizer': OPTIMIZER,
    'lazy_train': LAZY_TRAIN,
}

In [8]:
# Build the encoder, then the decoder variant selected by ATTENTION_TYPE,
# and wrap both in the matching encoder-decoder model on `device`.

# per-language vocab entries; each provides the 'word2vec' and 'token2id' lookups used below
src_vocab = vocab[SRC_LANG]
targ_vocab = vocab[TARG_LANG]

encoder = EncoderRNN(
    rnn_cell_type=RNN_CELL_TYPE,
    enc_hidden_dim=ENC_HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    enc_dropout=ENC_DROPOUT,
    pretrained_word2vec=get_pretrained_emb(src_vocab['word2vec'], src_vocab['token2id']),
)

if ATTENTION_TYPE != 'without':
    # with attention
    decoder = DecoderAttnRNN(
        rnn_cell_type=RNN_CELL_TYPE,
        dec_hidden_dim=DEC_HIDDEN_DIM,
        enc_hidden_dim=ENC_HIDDEN_DIM,
        num_layers=NUM_LAYERS,
        targ_vocab_size=TARG_VOCAB_SIZE,
        src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
        targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
        dec_dropout=DEC_DROPOUT,
        attention_type=ATTENTION_TYPE,
        pretrained_word2vec=get_pretrained_emb(targ_vocab['word2vec'], targ_vocab['token2id']),
    )
    model = EncoderDecoderAttn(encoder, decoder, targ_vocab['token2id']).to(device)
else:
    # without attention
    decoder = DecoderRNN(
        dec_hidden_dim=DEC_HIDDEN_DIM,
        enc_hidden_dim=ENC_HIDDEN_DIM,
        num_layers=NUM_LAYERS,
        targ_vocab_size=TARG_VOCAB_SIZE,
        targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
        pretrained_word2vec=get_pretrained_emb(targ_vocab['word2vec'], targ_vocab['token2id']),
    )
    model = EncoderDecoder(encoder, decoder, targ_vocab['token2id']).to(device)

In [None]:
# Train and evaluate the model; returns the model together with the collected results.
model, results = train_and_eval(
    model=model,
    loaders_full=loaders_full,
    loaders_minibatch=loaders_minibatch,
    loaders_minitrain=loaders_minitrain,
    params=params,
    vocab=vocab,
    print_intermediate=100,
    save_checkpoint=True,
    save_to_log=True,
    lazy_eval=True,
    print_attn=True,
    inspect_samples=1,
)

Epoch: 0.00, Train Loss: 0.00, Val Loss: 10.29, Train BLEU: 0.00, Val BLEU: 0.00, Minutes Elapsed: 0.13
Sampling from val predictions...
Source: chúng_ta những nạn_nhân cần đến tất_cả mọi người . <EOS>
Reference: we victims need everyone . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> caption caption caption caption elitist elitist elitist freely freely
Attention Weights: tensor([[0.0986, 0.0983, 0.0985, 0.1000, 0.1001, 0.1001, 0.1007, 0.1006, 0.1005,
         0.1027],
        [0.0986, 0.0983, 0.0985, 0.1000, 0.1001, 0.1001, 0.1007, 0.1006, 0.1005,
         0.1027],
        [0.0986, 0.0983, 0.0985, 0.1000, 0.1001, 0.1001, 0.1007, 0.1006, 0.1005,
         0.1027],
        [0.0986, 0.0983, 0.0985, 0.1000, 0.1001, 0.1001, 0.1007, 0.1006, 0.1005,
         0.1027],
        [0.0986, 0.0983, 0.0985, 0.1000, 0.1001, 0.1001, 0.1007, 0.1006, 0.1005,
         0.1027],
        [0.0986, 0.0983, 0.0985, 0.1000, 0.1001, 0.1001, 0.1007, 0.1006, 0.1005,
         0.1027],
        [0.0986, 0.0983, 0.0985, 0.1000,



Epoch: 0.10, Train Loss: 0.00, Val Loss: 6.21, Train BLEU: 0.00, Val BLEU: 1.30, Minutes Elapsed: 5.19
Sampling from val predictions...
Source: các gia_đình hàng_xóm nghe kể về ý_tưởng này . <EOS>
Reference: and my neighboring homes heard about this idea .
Model: <SOS> and &apos;s the the . . . . .
Attention Weights: tensor([[8.3157e-06, 4.1053e-03, 8.9195e-02, 2.5529e-01, 2.5606e-01, 2.0147e-01,
         1.3048e-01, 5.1371e-02, 9.3558e-03, 2.6709e-03],
        [8.7240e-05, 1.3469e-02, 1.0965e-01, 2.1270e-01, 2.0989e-01, 1.7924e-01,
         1.3910e-01, 8.2685e-02, 3.1731e-02, 2.1444e-02],
        [1.4440e-04, 5.4484e-03, 2.8936e-02, 5.1302e-02, 5.0970e-02, 4.5558e-02,
         4.0251e-02, 3.4335e-02, 3.8145e-02, 7.0491e-01],
        [1.2625e-05, 3.3452e-04, 1.8714e-03, 3.4943e-03, 3.4164e-03, 3.0291e-03,
         2.7818e-03, 2.7626e-03, 5.7113e-03, 9.7659e-01],
        [4.1132e-06, 1.0739e-04, 6.6137e-04, 1.2866e-03, 1.2309e-03, 1.0732e-03,
         9.9927e-04, 1.0725e-03, 3.0761e-03,



Epoch: 0.14, Train Loss: 0.00, Val Loss: 6.15, Train BLEU: 0.00, Val BLEU: 1.36, Minutes Elapsed: 7.69
Sampling from val predictions...
Source: bây_giờ tôi muốn giới_thiệu các bạn với những người em_trai
Reference: now i &apos;d like to introduce you to my
Model: <SOS> and , , , , , , , ,
Attention Weights: tensor([[1.7282e-06, 3.5568e-05, 1.3156e-03, 3.7219e-02, 1.7338e-01, 3.2576e-01,
         3.0054e-01, 1.5262e-01, 9.1044e-03, 2.7114e-05],
        [1.7126e-05, 3.0989e-04, 6.4648e-03, 7.2837e-02, 1.9150e-01, 2.8140e-01,
         2.6474e-01, 1.6437e-01, 1.8301e-02, 6.2360e-05],
        [1.4059e-04, 1.7037e-03, 1.8358e-02, 9.8321e-02, 1.8429e-01, 2.3224e-01,
         2.2533e-01, 1.7845e-01, 5.9534e-02, 1.6301e-03],
        [5.3611e-04, 3.6287e-03, 2.3545e-02, 9.4603e-02, 1.6717e-01, 2.0627e-01,
         2.0373e-01, 1.7875e-01, 1.0259e-01, 1.9182e-02],
        [7.2082e-04, 3.8620e-03, 2.1698e-02, 8.5845e-02, 1.5745e-01, 1.9778e-01,
         1.9591e-01, 1.7526e-01, 1.1587e-01, 4.5603e-0



Epoch: 0.19, Train Loss: 0.00, Val Loss: 6.14, Train BLEU: 0.00, Val BLEU: 1.44, Minutes Elapsed: 10.19
Sampling from val predictions...
Source: cô cũng đã có bản_sao của bức ảnh . <EOS>
Reference: she also had <UNK> . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> and &apos;s is the the . . . .
Attention Weights: tensor([[3.1292e-05, 3.6202e-03, 9.7324e-02, 3.3806e-01, 3.9314e-01, 1.3732e-01,
         2.6389e-02, 3.2184e-03, 5.8511e-04, 3.1986e-04],
        [1.8756e-04, 1.2437e-02, 1.4713e-01, 3.3343e-01, 3.6238e-01, 1.2223e-01,
         2.0536e-02, 1.5367e-03, 1.1547e-04, 1.8871e-05],
        [3.5653e-04, 2.0291e-02, 1.5693e-01, 2.9468e-01, 3.1801e-01, 1.5951e-01,
         4.4742e-02, 5.0739e-03, 3.6289e-04, 4.0808e-05],
        [1.1531e-03, 2.9656e-02, 1.3656e-01, 2.1912e-01, 2.4009e-01, 1.9036e-01,
         1.1862e-01, 4.6776e-02, 1.2559e-02, 5.1102e-03],
        [9.0547e-04, 1.5486e-02, 6.8923e-02, 1.1573e-01, 1.3175e-01, 1.2594e-01,
         1.1540e-01, 1.0272e-01, 1.0496e-01, 2.1819e-01],



Epoch: 0.24, Train Loss: 0.00, Val Loss: 6.09, Train BLEU: 0.00, Val BLEU: 1.22, Minutes Elapsed: 12.69
Sampling from val predictions...
Source: bức này được chụp vài tuần sau sự_kiện 11/9 ,
Reference: this one was taken just weeks after 9 /
Model: <SOS> so i , , , , , , ,
Attention Weights: tensor([[1.0457e-06, 1.5149e-05, 6.0338e-04, 2.2053e-02, 1.4643e-01, 4.2314e-01,
         3.2781e-01, 7.0959e-02, 8.1638e-03, 8.2759e-04],
        [4.2169e-06, 7.0708e-05, 2.3426e-03, 4.7131e-02, 1.8065e-01, 3.9584e-01,
         2.9627e-01, 7.0941e-02, 6.4913e-03, 2.6289e-04],
        [9.4693e-06, 1.7831e-04, 4.9891e-03, 6.2688e-02, 1.8065e-01, 3.1932e-01,
         2.8208e-01, 1.2204e-01, 2.5975e-02, 2.0665e-03],
        [1.2596e-04, 9.5308e-04, 8.7100e-03, 4.9105e-02, 1.1280e-01, 1.7860e-01,
         1.9239e-01, 1.6182e-01, 1.4628e-01, 1.4921e-01],
        [1.1593e-04, 5.2860e-04, 3.1579e-03, 1.5769e-02, 3.9052e-02, 6.7206e-02,
         8.0280e-02, 8.3599e-02, 1.4264e-01, 5.6765e-01],
        [7.1



Epoch: 0.29, Train Loss: 0.00, Val Loss: 6.06, Train BLEU: 0.00, Val BLEU: 2.00, Minutes Elapsed: 15.19
Sampling from val predictions...
Source: trước_tiên , bạn phải mang đến cho họ sự bảo_mật
Reference: first , you have to offer them <UNK> .
Model: <SOS> i , , , , , , , ,
Attention Weights: tensor([[2.6359e-06, 5.9158e-05, 1.4391e-03, 2.7887e-02, 1.5529e-01, 2.3204e-01,
         3.3462e-01, 1.9430e-01, 5.0176e-02, 4.1809e-03],
        [1.3446e-05, 3.0760e-04, 5.4670e-03, 5.6093e-02, 1.8316e-01, 2.3615e-01,
         2.8934e-01, 1.8256e-01, 4.4576e-02, 2.3322e-03],
        [2.0934e-05, 4.7237e-04, 7.3419e-03, 5.3104e-02, 1.3759e-01, 1.7423e-01,
         2.2129e-01, 2.0359e-01, 1.4693e-01, 5.5428e-02],
        [1.5129e-05, 1.1361e-04, 7.9439e-04, 4.2160e-03, 1.1913e-02, 1.6806e-02,
         2.7642e-02, 4.1169e-02, 1.2720e-01, 7.7013e-01],
        [4.0656e-06, 1.9490e-05, 1.0138e-04, 5.2009e-04, 1.7202e-03, 2.6797e-03,
         5.4974e-03, 1.0879e-02, 6.7424e-02, 9.1115e-01],
        [3.



Epoch: 0.34, Train Loss: 0.00, Val Loss: 6.04, Train BLEU: 0.00, Val BLEU: 2.23, Minutes Elapsed: 17.67
Sampling from val predictions...
Source: đây là những ý_tưởng cần_thiết mà một đất_nước đã bị
Reference: these are the ideals that a war-torn libya needs
Model: <SOS> it &apos;s a a a a a a a
Attention Weights: tensor([[8.1807e-04, 4.2498e-02, 5.2140e-01, 3.9098e-01, 4.3869e-02, 4.0526e-04,
         2.7698e-05, 5.3590e-06, 6.8273e-07, 2.7185e-07],
        [4.3248e-03, 9.0189e-02, 5.3184e-01, 3.2518e-01, 4.7631e-02, 7.6097e-04,
         6.6216e-05, 1.3734e-05, 2.0928e-06, 9.0415e-07],
        [2.2520e-03, 3.7600e-02, 2.4581e-01, 4.2283e-01, 2.7163e-01, 1.7383e-02,
         2.0320e-03, 4.1716e-04, 3.8149e-05, 1.2422e-05],
        [1.0343e-03, 4.4474e-03, 1.7510e-02, 6.3187e-02, 2.2266e-01, 2.5937e-01,
         2.0730e-01, 1.6194e-01, 4.2664e-02, 1.9886e-02],
        [2.0720e-04, 8.9477e-04, 4.0513e-03, 2.0703e-02, 1.0846e-01, 2.0928e-01,
         2.3817e-01, 2.4812e-01, 1.0692e-01, 6.3



Epoch: 0.38, Train Loss: 0.00, Val Loss: 5.99, Train BLEU: 0.00, Val BLEU: 2.75, Minutes Elapsed: 20.16
Sampling from val predictions...
Source: có một người phụ_nữ địa_phương tuyệt_vời đã hướng_dẫn chúng_tôi .
Reference: we had an amazing local woman who guided us
Model: <SOS> so &apos;s is the the the . . <EOS>
Attention Weights: tensor([[1.1766e-05, 1.6281e-03, 7.7843e-02, 5.0699e-01, 3.3741e-01, 7.4112e-02,
         1.7175e-03, 2.4495e-04, 2.8698e-05, 8.2875e-06],
        [1.6196e-04, 8.2038e-03, 1.2017e-01, 4.5895e-01, 3.3688e-01, 7.3145e-02,
         2.2353e-03, 2.3913e-04, 1.5733e-05, 2.6149e-06],
        [1.2515e-04, 7.2213e-03, 1.1189e-01, 3.6483e-01, 3.3615e-01, 1.6235e-01,
         1.4132e-02, 2.9679e-03, 2.9092e-04, 5.0445e-05],
        [4.7738e-05, 4.4748e-04, 4.7278e-03, 1.5168e-02, 1.8068e-02, 3.3904e-02,
         2.2382e-02, 4.8073e-02, 1.5776e-01, 6.9942e-01],
        [1.0438e-06, 4.8754e-06, 3.8656e-05, 1.2309e-04, 1.5416e-04, 4.9599e-04,
         6.2262e-04, 3.7698e-



Epoch: 0.43, Train Loss: 0.00, Val Loss: 5.89, Train BLEU: 0.00, Val BLEU: 3.04, Minutes Elapsed: 22.62
Sampling from val predictions...
Source: đây là một bản_vẽ cách để khảo_sát toàn xã_hội bởi_lẽ
Reference: this is a blueprint how to survey your society
Model: <SOS> it &apos;s a a a a , , ,
Attention Weights: tensor([[0.0541, 0.4531, 0.3497, 0.0727, 0.0493, 0.0179, 0.0026, 0.0004, 0.0001,
         0.0000],
        [0.0902, 0.5222, 0.3071, 0.0468, 0.0247, 0.0076, 0.0012, 0.0002, 0.0000,
         0.0000],
        [0.0204, 0.2503, 0.3222, 0.1230, 0.1560, 0.1010, 0.0218, 0.0040, 0.0007,
         0.0006],
        [0.0011, 0.0062, 0.0132, 0.0205, 0.0796, 0.2727, 0.2488, 0.1784, 0.0753,
         0.1044],
        [0.0001, 0.0003, 0.0009, 0.0024, 0.0124, 0.0709, 0.1311, 0.2167, 0.2106,
         0.3546],
        [0.0000, 0.0002, 0.0005, 0.0016, 0.0087, 0.0522, 0.1076, 0.2028, 0.2336,
         0.3926],
        [0.0000, 0.0002, 0.0005, 0.0016, 0.0083, 0.0486, 0.1017, 0.1977, 0.2387,
         0.



Epoch: 0.48, Train Loss: 0.00, Val Loss: 5.84, Train BLEU: 0.00, Val BLEU: 2.49, Minutes Elapsed: 25.09
Sampling from val predictions...
Source: đủ để trồng <UNK> triệu cây cà_chua . <EOS> <PAD>
Reference: that &apos;s enough space to plant <UNK> million tomato
Model: <SOS> we , , , , . . . <EOS>
Attention Weights: tensor([[0.5705, 0.3861, 0.0112, 0.0291, 0.0019, 0.0007, 0.0004, 0.0002, 0.0001,
         0.0000],
        [0.2897, 0.5732, 0.0453, 0.0887, 0.0027, 0.0004, 0.0001, 0.0000, 0.0000,
         0.0000],
        [0.0007, 0.0051, 0.0090, 0.8024, 0.1100, 0.0459, 0.0191, 0.0061, 0.0016,
         0.0000],
        [0.0000, 0.0002, 0.0004, 0.3368, 0.1461, 0.1915, 0.2078, 0.0990, 0.0182,
         0.0000],
        [0.0000, 0.0000, 0.0000, 0.0076, 0.0144, 0.0694, 0.2765, 0.4591, 0.1730,
         0.0000],
        [0.0000, 0.0000, 0.0000, 0.0007, 0.0021, 0.0141, 0.0950, 0.4165, 0.4716,
         0.0000],
        [0.0000, 0.0000, 0.0000, 0.0003, 0.0010, 0.0070, 0.0528, 0.3265, 0.6124,
        



Epoch: 0.53, Train Loss: 0.00, Val Loss: 5.77, Train BLEU: 0.00, Val BLEU: 3.75, Minutes Elapsed: 27.59
Sampling from val predictions...
Source: peter <UNK> là một giáo_sư triết_học trước khi làm công_việc
Reference: peter <UNK> was a professor of philosophy before becoming
Model: <SOS> but we we we we the the , the
Attention Weights: tensor([[1.0446e-03, 8.9460e-01, 1.0348e-01, 8.3351e-04, 1.8015e-05, 2.4278e-05,
         5.5912e-06, 1.3471e-06, 1.1400e-06, 5.8245e-07],
        [1.6179e-03, 9.0266e-01, 9.5224e-02, 4.8784e-04, 4.5768e-06, 2.3822e-06,
         2.5679e-07, 3.2492e-08, 1.7214e-08, 6.7119e-09],
        [6.8701e-06, 8.1341e-01, 1.8623e-01, 3.4490e-04, 8.0103e-07, 3.8665e-07,
         2.8461e-08, 3.1339e-09, 1.7809e-09, 7.2869e-10],
        [5.3901e-07, 2.0118e-01, 7.7351e-01, 2.5129e-02, 1.1293e-04, 7.4057e-05,
         1.8909e-06, 5.5528e-08, 1.9241e-08, 4.2645e-09],
        [4.7231e-07, 1.0270e-02, 4.9072e-01, 3.3483e-01, 3.2930e-02, 1.2297e-01,
         7.9903e-03, 2.146



Epoch: 0.58, Train Loss: 0.00, Val Loss: 5.67, Train BLEU: 0.00, Val BLEU: 3.52, Minutes Elapsed: 30.08
Sampling from val predictions...
Source: đó là thời_điểm tuyệt_vọng nhất trong cuộc_đời tôi . <EOS>
Reference: this was one of the lowest points in my
Model: <SOS> it &apos;s a the the . . . .
Attention Weights: tensor([[1.8613e-01, 7.8057e-01, 3.2145e-02, 1.1084e-03, 4.0639e-05, 2.9356e-06,
         5.0399e-07, 3.0161e-07, 3.6258e-08, 8.5195e-09],
        [3.9610e-02, 7.5760e-01, 1.9241e-01, 1.0146e-02, 2.1940e-04, 8.7719e-06,
         7.3705e-07, 3.1033e-07, 4.6370e-08, 9.7483e-09],
        [3.1201e-04, 1.8002e-02, 3.5146e-01, 5.5716e-01, 6.6564e-02, 5.6784e-03,
         5.5388e-04, 2.4234e-04, 2.2225e-05, 2.2705e-06],
        [1.2947e-05, 2.6223e-04, 1.2604e-02, 4.4706e-01, 4.3659e-01, 8.8613e-02,
         1.0611e-02, 4.1626e-03, 8.2793e-05, 1.7899e-06],
        [5.1065e-06, 5.0826e-05, 1.2875e-03, 6.3951e-02, 3.0022e-01, 3.0204e-01,
         1.8520e-01, 1.4083e-01, 6.2681e-03, 1.



Epoch: 0.62, Train Loss: 0.00, Val Loss: 5.60, Train BLEU: 0.00, Val BLEU: 3.40, Minutes Elapsed: 32.59
Sampling from val predictions...
Source: và điều thứ_hai tôi muốn nói_là mọi người nghĩ rằng
Reference: and my second message is that everyone thinks domestic
Model: <SOS> and i i i i i i i to
Attention Weights: tensor([[1.1037e-01, 7.0491e-01, 1.4750e-01, 3.7063e-02, 1.3919e-04, 7.7674e-06,
         3.9060e-06, 1.6860e-06, 6.7571e-07, 5.0811e-07],
        [6.8913e-02, 7.1355e-01, 1.9303e-01, 2.4452e-02, 5.5080e-05, 1.0403e-06,
         2.9983e-07, 8.1419e-08, 2.1812e-08, 1.1737e-08],
        [1.0286e-03, 1.0157e-01, 6.7504e-01, 2.2218e-01, 1.7347e-04, 9.6359e-07,
         2.1646e-07, 4.4960e-08, 9.9329e-09, 5.3106e-09],
        [1.7969e-06, 2.3861e-04, 8.3730e-02, 9.1400e-01, 2.0250e-03, 4.5371e-06,
         6.9935e-07, 6.8191e-08, 6.1186e-09, 2.3409e-09],
        [1.8520e-07, 7.2519e-06, 3.8203e-03, 9.6048e-01, 3.5281e-02, 3.5348e-04,
         5.3350e-05, 2.1579e-06, 4.4610e-08, 7.



Epoch: 0.67, Train Loss: 0.00, Val Loss: 5.55, Train BLEU: 0.00, Val BLEU: 3.72, Minutes Elapsed: 35.08
Sampling from val predictions...
Source: câu_hỏi cuối_cùng mà người_ta hỏi tôi là : " cảm_giác
Reference: so the last question people ask me is ,
Model: <SOS> so , the , i i : &quot; &quot;
Attention Weights: tensor([[4.9055e-01, 3.8774e-01, 1.1988e-01, 1.7320e-03, 9.6985e-05, 4.6135e-06,
         2.0300e-07, 4.6817e-08, 1.3970e-07, 3.0262e-08],
        [3.2392e-02, 2.5114e-01, 6.9217e-01, 2.3467e-02, 8.2104e-04, 9.5871e-06,
         8.2926e-08, 5.7011e-09, 2.1211e-08, 1.6315e-09],
        [9.5558e-05, 1.9940e-03, 4.9421e-01, 4.0883e-01, 9.2676e-02, 2.1936e-03,
         3.9060e-06, 6.6337e-08, 5.3904e-07, 1.0028e-08],
        [6.7745e-06, 5.9751e-05, 1.3277e-02, 1.9278e-01, 5.5696e-01, 2.3520e-01,
         1.4225e-03, 1.2737e-05, 2.8420e-04, 7.4746e-07],
        [1.5426e-06, 1.0503e-05, 1.0207e-03, 2.6593e-02, 2.6478e-01, 6.5085e-01,
         2.9244e-02, 9.3824e-04, 2.6473e-02, 8.747



Epoch: 0.77, Train Loss: 0.00, Val Loss: 5.46, Train BLEU: 0.00, Val BLEU: 5.28, Minutes Elapsed: 40.02
Sampling from val predictions...
Source: nô_lệ ngày_nay phục_vụ cho thương_mại , hàng_hoá mà người nô_lệ
Reference: today &apos;s slavery is about commerce , so the
Model: <SOS> it &apos;s the to the , , , ,
Attention Weights: tensor([[8.0050e-01, 1.4997e-01, 4.2372e-02, 3.2528e-03, 9.9424e-04, 1.2077e-03,
         8.9237e-04, 4.5219e-04, 2.0877e-04, 1.4913e-04],
        [5.4096e-02, 5.4102e-01, 3.9715e-01, 6.4573e-03, 6.4467e-04, 4.4545e-04,
         1.2498e-04, 4.2546e-05, 1.4682e-05, 7.4359e-06],
        [2.2727e-04, 4.3074e-02, 9.1096e-01, 4.0492e-02, 2.8058e-03, 2.0365e-03,
         3.1641e-04, 6.9798e-05, 1.4399e-05, 4.7048e-06],
        [1.3490e-05, 1.1751e-03, 4.8741e-01, 3.4837e-01, 6.4312e-02, 9.2415e-02,
         5.4833e-03, 7.5666e-04, 6.0683e-05, 6.3060e-06],
        [3.0373e-05, 5.8892e-04, 7.5368e-02, 2.6083e-01, 1.5556e-01, 4.5980e-01,
         3.7788e-02, 8.9655e-03,



Epoch: 0.82, Train Loss: 0.00, Val Loss: 5.40, Train BLEU: 0.00, Val BLEU: 4.73, Minutes Elapsed: 42.48
Sampling from val predictions...
Source: vì không có kền_kền nên mới có sự tràn_lan về
Reference: because there are no vultures , there &apos;s been
Model: <SOS> so &apos;s not a a a a a a
Attention Weights: tensor([[1.5270e-01, 8.1646e-01, 3.0560e-02, 2.5537e-04, 1.1453e-05, 4.1168e-06,
         1.1056e-06, 3.1507e-07, 7.8508e-08, 8.3457e-08],
        [5.1992e-03, 7.2852e-01, 2.6317e-01, 3.0718e-03, 3.3307e-05, 3.3300e-06,
         2.7895e-07, 3.3756e-08, 5.9625e-09, 5.7102e-09],
        [1.1721e-04, 3.8180e-02, 7.0668e-01, 2.3868e-01, 1.4455e-02, 1.8037e-03,
         8.4603e-05, 3.8370e-06, 2.2505e-07, 1.9238e-07],
        [3.4558e-06, 1.1101e-04, 4.9435e-03, 1.1941e-01, 4.0641e-01, 4.3052e-01,
         3.7751e-02, 8.2623e-04, 9.2130e-06, 6.6647e-06],
        [1.1008e-06, 1.2358e-05, 2.0781e-04, 4.6576e-03, 9.7083e-02, 6.4065e-01,
         2.4246e-01, 1.4384e-02, 2.1824e-04, 3.1993



Epoch: 0.86, Train Loss: 0.00, Val Loss: 5.34, Train BLEU: 0.00, Val BLEU: 5.32, Minutes Elapsed: 44.97
Sampling from val predictions...
Source: ở ấn_độ và nepal , tôi được đưa tới 1
Reference: in india and nepal , i was introduced to
Model: <SOS> in , , , , i i to to
Attention Weights: tensor([[8.7168e-01, 1.2829e-01, 2.7023e-05, 1.7518e-06, 2.4356e-07, 9.0148e-08,
         1.5688e-08, 7.8030e-09, 3.7179e-09, 3.8579e-09],
        [2.5066e-02, 9.6946e-01, 4.8557e-03, 5.9817e-04, 1.6818e-05, 1.2238e-06,
         4.3601e-08, 8.1133e-09, 1.8570e-09, 1.4985e-09],
        [1.6787e-03, 4.5425e-01, 2.1219e-01, 2.8655e-01, 4.2493e-02, 2.8243e-03,
         1.5308e-05, 6.6289e-07, 4.5426e-08, 2.7738e-08],
        [2.2424e-04, 3.0858e-02, 8.4996e-02, 2.6502e-01, 4.6017e-01, 1.5720e-01,
         1.4835e-03, 5.1738e-05, 1.7593e-06, 7.6845e-07],
        [2.1737e-05, 1.4740e-03, 8.9649e-03, 5.0626e-02, 3.7042e-01, 5.5375e-01,
         1.4057e-02, 6.6243e-04, 2.0480e-05, 7.6438e-06],
        [9.5537e-



Epoch: 0.91, Train Loss: 0.00, Val Loss: 5.29, Train BLEU: 0.00, Val BLEU: 5.18, Minutes Elapsed: 47.45
Sampling from val predictions...
Source: tôi luôn sống trong một nỗi sợ thường_trực rằng danh_tính
Reference: so i was living in constant fear that my
Model: <SOS> i i i in in in of the the
Attention Weights: tensor([[9.9237e-01, 7.4847e-03, 1.3464e-04, 8.5499e-06, 1.1443e-06, 3.5057e-07,
         2.1352e-07, 1.3668e-07, 6.8434e-08, 4.3427e-08],
        [2.3590e-01, 7.0730e-01, 5.5778e-02, 1.0062e-03, 1.2297e-05, 7.9729e-07,
         2.0646e-07, 6.0625e-08, 1.5731e-08, 6.6448e-09],
        [1.1547e-03, 1.9728e-01, 7.0373e-01, 9.5871e-02, 1.8803e-03, 7.8705e-05,
         1.0351e-05, 1.3845e-06, 1.2015e-07, 2.7268e-08],
        [1.5006e-05, 4.8522e-03, 3.2986e-01, 6.3664e-01, 2.7471e-02, 1.0387e-03,
         1.0914e-04, 9.1701e-06, 3.1106e-07, 2.8646e-08],
        [5.3687e-06, 1.5807e-04, 2.3340e-02, 7.3547e-01, 2.1114e-01, 2.4184e-02,
         4.8809e-03, 7.6467e-04, 5.3167e-05, 5.611



Epoch: 0.96, Train Loss: 0.00, Val Loss: 5.26, Train BLEU: 0.00, Val BLEU: 5.66, Minutes Elapsed: 49.93
Sampling from val predictions...
Source: mà tôi đến đây để thú_nhận rằng chúng_tôi - toàn_bộ
Reference: i &apos;m rather here today to confess that we
Model: <SOS> i i to to to to to to the
Attention Weights: tensor([[9.1450e-01, 8.5334e-02, 1.5667e-04, 8.7753e-06, 1.6031e-06, 3.2249e-07,
         1.9355e-07, 1.3943e-07, 8.0760e-08, 5.3230e-08],
        [2.2294e-01, 7.6179e-01, 1.4771e-02, 4.8101e-04, 1.5964e-05, 7.7097e-07,
         2.3120e-07, 9.2500e-08, 4.4787e-08, 1.8298e-08],
        [5.9071e-03, 2.9158e-01, 5.0441e-01, 1.8674e-01, 1.1199e-02, 1.4698e-04,
         1.4277e-05, 1.9056e-06, 4.1599e-07, 5.3993e-08],
        [9.1625e-06, 6.1850e-04, 2.2300e-02, 5.9031e-01, 3.7351e-01, 1.1277e-02,
         1.7519e-03, 1.9744e-04, 2.7344e-05, 1.0596e-06],
        [2.7786e-05, 4.9968e-04, 1.2259e-02, 4.4714e-01, 5.0067e-01, 2.8668e-02,
         8.4528e-03, 1.8149e-03, 4.4015e-04, 2.429



Epoch: 1.00, Train Loss: 0.00, Val Loss: 5.28, Train BLEU: 0.00, Val BLEU: 6.03, Minutes Elapsed: 52.02
Sampling from val predictions...
Source: trong đó có viết : khi chị đọc được những
Reference: it read , &quot; when you read this ,
Model: <SOS> in the a the : , can to the
Attention Weights: tensor([[8.6388e-01, 1.3544e-01, 6.7670e-04, 1.5325e-06, 2.6926e-07, 2.9850e-07,
         1.1137e-07, 4.2744e-08, 1.6382e-08, 9.4509e-09],
        [5.4980e-02, 9.0591e-01, 3.9082e-02, 2.1801e-05, 8.7538e-07, 5.9822e-07,
         6.7294e-08, 1.4676e-08, 3.8120e-09, 1.4178e-09],
        [9.9959e-04, 2.4220e-01, 7.4983e-01, 6.5623e-03, 1.9802e-04, 2.0079e-04,
         3.2182e-06, 1.3476e-07, 8.0545e-09, 1.3225e-09],
        [1.6443e-04, 1.8972e-02, 5.2727e-01, 1.8219e-01, 4.7285e-02, 2.1023e-01,
         1.3258e-02, 6.0851e-04, 1.4469e-05, 8.1195e-07],
        [4.2891e-06, 2.0819e-04, 9.0535e-03, 3.0172e-02, 5.3070e-02, 6.6945e-01,
         2.0445e-01, 3.1958e-02, 1.5249e-03, 1.0048e-04],
        [



Epoch: 1.05, Train Loss: 0.00, Val Loss: 5.22, Train BLEU: 0.00, Val BLEU: 6.15, Minutes Elapsed: 54.59
Sampling from val predictions...
Source: tôi như bị tát vào mặt khi đọc cuốn sách
Reference: i was given a slap in the face reading
Model: <SOS> i i to to to to to , of
Attention Weights: tensor([[9.9557e-01, 4.2010e-03, 2.2152e-04, 3.9503e-06, 3.4180e-07, 1.1063e-07,
         5.1102e-08, 3.5063e-08, 2.5537e-08, 1.6048e-08],
        [2.2293e-01, 5.5896e-01, 2.1616e-01, 1.9203e-03, 2.2978e-05, 1.3741e-06,
         1.3642e-07, 3.8396e-08, 1.6925e-08, 6.1387e-09],
        [3.8388e-03, 7.7396e-02, 8.0802e-01, 1.0390e-01, 6.2763e-03, 5.2751e-04,
         3.4151e-05, 4.9985e-06, 1.0753e-06, 1.3858e-07],
        [9.0357e-05, 5.8155e-04, 1.3947e-01, 5.8879e-01, 2.3839e-01, 3.0435e-02,
         1.7948e-03, 3.5908e-04, 7.6750e-05, 6.2066e-06],
        [2.3200e-05, 1.3225e-04, 2.2208e-02, 3.0809e-01, 4.7219e-01, 1.6860e-01,
         2.1658e-02, 5.6029e-03, 1.3688e-03, 1.2778e-04],
        [3.49



Epoch: 1.10, Train Loss: 0.00, Val Loss: 5.17, Train BLEU: 0.00, Val BLEU: 6.85, Minutes Elapsed: 57.05
Sampling from val predictions...
Source: để tôi nói bạn biết một bí_mật . <EOS> <PAD>
Reference: let me tell you a secret . <EOS> <PAD>
Model: <SOS> i i want you you it . <EOS> <EOS>
Attention Weights: tensor([[0.9630, 0.0366, 0.0004, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1440, 0.6888, 0.1646, 0.0025, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0004, 0.0198, 0.6443, 0.3245, 0.0109, 0.0001, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0002, 0.0550, 0.6822, 0.2542, 0.0084, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0022, 0.2424, 0.5518, 0.1939, 0.0094, 0.0004, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0006, 0.0877, 0.3904, 0.3889, 0.1130, 0.0190, 0.0005,
         0.0000],
        [0.0000, 0.0000, 0.0003, 0.0318, 0.2573, 0.4105, 0.2180, 0.0783, 0.0037,
         0.0000],
 



Epoch: 1.14, Train Loss: 0.00, Val Loss: 5.17, Train BLEU: 0.00, Val BLEU: 6.81, Minutes Elapsed: 59.52
Sampling from val predictions...
Source: và những điều đó là sự_thật , nhưng đó chỉ
Reference: and those things are true , but they &apos;re
Model: <SOS> and the the is is , , , ,
Attention Weights: tensor([[2.6406e-02, 9.2069e-01, 5.2845e-02, 5.5866e-05, 6.5481e-07, 4.4988e-08,
         2.2057e-08, 9.4809e-09, 5.5495e-09, 3.5182e-09],
        [6.1228e-03, 6.3170e-01, 3.6169e-01, 4.8363e-04, 3.5651e-06, 1.3097e-07,
         3.2065e-08, 5.9752e-09, 1.9119e-09, 9.8889e-10],
        [1.7282e-04, 1.8992e-02, 9.3934e-01, 4.1325e-02, 1.7012e-04, 9.7777e-07,
         1.1887e-07, 1.4576e-08, 3.6745e-09, 1.6208e-09],
        [1.9404e-05, 4.6808e-04, 8.3256e-02, 6.3064e-01, 2.8207e-01, 3.3881e-03,
         1.6424e-04, 1.2391e-06, 5.7881e-08, 7.2852e-09],
        [7.2716e-06, 3.3912e-05, 9.1442e-04, 2.2498e-02, 3.7487e-01, 2.9048e-01,
         3.0511e-01, 5.8946e-03, 1.8008e-04, 9.2102e-06],
  



Epoch: 1.19, Train Loss: 0.00, Val Loss: 5.15, Train BLEU: 0.00, Val BLEU: 6.29, Minutes Elapsed: 62.00
Sampling from val predictions...
Source: khi gạch tên đi thì sao ? gạch đi là
Reference: what do bullets do ? bullets kill , and
Model: <SOS> so what you you ? ? ? ? is
Attention Weights: tensor([[8.7963e-01, 1.1877e-01, 1.5520e-03, 3.3997e-05, 6.0530e-06, 1.9424e-06,
         1.5913e-06, 4.3164e-07, 1.6611e-07, 1.0339e-07],
        [5.4343e-02, 8.5310e-01, 9.1796e-02, 7.1463e-04, 3.9544e-05, 4.6039e-06,
         2.0760e-06, 1.8292e-07, 3.9377e-08, 1.9373e-08],
        [2.6706e-04, 4.2659e-02, 8.4904e-01, 1.0063e-01, 7.0787e-03, 2.6138e-04,
         6.0040e-05, 8.7167e-07, 4.0882e-08, 8.2205e-09],
        [7.7218e-06, 3.3686e-04, 5.8845e-02, 4.5603e-01, 3.8314e-01, 6.7507e-02,
         3.3602e-02, 5.2537e-04, 7.2803e-06, 4.4102e-07],
        [7.3946e-07, 7.7990e-06, 7.0048e-04, 3.5383e-02, 1.6023e-01, 2.1940e-01,
         5.3939e-01, 4.2761e-02, 1.9736e-03, 1.6025e-04],
        [3.67



Epoch: 1.24, Train Loss: 0.00, Val Loss: 5.11, Train BLEU: 0.00, Val BLEU: 7.25, Minutes Elapsed: 64.50
Sampling from val predictions...
Source: chúng_tôi đã lấy nhiều mẫu_vật từ con đường này và
Reference: so we took a lot of samples from this
Model: <SOS> we &apos;ve have to to of the of and
Attention Weights: tensor([[9.8282e-01, 1.7087e-02, 8.5048e-05, 3.2438e-06, 4.3947e-07, 1.4597e-07,
         3.7056e-08, 2.2699e-08, 1.4213e-08, 9.4608e-09],
        [1.8990e-01, 7.7372e-01, 3.5866e-02, 4.9285e-04, 1.7251e-05, 2.1562e-06,
         1.7456e-07, 7.1227e-08, 3.1454e-08, 1.3938e-08],
        [5.9480e-03, 1.4337e-01, 7.5869e-01, 8.5902e-02, 5.4042e-03, 6.6671e-04,
         1.8267e-05, 2.8185e-06, 4.9360e-07, 9.1710e-08],
        [1.3617e-04, 6.9275e-03, 5.8785e-01, 3.5184e-01, 4.5995e-02, 7.0143e-03,
         2.0587e-04, 2.8314e-05, 3.3323e-06, 4.0453e-07],
        [5.8089e-05, 4.7967e-04, 3.5067e-02, 3.2873e-01, 3.8957e-01, 2.2467e-01,
         1.5702e-02, 4.8942e-03, 7.5125e-04, 7.59



Epoch: 1.29, Train Loss: 0.00, Val Loss: 5.10, Train BLEU: 0.00, Val BLEU: 7.06, Minutes Elapsed: 66.98
Sampling from val predictions...
Source: bà ấy đây . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: there she is . <EOS> <PAD> <PAD> <PAD> <PAD>
Model: <SOS> thank you . . . <EOS> . <EOS> <EOS>
Attention Weights: tensor([[0.9218, 0.0777, 0.0006, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.3792, 0.4351, 0.1813, 0.0043, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0195, 0.0523, 0.5112, 0.4097, 0.0073, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0050, 0.0110, 0.1297, 0.7864, 0.0679, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0046, 0.0094, 0.0874, 0.6836, 0.2150, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0144, 0.0244, 0.1196, 0.5756, 0.2660, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0026, 0.0070, 0.0597, 0.3955, 0.5353, 0.0000, 0.0000, 0.0000, 0.0000,
        



Epoch: 1.34, Train Loss: 0.00, Val Loss: 5.07, Train BLEU: 0.00, Val BLEU: 7.15, Minutes Elapsed: 69.47
Sampling from val predictions...
Source: và cả hai đều chuẩn_xác . nhưng thêm một điều
Reference: and they &apos;re both accurate . okay , but
Model: <SOS> and the the the . . the &apos;s a
Attention Weights: tensor([[5.7358e-02, 4.7153e-01, 4.6096e-01, 9.9356e-03, 1.1771e-04, 7.1352e-05,
         1.0541e-05, 5.9851e-06, 3.2004e-06, 1.6655e-06],
        [2.2745e-02, 2.1043e-01, 6.8875e-01, 7.6746e-02, 9.5279e-04, 3.5257e-04,
         1.4900e-05, 4.1154e-06, 1.2977e-06, 4.5507e-07],
        [1.6758e-03, 5.1665e-03, 6.2016e-02, 8.4451e-01, 6.3908e-02, 2.2557e-02,
         1.4648e-04, 2.1003e-05, 3.0497e-06, 5.4261e-07],
        [7.5378e-05, 8.7124e-05, 5.8073e-04, 2.0962e-02, 8.5476e-02, 8.7158e-01,
         1.8366e-02, 2.6587e-03, 2.0034e-04, 9.6979e-06],
        [4.3251e-05, 2.6487e-05, 8.0336e-05, 1.3575e-03, 1.0750e-02, 8.3938e-01,
         1.0440e-01, 3.8060e-02, 5.4579e-03, 4.445



Epoch: 1.38, Train Loss: 0.00, Val Loss: 5.05, Train BLEU: 0.00, Val BLEU: 7.26, Minutes Elapsed: 71.95
Sampling from val predictions...
Source: tôi hi_vọng những tấm hình sẽ đánh_thức một nguồn sức_mạnh
Reference: i hope that these images awaken a force in
Model: <SOS> i i the i i to a a a
Attention Weights: tensor([[9.9904e-01, 9.3205e-04, 2.6411e-05, 2.8327e-06, 7.2863e-07, 3.0209e-07,
         8.5623e-08, 4.0166e-08, 1.6494e-08, 1.1087e-08],
        [2.2795e-01, 7.5241e-01, 1.9064e-02, 5.4066e-04, 3.8176e-05, 2.5823e-06,
         2.6267e-07, 6.2765e-08, 1.3292e-08, 6.6274e-09],
        [1.1963e-02, 5.2330e-01, 3.8439e-01, 6.0152e-02, 1.7946e-02, 2.0859e-03,
         1.4018e-04, 1.6562e-05, 8.8603e-07, 2.1099e-07],
        [7.8836e-05, 4.3388e-03, 1.7118e-01, 4.5190e-01, 2.9919e-01, 6.7594e-02,
         5.3551e-03, 3.6027e-04, 7.2081e-06, 9.2502e-07],
        [7.8522e-07, 8.4917e-05, 5.8724e-03, 7.5449e-02, 3.0069e-01, 4.3226e-01,
         1.5032e-01, 3.3726e-02, 1.4363e-03, 1.6685e



Epoch: 1.43, Train Loss: 0.00, Val Loss: 5.00, Train BLEU: 0.00, Val BLEU: 7.43, Minutes Elapsed: 74.43
Sampling from val predictions...
Source: tôi cũng đã từng tự_hỏi , thực_tế nó có hoạt_động
Reference: and i also ask myself , does it really
Model: <SOS> i i i to , , it it it
Attention Weights: tensor([[9.9214e-01, 7.7866e-03, 6.7171e-05, 1.9064e-06, 1.1328e-07, 1.1504e-08,
         3.6558e-09, 1.4864e-09, 7.7230e-10, 3.6251e-10],
        [2.5560e-02, 9.4332e-01, 2.9930e-02, 1.1675e-03, 2.1739e-05, 3.3658e-07,
         2.4628e-08, 2.9193e-09, 7.7567e-10, 2.3977e-10],
        [1.9874e-03, 3.2337e-01, 4.1562e-01, 2.2081e-01, 3.6627e-02, 1.4872e-03,
         9.5575e-05, 3.9221e-06, 2.8164e-07, 2.6917e-08],
        [5.9307e-04, 1.6416e-01, 4.0209e-01, 3.4022e-01, 8.8180e-02, 4.3975e-03,
         3.4147e-04, 1.2554e-05, 7.9350e-07, 5.2698e-08],
        [1.3027e-05, 1.6961e-04, 6.5250e-03, 1.1709e-01, 7.2745e-01, 1.2017e-01,
         2.7020e-02, 1.4393e-03, 1.1327e-04, 6.6914e-06],
      



Epoch: 1.48, Train Loss: 0.00, Val Loss: 4.99, Train BLEU: 0.00, Val BLEU: 7.86, Minutes Elapsed: 76.91
Sampling from val predictions...
Source: vâng , tôi làm toán_học ứng_dụng và đây là một
Reference: so , well , i do applied math ,
Model: <SOS> now , i i i i this and and
Attention Weights: tensor([[8.2017e-01, 1.6697e-01, 1.2854e-02, 1.0832e-05, 2.1251e-07, 2.2537e-08,
         3.4705e-09, 2.1153e-09, 1.1640e-09, 7.8178e-10],
        [6.2991e-03, 9.2579e-02, 8.9630e-01, 4.7758e-03, 4.7127e-05, 1.6594e-06,
         8.8967e-08, 4.1286e-08, 1.2429e-08, 4.8800e-09],
        [6.1704e-05, 3.4421e-03, 5.6481e-01, 4.1660e-01, 1.4852e-02, 2.4057e-04,
         1.9922e-06, 2.0581e-07, 1.1318e-08, 1.7941e-09],
        [1.2386e-06, 5.0991e-05, 2.1406e-02, 6.8861e-01, 2.6785e-01, 2.1918e-02,
         1.4479e-04, 1.0293e-05, 3.3092e-07, 3.2842e-08],
        [2.1633e-06, 9.1122e-05, 2.1989e-02, 5.7853e-01, 3.4560e-01, 5.2612e-02,
         1.0665e-03, 1.0689e-04, 6.1830e-06, 7.9547e-07],
        [1.



Epoch: 1.53, Train Loss: 0.00, Val Loss: 4.98, Train BLEU: 0.00, Val BLEU: 7.74, Minutes Elapsed: 79.43
Sampling from val predictions...
Source: tôi hoàn_toàn tuyệt_vọng . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: i lost all hope . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> i &apos;m &apos;t . . <EOS> <EOS> . <EOS>
Attention Weights: tensor([[0.9988, 0.0012, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1857, 0.8131, 0.0013, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0185, 0.9349, 0.0448, 0.0017, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0019, 0.1908, 0.7106, 0.0955, 0.0012, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0017, 0.0538, 0.3623, 0.5377, 0.0445, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0029, 0.0598, 0.2395, 0.5366, 0.1612, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0010, 0.0471, 0.2118, 0.4884, 0.2517, 0.0000, 0.0000, 0.0000



Epoch: 1.58, Train Loss: 0.00, Val Loss: 4.94, Train BLEU: 0.00, Val BLEU: 8.42, Minutes Elapsed: 81.90
Sampling from val predictions...
Source: chúng_ta những nạn_nhân cần đến tất_cả mọi người . <EOS>
Reference: we victims need everyone . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> we &apos;re to to to . . . <EOS>
Attention Weights: tensor([[7.2615e-01, 2.6974e-01, 4.0456e-03, 5.6556e-05, 4.0203e-06, 8.1541e-07,
         2.6263e-07, 7.0225e-08, 2.0005e-08, 5.6939e-09],
        [3.6141e-03, 8.5823e-01, 1.3639e-01, 1.6929e-03, 6.2679e-05, 5.6954e-06,
         9.8269e-07, 1.1686e-07, 1.6429e-08, 3.7402e-09],
        [1.0230e-03, 1.1429e-01, 5.9419e-01, 2.3066e-01, 4.6777e-02, 1.0628e-02,
         2.2226e-03, 1.9901e-04, 8.0080e-06, 2.4484e-07],
        [9.4990e-06, 9.3833e-04, 1.1893e-01, 4.5354e-01, 2.9185e-01, 1.0468e-01,
         2.7998e-02, 2.0189e-03, 4.1310e-05, 2.8501e-07],
        [1.2167e-06, 4.6670e-05, 3.2895e-03, 6.9221e-02, 2.7250e-01, 3.2384e-01,
         2.4775e-01, 7.6577e-02, 6



Epoch: 1.62, Train Loss: 0.00, Val Loss: 4.91, Train BLEU: 0.00, Val BLEU: 8.48, Minutes Elapsed: 84.39
Sampling from val predictions...
Source: nhưng hầu_hết mọi người không đồng_ý . <EOS> <PAD> <PAD>
Reference: but most people don &apos;t agree . <EOS> <PAD>
Model: <SOS> but the people don &apos;t &apos;t . <EOS> <EOS>
Attention Weights: tensor([[0.2219, 0.7689, 0.0090, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0177, 0.8464, 0.1339, 0.0019, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0014, 0.0380, 0.4621, 0.3942, 0.1039, 0.0004, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0002, 0.0032, 0.0721, 0.3496, 0.5502, 0.0244, 0.0003, 0.0000, 0.0000,
         0.0000],
        [0.0001, 0.0017, 0.0376, 0.2865, 0.6285, 0.0453, 0.0004, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0011, 0.0432, 0.4192, 0.4943, 0.0420, 0.0001, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0004, 0.0142, 0.2050, 0.5331, 0.2430,



Epoch: 1.67, Train Loss: 0.00, Val Loss: 4.91, Train BLEU: 0.00, Val BLEU: 8.31, Minutes Elapsed: 86.86
Sampling from val predictions...
Source: hiện có khoảng 10,000 loài chim trên thế_giới . <EOS>
Reference: there are 10,000 species of birds in the world
Model: <SOS> the &apos;s a <UNK> <UNK> the . . .
Attention Weights: tensor([[9.5661e-01, 4.3160e-02, 2.0226e-04, 3.0986e-05, 7.2221e-07, 7.6645e-08,
         4.5135e-08, 1.1182e-08, 4.6009e-09, 1.7822e-09],
        [7.9941e-02, 8.6104e-01, 4.7251e-02, 1.1732e-02, 3.1918e-05, 7.4061e-07,
         1.9493e-07, 1.7956e-08, 4.0855e-09, 1.1754e-09],
        [2.9508e-02, 4.8274e-01, 1.9895e-01, 2.8009e-01, 8.1401e-03, 3.7969e-04,
         1.7916e-04, 8.0972e-06, 5.0936e-07, 3.1013e-08],
        [2.9371e-04, 2.2198e-02, 1.1853e-01, 7.7534e-01, 8.0064e-02, 2.0437e-03,
         1.4714e-03, 5.3059e-05, 2.4566e-06, 6.8521e-08],
        [8.7501e-05, 1.5566e-03, 9.7932e-03, 6.9738e-01, 2.7425e-01, 7.4520e-03,
         9.1223e-03, 3.4394e-04, 1.555



Epoch: 1.72, Train Loss: 0.00, Val Loss: 4.89, Train BLEU: 0.00, Val BLEU: 8.41, Minutes Elapsed: 89.35
Sampling from val predictions...
Source: và mảnh vườn , nó rất đẹp . <EOS> <PAD>
Reference: and the garden , it was beautiful . <EOS>
Model: <SOS> and the , , , it it . .
Attention Weights: tensor([[0.0185, 0.9779, 0.0036, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0059, 0.9785, 0.0154, 0.0002, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0028, 0.5406, 0.4423, 0.0130, 0.0012, 0.0001, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0009, 0.0390, 0.5348, 0.3246, 0.0875, 0.0127, 0.0006, 0.0000, 0.0000,
         0.0000],
        [0.0007, 0.0085, 0.1956, 0.4293, 0.3299, 0.0348, 0.0011, 0.0000, 0.0000,
         0.0000],
        [0.0001, 0.0006, 0.0224, 0.1688, 0.6113, 0.1877, 0.0090, 0.0001, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0005, 0.0061, 0.2049, 0.6075, 0.1734, 0.0076, 0.0001,
         0.0000],
        [0.00



Epoch: 1.77, Train Loss: 0.00, Val Loss: 4.88, Train BLEU: 0.00, Val BLEU: 8.78, Minutes Elapsed: 91.82
Sampling from val predictions...
Source: gần hai năm đã trôi_qua từ khi cuộc khởi_nghĩa ở
Reference: almost two years have passed since the libyan revolution
Model: <SOS> in the years years years from the the of
Attention Weights: tensor([[9.5761e-01, 4.1826e-02, 5.5401e-04, 5.0208e-06, 4.2771e-07, 3.6749e-07,
         9.1712e-08, 3.2084e-08, 1.4192e-08, 9.2345e-09],
        [9.6825e-03, 6.4403e-01, 3.4050e-01, 5.4651e-03, 1.9059e-04, 1.2281e-04,
         6.5627e-06, 9.5632e-07, 1.9224e-07, 6.3751e-08],
        [7.6104e-04, 1.2170e-01, 7.6418e-01, 9.6732e-02, 8.6934e-03, 7.4433e-03,
         4.6250e-04, 2.4978e-05, 1.3831e-06, 1.8274e-07],
        [3.0895e-05, 6.8709e-03, 6.2854e-01, 2.6003e-01, 3.8804e-02, 6.1001e-02,
         4.5077e-03, 2.0541e-04, 7.9472e-06, 7.3218e-07],
        [1.1409e-05, 8.0460e-04, 1.3901e-01, 2.8537e-01, 1.3278e-01, 3.6824e-01,
         6.8351e-02, 5.1177e



Epoch: 1.82, Train Loss: 0.00, Val Loss: 4.86, Train BLEU: 0.00, Val BLEU: 8.96, Minutes Elapsed: 94.27
Sampling from val predictions...
Source: rất nhiều người bị lừa bởi những lời_hứa <UNK> về
Reference: many have been tricked by false promises of a
Model: <SOS> there many people people the the <UNK> the the
Attention Weights: tensor([[8.4313e-01, 1.5314e-01, 3.7003e-03, 3.2351e-05, 7.8410e-07, 1.8214e-07,
         3.7866e-08, 1.7370e-08, 6.1126e-09, 4.1833e-09],
        [2.8937e-02, 3.8460e-01, 5.4339e-01, 4.1440e-02, 1.3685e-03, 2.4116e-04,
         1.6568e-05, 4.5355e-06, 5.4297e-07, 1.1094e-07],
        [5.4036e-03, 6.0618e-02, 4.2004e-01, 3.5399e-01, 8.7940e-02, 6.2641e-02,
         7.1475e-03, 2.0661e-03, 1.4130e-04, 8.7082e-06],
        [4.9295e-04, 6.3296e-03, 7.4775e-02, 2.1681e-01, 1.5311e-01, 4.1907e-01,
         8.3796e-02, 4.0998e-02, 4.2311e-03, 3.7912e-04],
        [2.4644e-05, 2.1389e-04, 2.2247e-03, 1.2855e-02, 2.5399e-02, 3.1456e-01,
         1.8980e-01, 3.4021e-01,



Epoch: 1.86, Train Loss: 0.00, Val Loss: 4.82, Train BLEU: 0.00, Val BLEU: 9.06, Minutes Elapsed: 96.73
Sampling from val predictions...
Source: trước_tiên , bạn phải mang đến cho họ sự bảo_mật
Reference: first , you have to offer them <UNK> .
Model: <SOS> so , , you to to to to they
Attention Weights: tensor([[9.9772e-01, 1.8826e-03, 3.9229e-04, 3.9685e-06, 1.4737e-07, 4.9318e-08,
         2.5052e-08, 1.4160e-08, 5.9412e-09, 3.9148e-09],
        [3.3111e-01, 1.6352e-01, 4.9632e-01, 8.8233e-03, 1.7410e-04, 3.2846e-05,
         1.2108e-05, 5.6810e-06, 1.6858e-06, 7.9894e-07],
        [2.8410e-03, 2.3425e-02, 8.3171e-01, 1.3962e-01, 2.1906e-03, 1.8774e-04,
         1.7973e-05, 2.1137e-06, 1.3792e-07, 2.6933e-08],
        [1.3478e-04, 2.9765e-03, 7.2049e-01, 2.6567e-01, 9.4177e-03, 1.1817e-03,
         1.2177e-04, 1.1732e-05, 4.0109e-07, 4.1637e-08],
        [7.3337e-05, 2.4644e-03, 1.4516e-01, 7.1791e-01, 1.0524e-01, 2.3349e-02,
         5.0025e-03, 7.7907e-04, 2.0732e-05, 1.4504e-06],
 



Epoch: 1.91, Train Loss: 0.00, Val Loss: 4.81, Train BLEU: 0.00, Val BLEU: 8.46, Minutes Elapsed: 99.26
Sampling from val predictions...
Source: ở kathmandu , tôi được hộ_tống bởi những người phụ_nữ
Reference: in <UNK> , i was escorted by women who
Model: <SOS> in , , i was the to the people
Attention Weights: tensor([[9.8762e-01, 1.2077e-02, 2.8273e-04, 1.7028e-05, 7.2015e-07, 1.8834e-07,
         1.1056e-07, 5.7100e-08, 4.2957e-08, 2.0413e-08],
        [7.1267e-02, 8.3979e-01, 8.3610e-02, 5.2406e-03, 7.8665e-05, 7.1489e-06,
         1.8767e-06, 4.3514e-07, 2.0084e-07, 6.2888e-08],
        [4.0578e-03, 2.4063e-01, 4.3818e-01, 2.9611e-01, 1.8938e-02, 1.8185e-03,
         2.5774e-04, 1.4167e-05, 2.3593e-06, 2.1876e-07],
        [4.3439e-04, 1.2164e-02, 1.7104e-01, 7.6813e-01, 4.2238e-02, 4.8787e-03,
         1.0162e-03, 7.4829e-05, 1.4720e-05, 1.0262e-06],
        [1.7290e-05, 5.4171e-04, 5.3893e-03, 2.7622e-01, 4.6518e-01, 1.8879e-01,
         5.6100e-02, 6.8014e-03, 9.1658e-04, 4.2203



Epoch: 1.96, Train Loss: 0.00, Val Loss: 4.79, Train BLEU: 0.00, Val BLEU: 8.76, Minutes Elapsed: 101.75
Sampling from val predictions...
Source: vì_thế chúng_ta được dạy rằng " sự khoan_dung của ta
Reference: thus we are told that &quot; my mercy takes
Model: <SOS> so we we to to &quot; the &quot; is
Attention Weights: tensor([[8.2594e-01, 1.7068e-01, 3.3300e-03, 4.5948e-05, 3.3174e-06, 5.8644e-07,
         1.2150e-07, 3.6839e-08, 3.0507e-08, 2.4257e-08],
        [7.6713e-02, 8.3395e-01, 8.8266e-02, 1.0277e-03, 3.8595e-05, 1.9208e-06,
         1.8934e-07, 2.6500e-08, 9.8858e-09, 4.6258e-09],
        [3.2678e-03, 1.1631e-01, 8.0626e-01, 7.0540e-02, 3.5692e-03, 5.0646e-05,
         1.9675e-06, 5.0951e-08, 7.7132e-09, 9.3587e-10],
        [3.0039e-04, 5.3957e-02, 5.0791e-01, 3.0692e-01, 1.2265e-01, 7.7907e-03,
         4.6299e-04, 1.5081e-05, 1.8958e-06, 1.0574e-07],
        [3.1156e-06, 2.0346e-04, 1.5285e-02, 3.3224e-01, 5.0304e-01, 1.2810e-01,
         2.0243e-02, 7.9595e-04, 8.8377e-



Epoch: 2.00, Train Loss: 0.00, Val Loss: 4.83, Train BLEU: 0.00, Val BLEU: 8.66, Minutes Elapsed: 103.82
Sampling from val predictions...
Source: cảm_ơn cháu rất nhiều . cảm_ơn . <EOS> <PAD> <PAD>
Reference: thank you so much . thank you . <EOS>
Model: <SOS> thank you very much . . you . thank
Attention Weights: tensor([[0.0081, 0.7454, 0.2453, 0.0012, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0059, 0.6445, 0.3467, 0.0029, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0069, 0.1989, 0.6151, 0.1701, 0.0089, 0.0001, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0012, 0.0175, 0.1463, 0.5090, 0.3184, 0.0071, 0.0005, 0.0000, 0.0000,
         0.0000],
        [0.0006, 0.0033, 0.0295, 0.2830, 0.6428, 0.0376, 0.0032, 0.0001, 0.0000,
         0.0000],
        [0.0007, 0.0055, 0.0367, 0.1660, 0.5708, 0.1940, 0.0255, 0.0008, 0.0000,
         0.0000],
        [0.0007, 0.0046, 0.0273, 0.1053, 0.4069, 0.3556, 0.0946, 0.0049, 0.0000,
         0.



Epoch: 2.05, Train Loss: 0.00, Val Loss: 4.79, Train BLEU: 0.00, Val BLEU: 9.53, Minutes Elapsed: 106.30
Sampling from val predictions...
Source: tôi đã làm việc cho tổ_chức phi chính_phủ của ý
Reference: i worked for an italian ngo , and every
Model: <SOS> i i to the of of the <EOS> the
Attention Weights: tensor([[9.8557e-01, 1.4106e-02, 3.1369e-04, 5.8559e-06, 1.2775e-06, 4.0610e-07,
         1.1400e-07, 4.2056e-08, 1.9205e-08, 1.4889e-08],
        [2.4545e-02, 7.2661e-01, 2.4773e-01, 1.0487e-03, 6.0802e-05, 5.9800e-06,
         4.0124e-07, 4.1971e-08, 1.1783e-08, 4.5012e-09],
        [1.9012e-03, 4.0009e-02, 6.9614e-01, 1.9625e-01, 5.5312e-02, 9.8838e-03,
         4.8426e-04, 1.4438e-05, 1.3403e-06, 1.3127e-07],
        [6.7231e-06, 5.4828e-05, 4.4832e-03, 9.2615e-02, 4.4610e-01, 4.1765e-01,
         3.8274e-02, 7.8411e-04, 3.6831e-05, 1.6337e-06],
        [1.8654e-06, 1.1750e-05, 4.4901e-04, 1.2876e-02, 1.9959e-01, 5.4188e-01,
         2.0775e-01, 3.3566e-02, 3.6631e-03, 2.1296e-04



Epoch: 2.10, Train Loss: 0.00, Val Loss: 4.76, Train BLEU: 0.00, Val BLEU: 9.65, Minutes Elapsed: 108.79
Sampling from val predictions...
Source: tôi nghĩ , trời ạ , tôi cảm_thấy rất tệ
Reference: so i &apos;m like , man , it made
Model: <SOS> i think , , , , , i i
Attention Weights: tensor([[9.9744e-01, 2.5396e-03, 1.8719e-05, 1.0402e-06, 4.0601e-08, 5.8282e-09,
         3.0138e-09, 1.1230e-09, 5.9998e-10, 3.4148e-10],
        [6.1613e-02, 9.1847e-01, 1.9305e-02, 6.1293e-04, 3.6635e-06, 8.8430e-08,
         1.5620e-08, 4.5456e-09, 1.9572e-09, 7.2121e-10],
        [5.5589e-03, 4.1873e-01, 4.5108e-01, 1.2108e-01, 3.4835e-03, 5.8954e-05,
         9.6922e-06, 6.5823e-07, 1.0872e-07, 1.6214e-08],
        [4.1206e-03, 3.2629e-01, 3.5629e-01, 3.0305e-01, 1.0033e-02, 1.7301e-04,
         4.1983e-05, 2.9169e-06, 3.1758e-07, 3.7379e-08],
        [1.2909e-04, 2.9895e-03, 4.4387e-02, 8.8127e-01, 6.6315e-02, 2.6218e-03,
         2.1562e-03, 1.0714e-04, 1.6821e-05, 3.1374e-06],
        [7.3831e-04,



Epoch: 2.14, Train Loss: 0.00, Val Loss: 4.78, Train BLEU: 0.00, Val BLEU: 9.30, Minutes Elapsed: 111.34
Sampling from val predictions...
Source: điều mà tôi đã làm là trồng một rừng thực_phẩm
Reference: so what i did , i planted a food
Model: <SOS> what what i did this is is a a
Attention Weights: tensor([[8.8009e-01, 1.1950e-01, 4.0832e-04, 1.8700e-06, 1.0428e-07, 9.5418e-09,
         2.2470e-09, 1.0837e-09, 4.2280e-10, 3.8107e-10],
        [3.4963e-02, 6.6394e-01, 2.9929e-01, 1.7774e-03, 3.0833e-05, 4.4851e-07,
         4.1018e-08, 7.7980e-09, 1.7881e-09, 9.8249e-10],
        [2.0877e-03, 9.3438e-02, 6.5047e-01, 2.2954e-01, 2.4298e-02, 1.6098e-04,
         2.2900e-06, 9.4541e-08, 3.4294e-09, 6.9768e-10],
        [1.6623e-04, 4.5284e-03, 1.3865e-01, 4.2033e-01, 4.0283e-01, 3.2538e-02,
         9.1508e-04, 3.7676e-05, 6.0592e-07, 6.9718e-08],
        [4.8042e-05, 1.2879e-03, 4.2963e-02, 2.6335e-01, 5.6843e-01, 1.1816e-01,
         5.4610e-03, 2.9073e-04, 5.1358e-06, 5.7486e-07],
     



Epoch: 2.19, Train Loss: 0.00, Val Loss: 4.77, Train BLEU: 0.00, Val BLEU: 9.16, Minutes Elapsed: 113.82
Sampling from val predictions...
Source: tôi đã có cơ_hội đi đến đây bằng máy_bay lần
Reference: i got a chance to come by plane for
Model: <SOS> i i a opportunity to and a a that
Attention Weights: tensor([[9.7414e-01, 2.5597e-02, 2.5455e-04, 4.6676e-06, 7.8499e-08, 1.1478e-08,
         7.1971e-09, 4.2632e-09, 1.7201e-09, 1.0121e-09],
        [3.5950e-02, 8.4127e-01, 1.2151e-01, 1.2732e-03, 2.7490e-06, 1.0945e-07,
         2.3477e-08, 4.6279e-09, 8.9365e-10, 5.0906e-10],
        [2.0603e-03, 7.6637e-02, 7.3972e-01, 1.7935e-01, 2.1119e-03, 9.8843e-05,
         1.6829e-05, 9.6265e-07, 3.1741e-08, 8.2338e-09],
        [2.3128e-05, 4.4921e-04, 4.1571e-02, 8.8937e-01, 5.9727e-02, 6.8137e-03,
         1.9109e-03, 1.3397e-04, 2.9138e-06, 3.9328e-07],
        [1.4116e-04, 6.1370e-04, 1.5862e-02, 4.3594e-01, 3.7250e-01, 1.2760e-01,
         4.3755e-02, 3.4686e-03, 9.9743e-05, 2.7460e-05],
 



Epoch: 2.24, Train Loss: 0.00, Val Loss: 4.73, Train BLEU: 0.00, Val BLEU: 9.86, Minutes Elapsed: 116.30
Sampling from val predictions...
Source: anh ta cần chúng để cảm_thấy được bảo_vệ . <EOS>
Reference: he needed them to feel protected . <EOS> <PAD>
Model: <SOS> he &apos;s to to to . . <EOS> <EOS>
Attention Weights: tensor([[9.7446e-01, 2.5148e-02, 3.8118e-04, 1.4742e-05, 9.2797e-07, 2.4093e-07,
         1.2098e-07, 3.3980e-08, 1.0508e-08, 2.6695e-09],
        [2.2049e-03, 3.1471e-02, 9.4276e-01, 2.3450e-02, 1.0123e-04, 6.0596e-06,
         1.3200e-06, 1.1106e-07, 1.2036e-08, 1.4551e-09],
        [7.2716e-04, 1.2652e-02, 5.4758e-01, 4.2262e-01, 1.2205e-02, 3.3748e-03,
         7.9761e-04, 3.7875e-05, 1.3204e-06, 3.8890e-08],
        [1.4801e-05, 1.6070e-04, 6.2389e-02, 5.8386e-01, 2.5912e-01, 6.9335e-02,
         2.3515e-02, 1.5696e-03, 3.0830e-05, 2.3955e-07],
        [1.3571e-06, 1.1989e-05, 4.6110e-03, 9.5509e-02, 2.2386e-01, 3.4536e-01,
         2.7826e-01, 5.0312e-02, 2.0603e-0



Epoch: 2.29, Train Loss: 0.00, Val Loss: 4.74, Train BLEU: 0.00, Val BLEU: 9.38, Minutes Elapsed: 118.78
Sampling from val predictions...
Source: thật khó_tin , và chúng_tôi nói với những người zambia
Reference: and we could not believe , and we were
Model: <SOS> the the , , and and we we we
Attention Weights: tensor([[9.7950e-01, 2.0378e-02, 1.1768e-04, 8.4127e-07, 1.5633e-07, 1.5626e-08,
         3.8288e-09, 1.5523e-09, 9.1800e-10, 3.2954e-10],
        [5.4865e-02, 9.2634e-01, 1.8684e-02, 9.5958e-05, 1.1589e-05, 3.7103e-07,
         3.9167e-08, 7.4464e-09, 2.2571e-09, 4.8285e-10],
        [5.5305e-03, 1.6770e-01, 7.7781e-01, 4.0556e-02, 8.2211e-03, 1.7677e-04,
         7.6022e-06, 1.4827e-07, 1.6442e-08, 9.6905e-10],
        [2.0211e-03, 5.3988e-02, 4.2501e-01, 4.3181e-01, 8.3278e-02, 3.6807e-03,
         2.1405e-04, 3.6210e-06, 3.1029e-07, 8.3739e-09],
        [7.2112e-05, 2.1199e-02, 2.3432e-01, 5.4808e-01, 1.8940e-01, 6.5308e-03,
         3.9349e-04, 7.8061e-06, 9.0837e-07, 3.9539



Epoch: 2.34, Train Loss: 0.00, Val Loss: 4.72, Train BLEU: 0.00, Val BLEU: 9.42, Minutes Elapsed: 121.26
Sampling from val predictions...
Source: khi tôi lên 7 , tôi chứng_kiến cảnh người_ta <UNK>
Reference: when i was seven years old , i saw
Model: <SOS> when i was , , , i i i
Attention Weights: tensor([[9.6158e-01, 3.8309e-02, 1.1245e-04, 8.9452e-07, 5.7896e-07, 2.0181e-07,
         3.8096e-08, 1.8722e-08, 9.4097e-09, 4.0696e-09],
        [6.3572e-02, 7.9982e-01, 1.3581e-01, 6.2919e-04, 1.4651e-04, 2.5164e-05,
         2.9796e-06, 5.8958e-07, 1.3107e-07, 4.1190e-08],
        [3.9371e-03, 7.8366e-02, 8.7037e-01, 3.6545e-02, 9.6343e-03, 1.0752e-03,
         6.3096e-05, 4.5184e-06, 1.2968e-07, 2.6625e-08],
        [6.0949e-04, 2.5705e-02, 5.8837e-01, 2.0215e-01, 1.3234e-01, 4.5053e-02,
         5.1917e-03, 5.7055e-04, 1.2790e-05, 1.6511e-06],
        [2.1219e-05, 6.2034e-04, 2.2236e-02, 8.5875e-02, 3.7028e-01, 4.6559e-01,
         4.7198e-02, 7.7562e-03, 3.6035e-04, 5.7112e-05],
       



Epoch: 2.38, Train Loss: 0.00, Val Loss: 4.71, Train BLEU: 0.00, Val BLEU: 9.92, Minutes Elapsed: 123.74
Sampling from val predictions...
Source: tất_cả đều có_thể với những thông_tin này . <EOS> <PAD>
Reference: all this is possible with this information . <EOS>
Model: <SOS> all all can all of the . . <EOS>
Attention Weights: tensor([[0.8540, 0.1444, 0.0016, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0415, 0.5595, 0.3891, 0.0097, 0.0002, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0059, 0.1048, 0.6248, 0.2344, 0.0259, 0.0041, 0.0001, 0.0000, 0.0000,
         0.0000],
        [0.0004, 0.0084, 0.1650, 0.4837, 0.2285, 0.1050, 0.0088, 0.0003, 0.0000,
         0.0000],
        [0.0001, 0.0007, 0.0266, 0.2042, 0.2733, 0.3941, 0.0932, 0.0078, 0.0001,
         0.0000],
        [0.0009, 0.0029, 0.0165, 0.0789, 0.1741, 0.4713, 0.2271, 0.0281, 0.0002,
         0.0000],
        [0.0002, 0.0006, 0.0042, 0.0138, 0.0579, 0.2911, 0.4360, 0.1872, 0.00



Epoch: 2.43, Train Loss: 0.00, Val Loss: 4.68, Train BLEU: 0.00, Val BLEU: 10.29, Minutes Elapsed: 126.24
Sampling from val predictions...
Source: vì chị không có quyền_lực nào cả , còn em
Reference: because i &apos;m not in charge of anything ,
Model: <SOS> because there &apos;s &apos;t a , , but ,
Attention Weights: tensor([[9.4373e-01, 4.8655e-02, 7.5427e-03, 6.9060e-05, 2.0896e-06, 2.9249e-07,
         8.0600e-08, 9.5342e-08, 5.3258e-08, 2.9421e-08],
        [3.7684e-02, 1.2161e-01, 8.2695e-01, 1.3591e-02, 1.5974e-04, 6.2743e-06,
         4.1117e-07, 2.1475e-07, 3.4314e-08, 1.1727e-08],
        [2.8966e-03, 1.5300e-02, 6.1640e-01, 3.4360e-01, 2.1299e-02, 4.9426e-04,
         9.6107e-06, 1.8942e-06, 1.4158e-07, 1.5585e-08],
        [3.6991e-03, 2.1129e-02, 4.4701e-01, 3.1662e-01, 1.8071e-01, 2.9064e-02,
         1.3760e-03, 3.5845e-04, 2.7309e-05, 1.7662e-06],
        [1.1143e-04, 2.5307e-04, 4.7915e-03, 7.2309e-02, 4.2869e-01, 4.2034e-01,
         5.2817e-02, 1.8058e-02, 2.4924e-03



Epoch: 2.48, Train Loss: 0.00, Val Loss: 4.67, Train BLEU: 0.00, Val BLEU: 10.44, Minutes Elapsed: 128.70
Sampling from val predictions...
Source: sau đó , tổ_chức tình_nguyện tôi tham_gia all <UNK> <UNK>
Reference: soon after , an organization i volunteer with ,
Model: <SOS> after , i , i , i to <UNK>
Attention Weights: tensor([[4.8253e-01, 5.1461e-01, 2.5905e-03, 2.6975e-04, 1.5616e-06, 1.6715e-07,
         1.2052e-08, 5.2928e-09, 3.7001e-09, 3.6396e-09],
        [1.8842e-02, 1.8680e-01, 4.0443e-01, 3.8873e-01, 1.1655e-03, 3.1768e-05,
         1.1093e-06, 2.6105e-07, 1.2740e-07, 7.4632e-08],
        [1.1417e-04, 8.7777e-04, 2.3369e-02, 9.4916e-01, 2.6080e-02, 3.9111e-04,
         5.4356e-06, 3.5986e-07, 1.0673e-07, 4.2934e-08],
        [1.3314e-05, 1.0746e-04, 6.2913e-03, 6.1464e-01, 3.5610e-01, 2.2734e-02,
         1.0848e-04, 3.5145e-06, 1.3198e-06, 3.1430e-07],
        [4.5437e-05, 2.5235e-04, 8.0002e-03, 4.1069e-01, 4.1081e-01, 1.6768e-01,
         2.1914e-03, 1.6676e-04, 1.2247e



Epoch: 2.53, Train Loss: 0.00, Val Loss: 4.68, Train BLEU: 0.00, Val BLEU: 9.64, Minutes Elapsed: 131.17
Sampling from val predictions...
Source: chúng tới ngày đầu_tiên và nhìn_thấy con bù_nhìn , và
Reference: they will come the first day and they see
Model: <SOS> they &apos;re the and and and and and and
Attention Weights: tensor([[9.9759e-01, 2.3922e-03, 1.4181e-05, 1.5229e-07, 1.1490e-09, 6.4596e-10,
         2.2822e-10, 1.1232e-10, 5.0969e-11, 2.4793e-11],
        [1.0899e-01, 8.0581e-01, 8.4385e-02, 8.1480e-04, 3.1672e-06, 8.8447e-07,
         1.4817e-07, 3.2558e-08, 3.2890e-09, 7.8658e-10],
        [3.3878e-03, 3.9458e-02, 8.5809e-01, 9.5209e-02, 3.4058e-03, 3.2377e-04,
         9.7017e-05, 3.0076e-05, 8.8146e-07, 8.3014e-08],
        [9.7750e-05, 3.4340e-03, 4.3907e-01, 5.3959e-01, 1.3224e-02, 3.2581e-03,
         9.8648e-04, 3.2336e-04, 1.8420e-05, 2.3712e-06],
        [4.8131e-05, 5.4847e-04, 1.7249e-01, 7.5305e-01, 5.7714e-02, 1.0777e-02,
         3.6128e-03, 1.5993e-03, 1.4



Epoch: 2.58, Train Loss: 0.00, Val Loss: 4.65, Train BLEU: 0.00, Val BLEU: 10.21, Minutes Elapsed: 133.68
Sampling from val predictions...
Source: hãy tìm đọc cuốn sách của bà ấy . <EOS>
Reference: just go and read her book . <EOS> <PAD>
Model: <SOS> let &apos;s to your &apos;s she . <EOS> .
Attention Weights: tensor([[7.2833e-01, 2.6767e-01, 3.9218e-03, 6.9736e-05, 3.8939e-06, 4.2969e-07,
         7.3970e-08, 2.0956e-08, 8.8854e-09, 2.4950e-09],
        [8.3278e-03, 4.0069e-01, 5.1864e-01, 7.0549e-02, 1.7125e-03, 7.3899e-05,
         1.9138e-06, 8.9171e-08, 1.5078e-08, 2.6514e-09],
        [2.9163e-03, 4.3648e-02, 2.6044e-01, 4.8601e-01, 1.6047e-01, 4.1732e-02,
         4.5146e-03, 2.5795e-04, 1.1314e-05, 3.8669e-07],
        [4.0885e-05, 5.6205e-04, 1.7985e-02, 2.8027e-01, 4.5484e-01, 1.9286e-01,
         4.8477e-02, 4.7815e-03, 1.8417e-04, 2.8134e-06],
        [3.3038e-06, 5.0867e-05, 2.5419e-03, 8.6466e-02, 4.0536e-01, 3.2920e-01,
         1.4896e-01, 2.6038e-02, 1.3560e-03, 2.2825



Epoch: 2.62, Train Loss: 0.00, Val Loss: 4.63, Train BLEU: 0.00, Val BLEU: 10.96, Minutes Elapsed: 136.17
Sampling from val predictions...
Source: ít_hơn một tháng rồi , ông và con_gái ông đang
Reference: less than a month ago , he and his
Model: <SOS> so a , , , and and and and
Attention Weights: tensor([[9.9086e-01, 9.0829e-03, 6.0018e-05, 5.9402e-07, 4.2926e-08, 1.6110e-08,
         5.2379e-10, 4.5076e-10, 7.8010e-10, 3.8926e-10],
        [1.6991e-01, 5.5037e-01, 2.7360e-01, 5.9995e-03, 9.4653e-05, 2.7110e-05,
         1.6489e-07, 6.8919e-08, 3.2701e-08, 6.7784e-09],
        [2.0914e-02, 5.1660e-02, 4.4691e-01, 3.7776e-01, 5.9016e-02, 4.2411e-02,
         7.1002e-04, 4.7042e-04, 1.4041e-04, 8.4639e-06],
        [1.1009e-02, 1.7037e-02, 6.7525e-02, 2.8088e-01, 2.0079e-01, 4.2074e-01,
         1.5652e-03, 2.0356e-04, 2.4170e-04, 9.5575e-06],
        [8.7440e-03, 1.0044e-02, 3.0129e-02, 1.0857e-01, 1.1126e-01, 7.2725e-01,
         3.1180e-03, 3.6240e-04, 5.0127e-04, 3.0087e-05],
      



Epoch: 2.67, Train Loss: 0.00, Val Loss: 4.63, Train BLEU: 0.00, Val BLEU: 10.47, Minutes Elapsed: 138.69
Sampling from val predictions...
Source: bởi_vì gia_đình tôi không biết tiếng trung , nên tôi
Reference: since my family couldn &apos;t speak chinese , i
Model: <SOS> because my i i i know know i ,
Attention Weights: tensor([[9.9397e-01, 5.9459e-03, 8.2672e-05, 1.8242e-06, 7.4976e-08, 7.6526e-09,
         2.9501e-09, 1.8789e-09, 8.9452e-10, 7.4802e-10],
        [1.7131e-01, 7.7196e-01, 5.4776e-02, 1.8839e-03, 5.8384e-05, 3.2345e-06,
         3.3981e-07, 2.6544e-08, 4.3833e-09, 2.7763e-09],
        [6.7954e-03, 1.2171e-01, 5.3402e-01, 2.9865e-01, 3.6554e-02, 2.1792e-03,
         8.1489e-05, 1.0103e-06, 3.2180e-08, 5.3653e-09],
        [2.8525e-03, 4.2412e-02, 3.4059e-01, 5.1676e-01, 9.3154e-02, 4.1065e-03,
         1.2695e-04, 9.2102e-07, 2.7367e-08, 4.5206e-09],
        [7.5832e-04, 1.6031e-02, 3.1477e-01, 4.1107e-01, 2.2854e-01, 2.7006e-02,
         1.7733e-03, 3.8725e-05, 2.1938e



Epoch: 2.72, Train Loss: 0.00, Val Loss: 4.62, Train BLEU: 0.00, Val BLEU: 10.50, Minutes Elapsed: 141.22
Sampling from val predictions...
Source: đó là những hình_ảnh được dựng lên , và nó
Reference: they are constructions , and they are constructions by
Model: <SOS> it &apos;s the the the , , , and
Attention Weights: tensor([[9.8313e-01, 1.6762e-02, 1.0622e-04, 1.1916e-06, 2.5746e-08, 1.9917e-09,
         3.6551e-10, 1.9450e-10, 6.4922e-11, 5.6179e-11],
        [4.3510e-02, 8.1144e-01, 1.4062e-01, 4.3443e-03, 8.0250e-05, 2.7388e-06,
         1.3304e-07, 8.5302e-09, 1.0847e-09, 4.9437e-10],
        [1.4007e-02, 7.4425e-02, 6.1418e-01, 2.3942e-01, 4.7945e-02, 9.4435e-03,
         5.6873e-04, 1.3205e-05, 9.0085e-07, 2.8694e-07],
        [3.9267e-04, 3.7843e-03, 1.2839e-01, 3.8582e-01, 3.8149e-01, 9.1185e-02,
         8.7613e-03, 1.6095e-04, 7.8778e-06, 1.6944e-06],
        [5.1291e-04, 1.3942e-03, 4.2724e-03, 3.9750e-02, 3.5892e-01, 2.5294e-01,
         3.0949e-01, 3.1337e-02, 1.2682e-0



Epoch: 2.77, Train Loss: 0.00, Val Loss: 4.63, Train BLEU: 0.00, Val BLEU: 10.51, Minutes Elapsed: 143.71
Sampling from val predictions...
Source: phát_minh sắp tới của cháu , cháu muốn làm một
Reference: my next invention is , i want to make
Model: <SOS> the the of , , because want to do
Attention Weights: tensor([[9.1971e-01, 8.0182e-02, 1.0626e-04, 4.2878e-06, 6.2832e-07, 2.2735e-07,
         2.2837e-07, 1.2000e-07, 3.7471e-08, 1.3482e-08],
        [4.4665e-02, 8.6240e-01, 8.9029e-02, 3.6922e-03, 2.0987e-04, 6.0212e-06,
         1.9429e-06, 5.4993e-07, 8.6283e-08, 2.2808e-08],
        [7.1190e-03, 3.2090e-01, 4.4304e-01, 1.8102e-01, 4.6426e-02, 1.1478e-03,
         2.9089e-04, 5.2909e-05, 2.5984e-06, 2.7335e-07],
        [1.5921e-04, 5.8377e-03, 1.0159e-01, 2.7156e-01, 4.2601e-01, 1.0484e-01,
         7.0588e-02, 1.6755e-02, 2.2197e-03, 4.3632e-04],
        [8.0817e-05, 6.7085e-04, 1.2147e-02, 7.8273e-02, 1.7865e-01, 8.8917e-02,
         5.1561e-01, 1.1466e-01, 9.4489e-03, 1.5474e-0



Epoch: 2.82, Train Loss: 0.00, Val Loss: 4.60, Train BLEU: 0.00, Val BLEU: 10.57, Minutes Elapsed: 146.23
Sampling from val predictions...
Source: afghanistan nhìn rất khác khi nhìn từ mỹ . <EOS>
Reference: afghanistan looks so different from here in america .
Model: <SOS> you &apos;s have the the . . <EOS> .
Attention Weights: tensor([[9.4127e-01, 5.8469e-02, 2.5554e-04, 7.7491e-06, 2.0025e-06, 5.5660e-07,
         1.4487e-07, 6.2429e-08, 2.2585e-08, 4.9121e-09],
        [3.3156e-02, 8.5675e-01, 1.0796e-01, 2.0044e-03, 1.1758e-04, 1.4242e-05,
         1.5714e-06, 1.7195e-07, 1.3735e-08, 1.4514e-09],
        [1.1405e-02, 3.0674e-01, 4.7826e-01, 1.4247e-01, 5.0475e-02, 9.3616e-03,
         1.1329e-03, 1.4416e-04, 2.9725e-06, 7.7635e-08],
        [1.2628e-04, 5.2097e-03, 7.0650e-02, 2.3248e-01, 5.2311e-01, 1.3659e-01,
         2.6338e-02, 5.3737e-03, 1.1935e-04, 1.6758e-06],
        [3.5176e-05, 4.6619e-04, 5.6093e-03, 4.0256e-02, 4.3264e-01, 3.2970e-01,
         1.2984e-01, 5.6751e-02, 



Epoch: 2.86, Train Loss: 0.00, Val Loss: 4.57, Train BLEU: 0.00, Val BLEU: 10.66, Minutes Elapsed: 148.73
Sampling from val predictions...
Source: bởi_vì hoà_bình như thuật giả kim , nó cần đến
Reference: for peace has an alchemy , and this alchemy
Model: <SOS> because &apos;s is like , , , it it
Attention Weights: tensor([[9.9648e-01, 3.3922e-03, 1.1083e-04, 1.0017e-05, 2.1334e-06, 6.5467e-07,
         1.3370e-07, 4.8813e-08, 2.5693e-08, 1.1948e-08],
        [1.3556e-01, 6.6320e-01, 1.8008e-01, 1.8353e-02, 2.6202e-03, 1.8006e-04,
         3.3555e-06, 3.6492e-07, 1.0533e-07, 3.7885e-08],
        [4.8717e-02, 1.4345e-01, 4.4109e-01, 2.1555e-01, 1.1141e-01, 3.8756e-02,
         9.3832e-04, 7.4371e-05, 1.7396e-05, 2.2553e-06],
        [6.1102e-03, 2.2101e-02, 2.3455e-01, 3.0476e-01, 2.6673e-01, 1.5818e-01,
         6.7571e-03, 7.0031e-04, 9.8208e-05, 1.4159e-05],
        [3.0459e-05, 5.2366e-04, 6.3618e-02, 2.8367e-01, 4.4623e-01, 1.9013e-01,
         1.3055e-02, 2.2172e-03, 4.5744e-04, 7



Epoch: 2.91, Train Loss: 0.00, Val Loss: 4.57, Train BLEU: 0.00, Val BLEU: 10.12, Minutes Elapsed: 151.29
Sampling from val predictions...
Source: chúng_tôi có những cư_dân địa_phương có_thể tiếp_cận được với <UNK>
Reference: we have local numbers accessible to three quarters of
Model: <SOS> we have the the that can that that that
Attention Weights: tensor([[9.9158e-01, 8.4044e-03, 1.6199e-05, 4.4682e-07, 3.2656e-08, 4.7649e-09,
         4.1918e-10, 7.6390e-11, 1.8135e-11, 7.9058e-12],
        [2.3578e-02, 9.4912e-01, 2.6463e-02, 7.9520e-04, 3.8687e-05, 1.3905e-06,
         4.9957e-08, 3.0943e-09, 5.6559e-10, 1.3978e-10],
        [1.2712e-02, 1.2066e-01, 6.1786e-01, 1.8979e-01, 5.3015e-02, 5.7126e-03,
         2.4157e-04, 6.2966e-06, 3.5468e-07, 2.6815e-08],
        [3.5518e-04, 3.1390e-03, 1.3654e-01, 4.3302e-01, 3.5144e-01, 6.2182e-02,
         1.2063e-02, 1.1745e-03, 8.4753e-05, 4.6215e-06],
        [8.7279e-04, 2.5179e-03, 7.5233e-03, 1.0067e-01, 5.3568e-01, 2.7077e-01,
         6.



Epoch: 2.96, Train Loss: 0.00, Val Loss: 4.56, Train BLEU: 0.00, Val BLEU: 10.42, Minutes Elapsed: 153.81
Sampling from val predictions...
Source: tôi chuyển đến thành_phố new_york khi làm công_việc đầu_tiên viết
Reference: i had moved to new york city for my
Model: <SOS> i i to to the the to i i
Attention Weights: tensor([[9.8473e-01, 1.5139e-02, 1.2723e-04, 6.1881e-06, 3.7420e-07, 1.0742e-07,
         3.2376e-08, 1.1183e-08, 5.4349e-09, 2.8595e-09],
        [6.8433e-02, 9.1289e-01, 1.7824e-02, 8.4400e-04, 1.2310e-05, 4.7742e-07,
         5.7707e-08, 1.6463e-08, 4.5163e-09, 1.3955e-09],
        [2.4473e-02, 3.5913e-01, 5.0982e-01, 9.9965e-02, 5.5363e-03, 9.7127e-04,
         9.0171e-05, 8.0712e-06, 1.3638e-06, 1.8696e-07],
        [5.3514e-04, 2.2164e-02, 2.1583e-01, 6.4654e-01, 1.0529e-01, 8.9763e-03,
         5.4464e-04, 1.0007e-04, 2.0416e-05, 1.8720e-06],
        [2.6182e-04, 2.4244e-03, 7.2046e-02, 5.0574e-01, 2.7724e-01, 1.2523e-01,
         1.4011e-02, 2.2560e-03, 6.9246e-04, 1



Epoch: 3.00, Train Loss: 0.00, Val Loss: 4.60, Train BLEU: 0.00, Val BLEU: 10.42, Minutes Elapsed: 155.87
Sampling from val predictions...
Source: nhờ phát_minh này , tôi đã may_mắn nhận được học_bổng
Reference: because of this invention , i was lucky to
Model: <SOS> now this this , , i &apos;ve been to
Attention Weights: tensor([[9.9144e-01, 8.4841e-03, 7.4864e-05, 2.2458e-06, 3.9751e-07, 3.1512e-08,
         1.3791e-08, 4.3257e-09, 2.2129e-09, 1.0199e-09],
        [3.8256e-01, 5.7824e-01, 3.6745e-02, 1.9182e-03, 4.7612e-04, 4.6858e-05,
         6.5567e-06, 9.8826e-07, 1.2244e-07, 3.0979e-08],
        [1.6030e-01, 3.5288e-01, 2.8954e-01, 1.0927e-01, 5.6461e-02, 2.6530e-02,
         4.5029e-03, 4.8833e-04, 3.0472e-05, 2.9472e-06],
        [6.0782e-02, 1.4751e-01, 2.0855e-01, 1.8846e-01, 2.4019e-01, 1.3207e-01,
         1.9072e-02, 3.1417e-03, 2.0901e-04, 1.8989e-05],
        [2.5127e-02, 2.7770e-02, 1.3362e-01, 2.9023e-01, 4.8581e-01, 3.4574e-02,
         2.2910e-03, 5.4115e-04, 3.8176



Epoch: 3.05, Train Loss: 0.00, Val Loss: 4.56, Train BLEU: 0.00, Val BLEU: 11.13, Minutes Elapsed: 158.39
Sampling from val predictions...
Source: thuật_ngữ ngăn_cản chúng_tôi hiểu được ý_tưởng của anh . <EOS>
Reference: jargon is a barrier to our understanding of your
Model: <SOS> the the we to that the the . <EOS>
Attention Weights: tensor([[9.5056e-01, 4.9049e-02, 3.8578e-04, 3.3159e-06, 2.5169e-07, 4.4292e-08,
         1.0504e-08, 5.8288e-09, 1.3588e-09, 2.9924e-10],
        [2.0889e-01, 7.3174e-01, 5.6780e-02, 2.3888e-03, 1.7202e-04, 2.0142e-05,
         2.6321e-06, 5.1442e-07, 3.9049e-08, 5.9917e-09],
        [1.6025e-02, 1.7976e-01, 5.7983e-01, 1.7896e-01, 3.9940e-02, 4.9049e-03,
         5.4057e-04, 4.3039e-05, 9.2335e-07, 1.5650e-08],
        [5.9984e-05, 3.9034e-03, 7.6381e-02, 5.4486e-01, 3.1156e-01, 5.4652e-02,
         7.8050e-03, 7.7115e-04, 1.1646e-05, 9.7936e-08],
        [4.7113e-05, 3.3660e-03, 9.7629e-02, 2.2606e-01, 4.1253e-01, 1.6777e-01,
         7.8586e-02, 1.339



Epoch: 3.10, Train Loss: 0.00, Val Loss: 4.54, Train BLEU: 0.00, Val BLEU: 11.12, Minutes Elapsed: 160.92
Sampling from val predictions...
Source: cùng nhau , chúng_ta đã chỉ ra những không_gian công_cộng
Reference: together , we &apos;ve shown how powerful our public
Model: <SOS> in , , we going the the the .
Attention Weights: tensor([[9.1138e-01, 8.8306e-02, 2.9193e-04, 1.9218e-05, 5.8623e-07, 7.7431e-08,
         1.2803e-08, 4.0224e-09, 1.9117e-09, 7.4133e-10],
        [9.3747e-02, 6.7459e-01, 2.1788e-01, 1.1635e-02, 1.8694e-03, 2.5083e-04,
         2.3908e-05, 3.5833e-06, 7.0524e-07, 1.4107e-07],
        [1.8461e-02, 1.3971e-01, 2.0702e-01, 4.1492e-01, 1.6339e-01, 4.9776e-02,
         5.9308e-03, 7.2202e-04, 6.1240e-05, 4.0414e-06],
        [9.9880e-05, 3.2174e-03, 8.3785e-03, 3.3960e-01, 4.2720e-01, 1.8773e-01,
         2.7539e-02, 5.6238e-03, 5.7806e-04, 2.8073e-05],
        [2.4941e-05, 4.0464e-04, 1.1390e-03, 5.3874e-03, 2.2125e-01, 5.5461e-01,
         1.7019e-01, 4.1991e-02,



Epoch: 3.14, Train Loss: 0.00, Val Loss: 4.55, Train BLEU: 0.00, Val BLEU: 11.22, Minutes Elapsed: 163.42
Sampling from val predictions...
Source: và chúng_tôi tiếp_tục đi cho tới biên_giới lào , nhung
Reference: we made it all the way to the border
Model: <SOS> and we to to to the , the ,
Attention Weights: tensor([[1.6326e-02, 9.8318e-01, 4.9380e-04, 8.7716e-07, 1.4111e-08, 9.2145e-10,
         2.7674e-10, 1.0402e-10, 8.8216e-11, 6.9092e-11],
        [4.5696e-03, 9.1209e-01, 8.3135e-02, 1.9596e-04, 3.9079e-06, 2.0402e-07,
         4.7637e-08, 9.6399e-09, 3.0321e-09, 7.1113e-10],
        [1.4775e-05, 6.8794e-03, 9.8988e-01, 3.1966e-03, 2.5832e-05, 5.6949e-07,
         5.3907e-08, 4.2613e-09, 5.3926e-10, 1.2072e-10],
        [1.2674e-05, 2.4559e-04, 1.2315e-02, 6.1369e-01, 3.2969e-01, 3.1210e-02,
         1.1305e-02, 1.4290e-03, 9.4821e-05, 2.9701e-06],
        [1.5012e-04, 4.5979e-04, 2.3033e-03, 6.3786e-02, 3.4576e-01, 2.6901e-01,
         2.0957e-01, 8.8712e-02, 1.8204e-02, 2.0511e-



Epoch: 3.19, Train Loss: 0.00, Val Loss: 4.54, Train BLEU: 0.00, Val BLEU: 11.41, Minutes Elapsed: 165.95
Sampling from val predictions...
Source: tất_nhiên những người dân bản_địa <UNK> hứng_thú làm công_việc này
Reference: and of course the local people had absolutely no
Model: <SOS> of many people of people of that &apos;t to
Attention Weights: tensor([[9.9737e-01, 2.5759e-03, 5.2270e-05, 2.7708e-06, 2.2983e-07, 1.3159e-08,
         1.5308e-08, 5.7492e-09, 1.5541e-09, 9.7116e-10],
        [7.9062e-01, 1.7006e-01, 3.3808e-02, 4.9936e-03, 4.9782e-04, 8.9846e-06,
         4.8144e-06, 4.4467e-07, 8.5891e-08, 2.4456e-08],
        [2.8174e-01, 3.4229e-01, 2.3673e-01, 1.0434e-01, 3.3414e-02, 1.0587e-03,
         4.0180e-04, 1.7426e-05, 4.6679e-07, 4.7755e-08],
        [7.4508e-02, 1.2668e-01, 2.4407e-01, 2.6331e-01, 2.5949e-01, 2.0808e-02,
         1.0640e-02, 4.8230e-04, 1.2745e-05, 8.5187e-07],
        [2.6800e-02, 5.2480e-02, 1.8427e-01, 2.5992e-01, 3.8992e-01, 3.8306e-02,
         4.64



Epoch: 3.24, Train Loss: 0.00, Val Loss: 4.53, Train BLEU: 0.00, Val BLEU: 11.33, Minutes Elapsed: 168.47
Sampling from val predictions...
Source: " trước_khi tôi chết , tôi muốn hoàn_toàn là chính
Reference: &quot; before i die , i want to be
Model: <SOS> &quot; i i i , i i to do
Attention Weights: tensor([[9.7628e-01, 2.3669e-02, 5.0264e-05, 1.1509e-06, 1.8944e-07, 1.1456e-07,
         5.6437e-08, 1.5350e-08, 5.2392e-09, 2.8104e-09],
        [8.3770e-03, 9.5946e-01, 3.1512e-02, 6.3595e-04, 6.6347e-06, 3.8679e-06,
         9.6280e-07, 1.5444e-07, 3.1655e-08, 1.2030e-08],
        [1.6402e-03, 2.8465e-01, 5.9691e-01, 1.1574e-01, 8.1016e-04, 2.0736e-04,
         2.9092e-05, 7.0102e-07, 4.6227e-08, 9.0387e-09],
        [3.7222e-04, 5.3995e-02, 3.0600e-01, 6.1359e-01, 1.5966e-02, 5.0285e-03,
         4.8684e-03, 1.7579e-04, 5.2996e-06, 4.4808e-07],
        [6.0449e-04, 3.0990e-02, 1.9455e-01, 6.7499e-01, 5.2990e-02, 2.3923e-02,
         2.0898e-02, 1.0002e-03, 5.7133e-05, 5.3007e-06],
    



Epoch: 3.29, Train Loss: 0.00, Val Loss: 4.54, Train BLEU: 0.00, Val BLEU: 11.00, Minutes Elapsed: 170.99
Sampling from val predictions...
Source: ông làm thế_nào - - ? " và tôi đáp
Reference: how can you do — ? &quot; and i
Model: <SOS> he do do -- ? ? &quot; and i
Attention Weights: tensor([[9.8944e-01, 1.0490e-02, 6.0344e-05, 7.9326e-06, 7.2038e-07, 7.8079e-08,
         3.5476e-08, 4.2190e-09, 3.9162e-09, 2.8961e-09],
        [3.4438e-01, 5.9372e-01, 5.3949e-02, 7.7061e-03, 2.3359e-04, 1.5713e-05,
         1.9858e-06, 5.3958e-08, 1.2620e-08, 1.0851e-08],
        [1.5633e-01, 1.9742e-01, 2.9526e-01, 3.1056e-01, 3.6123e-02, 3.8218e-03,
         4.7341e-04, 1.5636e-05, 1.7681e-06, 5.5211e-07],
        [1.3763e-02, 8.7010e-02, 2.5491e-01, 5.3475e-01, 9.6233e-02, 1.0333e-02,
         2.8519e-03, 1.3458e-04, 1.9223e-05, 3.6459e-06],
        [4.0824e-05, 5.5888e-04, 1.2590e-02, 3.1811e-01, 5.0344e-01, 1.1943e-01,
         4.2752e-02, 2.9254e-03, 1.3417e-04, 1.5746e-05],
        [6.4010e-05



Epoch: 3.34, Train Loss: 0.00, Val Loss: 4.52, Train BLEU: 0.00, Val BLEU: 10.73, Minutes Elapsed: 173.51
Sampling from val predictions...
Source: thực ra , hầu_hết chúng đều được chụp bởi những
Reference: in fact , most of them were taken by
Model: <SOS> in fact , the they are are are by
Attention Weights: tensor([[9.5678e-01, 3.8812e-02, 3.7967e-03, 6.0010e-04, 7.3871e-06, 8.1656e-07,
         3.6055e-07, 1.1097e-07, 5.5202e-08, 2.4728e-08],
        [5.6001e-02, 2.0746e-01, 6.2270e-01, 1.0779e-01, 5.5654e-03, 3.8511e-04,
         7.2140e-05, 1.3811e-05, 3.3229e-06, 1.0147e-06],
        [2.5101e-03, 1.3133e-02, 6.3285e-02, 6.9668e-01, 1.9392e-01, 2.5167e-02,
         4.6727e-03, 5.5285e-04, 7.0990e-05, 5.8391e-06],
        [2.8201e-04, 1.7462e-03, 1.6588e-02, 8.8255e-01, 8.6846e-02, 8.2798e-03,
         2.9460e-03, 5.9504e-04, 1.5517e-04, 1.6943e-05],
        [2.2365e-05, 3.4744e-04, 5.2681e-03, 1.8657e-01, 6.4793e-01, 1.2305e-01,
         3.2241e-02, 4.1072e-03, 4.3320e-04, 2.5272e-0



Epoch: 3.38, Train Loss: 0.00, Val Loss: 4.52, Train BLEU: 0.00, Val BLEU: 11.70, Minutes Elapsed: 176.01
Sampling from val predictions...
Source: bảo_hộ , là tôi đối_xử với bất_cứ người nào đến
Reference: <UNK> , i treat anybody from a different culture
Model: <SOS> so , i &apos;m to lot some of of
Attention Weights: tensor([[9.7373e-01, 2.5134e-02, 1.1259e-03, 1.4308e-05, 6.1769e-07, 8.4839e-08,
         2.5247e-08, 1.2786e-08, 6.5012e-09, 3.7595e-09],
        [1.7804e-01, 2.2277e-01, 5.4269e-01, 5.4652e-02, 1.7234e-03, 1.0961e-04,
         1.2029e-05, 2.9053e-06, 6.8866e-07, 2.1985e-07],
        [4.6420e-03, 1.4970e-02, 4.6447e-01, 4.5063e-01, 6.0352e-02, 4.6444e-03,
         2.6941e-04, 2.0248e-05, 1.8996e-06, 2.5830e-07],
        [1.5011e-05, 7.3504e-05, 8.7216e-03, 3.0920e-01, 4.7647e-01, 1.7675e-01,
         2.6417e-02, 2.2278e-03, 1.1371e-04, 7.7841e-06],
        [7.3093e-05, 7.3180e-04, 4.0006e-03, 5.2120e-02, 3.3406e-01, 4.5874e-01,
         1.3686e-01, 1.1577e-02, 1.5685e-03



Epoch: 3.43, Train Loss: 0.00, Val Loss: 4.48, Train BLEU: 0.00, Val BLEU: 11.65, Minutes Elapsed: 178.52
Sampling from val predictions...
Source: và làm thế_nào chúng_ta chia_sẻ nhiều hơn những hy_vọng của
Reference: and how can we share more of our hopes
Model: <SOS> and how we we take to to of of
Attention Weights: tensor([[2.4548e-03, 9.9710e-01, 4.3710e-04, 4.4645e-06, 3.5071e-08, 4.8981e-09,
         1.4689e-09, 5.0133e-10, 3.5316e-10, 2.3301e-10],
        [1.6388e-03, 9.9198e-01, 6.0227e-03, 3.5124e-04, 3.7225e-06, 2.9367e-07,
         3.0749e-08, 5.3743e-09, 1.6973e-09, 3.9392e-10],
        [3.8881e-04, 2.6756e-01, 3.7170e-01, 3.5191e-01, 7.8251e-03, 5.8734e-04,
         2.1195e-05, 2.0991e-06, 3.0951e-07, 3.5797e-08],
        [1.2307e-04, 1.7027e-02, 7.3640e-02, 5.2795e-01, 2.2723e-01, 1.4620e-01,
         7.4310e-03, 3.6553e-04, 3.2777e-05, 2.0890e-06],
        [8.1729e-07, 2.7721e-05, 5.1500e-04, 1.8125e-02, 1.8388e-01, 6.8985e-01,
         9.2460e-02, 1.2835e-02, 2.1468e-03



Epoch: 3.48, Train Loss: 0.00, Val Loss: 4.48, Train BLEU: 0.00, Val BLEU: 11.95, Minutes Elapsed: 181.01
Sampling from val predictions...
Source: với_lại tôi cũng cảm_thấy mệt_mỏi với việc phải lái_xe 45
Reference: plus i got tired of driving 45 minutes round
Model: <SOS> so i i also with with to to to
Attention Weights: tensor([[9.6228e-01, 3.6958e-02, 7.3957e-04, 2.1424e-05, 2.2604e-06, 3.8445e-07,
         1.2242e-07, 1.0246e-07, 2.4890e-08, 2.1009e-08],
        [1.2203e-01, 7.1402e-01, 1.6000e-01, 3.7221e-03, 2.1231e-04, 1.2817e-05,
         1.5545e-06, 7.0336e-07, 1.4877e-07, 6.8846e-08],
        [1.7511e-03, 1.0668e-02, 8.6978e-01, 1.0852e-01, 9.0910e-03, 1.8237e-04,
         6.3478e-06, 1.2806e-06, 5.8977e-08, 7.7704e-09],
        [2.8842e-04, 1.5560e-02, 4.2481e-01, 3.4370e-01, 1.8952e-01, 2.4303e-02,
         1.3112e-03, 4.9861e-04, 9.7657e-06, 1.0459e-06],
        [1.8021e-05, 7.6331e-04, 1.4305e-02, 2.3759e-01, 5.5447e-01, 1.6767e-01,
         1.8818e-02, 6.0862e-03, 2.3044



Epoch: 3.53, Train Loss: 0.00, Val Loss: 4.50, Train BLEU: 0.00, Val BLEU: 11.29, Minutes Elapsed: 183.61
Sampling from val predictions...
Source: suy_nghĩ về cái chết làm cuộc_sống của bạn rõ_ràng .
Reference: thinking about death <UNK> your life . <EOS> <PAD>
Model: <SOS> the &apos;s the of the <EOS> . <EOS> you
Attention Weights: tensor([[9.5105e-01, 4.6801e-02, 1.9741e-03, 1.3405e-04, 2.8312e-05, 5.8945e-06,
         2.0325e-06, 1.9847e-06, 9.0619e-07, 6.0409e-07],
        [2.3738e-01, 6.5553e-01, 9.8655e-02, 7.7302e-03, 6.0612e-04, 9.5104e-05,
         8.5928e-06, 2.5192e-06, 1.0717e-06, 1.7563e-07],
        [2.1012e-02, 1.8603e-01, 3.0132e-01, 3.4154e-01, 1.0591e-01, 3.8182e-02,
         4.9524e-03, 9.0846e-04, 1.3810e-04, 5.1069e-06],
        [4.9393e-04, 1.5133e-02, 8.3033e-02, 3.1109e-01, 2.2166e-01, 2.9746e-01,
         4.2683e-02, 2.0293e-02, 7.9495e-03, 2.1627e-04],
        [3.5898e-05, 8.3786e-04, 1.1095e-02, 7.3999e-02, 1.6972e-01, 3.5237e-01,
         1.6385e-01, 1.5375e



Epoch: 3.58, Train Loss: 0.00, Val Loss: 4.46, Train BLEU: 0.00, Val BLEU: 11.36, Minutes Elapsed: 186.10
Sampling from val predictions...
Source: tại nhật , tới tháng_bảy , chúng_tôi đã mở_rộng ra
Reference: within japan , by july , we &apos;d <UNK>
Model: <SOS> in , , the , , we &apos;ve to
Attention Weights: tensor([[9.5880e-01, 4.0591e-02, 5.9834e-04, 6.2801e-06, 1.0668e-07, 3.2619e-08,
         2.4040e-08, 6.2299e-09, 2.0980e-09, 8.7038e-10],
        [3.0167e-01, 4.3394e-01, 2.4606e-01, 1.6689e-02, 1.5380e-03, 4.5946e-05,
         4.5626e-05, 5.7140e-06, 1.2778e-06, 2.9079e-07],
        [4.8782e-02, 8.2060e-02, 8.2235e-02, 7.0840e-01, 7.2926e-02, 1.3214e-03,
         3.8584e-03, 3.8776e-04, 3.0110e-05, 2.9959e-06],
        [5.5998e-03, 1.1618e-02, 5.0758e-02, 8.4926e-01, 4.8359e-02, 4.8548e-03,
         2.8529e-02, 9.5900e-04, 5.6038e-05, 7.8877e-06],
        [1.7799e-05, 1.4022e-04, 1.8507e-03, 1.2022e-01, 6.3097e-01, 4.7077e-02,
         1.7718e-01, 1.9715e-02, 2.5975e-03, 2.373



Epoch: 3.62, Train Loss: 0.00, Val Loss: 4.45, Train BLEU: 0.00, Val BLEU: 12.08, Minutes Elapsed: 188.61
Sampling from val predictions...
Source: khi lượng đậu_xanh có_giá 1 đô_la sẽ cho bạn lượng
Reference: when one dollar &apos;s worth of green beans will
Model: <SOS> when the &apos;s to a a a to you
Attention Weights: tensor([[9.9152e-01, 8.3438e-03, 1.2498e-04, 5.2453e-06, 1.9559e-06, 6.9753e-07,
         2.0667e-06, 5.9901e-07, 2.8756e-07, 1.0779e-07],
        [1.2249e-01, 7.8071e-01, 9.5149e-02, 1.5394e-03, 9.2858e-05, 6.4259e-06,
         1.5587e-05, 2.4950e-06, 6.2406e-07, 1.6704e-07],
        [1.0030e-01, 4.5453e-01, 4.0123e-01, 4.1124e-02, 2.4273e-03, 6.2058e-05,
         2.9685e-04, 2.0089e-05, 2.6671e-06, 5.2291e-07],
        [4.8869e-02, 2.3613e-01, 3.0925e-01, 3.4649e-01, 3.5809e-02, 1.4204e-03,
         1.9585e-02, 2.0742e-03, 3.2392e-04, 5.8181e-05],
        [8.1686e-03, 4.0997e-02, 1.8778e-01, 5.1889e-01, 1.5207e-01, 7.6449e-03,
         7.2788e-02, 1.0290e-02, 1.1587



Epoch: 3.67, Train Loss: 0.00, Val Loss: 4.46, Train BLEU: 0.00, Val BLEU: 11.71, Minutes Elapsed: 191.12
Sampling from val predictions...
Source: đó là lí_do tại_sao người da_trắng tại châu phi được
Reference: that &apos;s why the white people in africa are
Model: <SOS> that &apos;s why why people is are the ,
Attention Weights: tensor([[9.5147e-01, 4.7656e-02, 8.5873e-04, 1.5045e-05, 2.0685e-07, 1.3155e-08,
         3.8956e-09, 1.5890e-09, 9.4307e-10, 6.1860e-10],
        [5.4132e-02, 8.8877e-01, 5.2408e-02, 4.5540e-03, 1.2604e-04, 5.5482e-06,
         7.1835e-07, 9.7261e-08, 2.7423e-08, 6.2484e-09],
        [3.2839e-02, 1.8989e-01, 5.2173e-01, 2.2110e-01, 3.1210e-02, 2.6379e-03,
         5.3119e-04, 4.7743e-05, 6.9045e-06, 8.7662e-07],
        [1.0193e-03, 7.7732e-03, 1.0710e-01, 3.5891e-01, 4.6637e-01, 4.5757e-02,
         1.1972e-02, 9.7698e-04, 9.6589e-05, 1.4998e-05],
        [4.8419e-05, 3.6542e-04, 3.6560e-03, 1.5283e-01, 6.6223e-01, 1.1861e-01,
         5.6949e-02, 4.8442e-03



Epoch: 3.72, Train Loss: 0.00, Val Loss: 4.44, Train BLEU: 0.00, Val BLEU: 11.69, Minutes Elapsed: 193.62
Sampling from val predictions...
Source: cảm_ứng từ có_thể tạo nhiệt , đặc_biệt thép lại dẫn_nhiệt
Reference: induction can heat , especially steel ; it &apos;s
Model: <SOS> but you can the , , , , &apos;s
Attention Weights: tensor([[2.5280e-01, 7.4172e-01, 5.4561e-03, 2.5592e-05, 3.6791e-07, 1.1815e-08,
         6.1857e-09, 9.2796e-10, 1.8683e-10, 1.4910e-10],
        [8.4835e-02, 6.0284e-01, 3.0579e-01, 6.4300e-03, 1.0783e-04, 1.3717e-06,
         4.8153e-07, 2.8584e-08, 2.6226e-09, 1.1598e-09],
        [1.5280e-02, 1.2323e-01, 5.8054e-01, 2.5624e-01, 2.4133e-02, 5.2201e-04,
         5.6403e-05, 3.1750e-06, 1.6711e-07, 3.8763e-08],
        [3.0451e-05, 2.4511e-03, 4.9619e-02, 3.3302e-01, 4.9070e-01, 9.5553e-02,
         2.5682e-02, 2.6143e-03, 2.3177e-04, 9.7455e-05],
        [7.2576e-05, 1.0478e-03, 6.0738e-03, 9.8546e-02, 4.5733e-01, 2.7393e-01,
         1.4202e-01, 1.3946e-02,



Epoch: 3.77, Train Loss: 0.00, Val Loss: 4.48, Train BLEU: 0.00, Val BLEU: 11.97, Minutes Elapsed: 196.19
Sampling from val predictions...
Source: khá là ấm_áp trong mùa_đông nhưng mùa_hè thì cực_kì nóng
Reference: it was cozy in winter but extremely hot in
Model: <SOS> it &apos;s the in the , but the the
Attention Weights: tensor([[9.9450e-01, 4.9758e-03, 5.1616e-04, 4.4820e-06, 2.1109e-07, 1.6939e-08,
         6.2696e-09, 3.6715e-09, 1.4626e-09, 1.0625e-09],
        [4.5150e-01, 2.0068e-01, 3.3253e-01, 1.4451e-02, 7.9906e-04, 2.9890e-05,
         6.3080e-06, 1.6376e-06, 4.4717e-07, 1.4519e-07],
        [2.2578e-01, 1.2902e-01, 5.4771e-01, 6.9965e-02, 2.3624e-02, 2.6194e-03,
         9.8650e-04, 2.1637e-04, 5.6683e-05, 1.7207e-05],
        [5.1575e-03, 7.0388e-03, 6.3128e-01, 2.5862e-01, 8.9436e-02, 6.9642e-03,
         1.2343e-03, 2.2843e-04, 3.1039e-05, 8.2901e-06],
        [9.3183e-04, 1.1530e-03, 1.4389e-01, 3.7848e-01, 3.8778e-01, 7.1837e-02,
         1.3418e-02, 2.1684e-03, 2.80



Epoch: 3.82, Train Loss: 0.00, Val Loss: 4.42, Train BLEU: 0.00, Val BLEU: 12.03, Minutes Elapsed: 198.68
Sampling from val predictions...
Source: thật ra , hy_vọng là nó không đến_nỗi kì_cục như
Reference: well , hopefully not as awkward as that picture
Model: <SOS> so , , &apos;s it it , , ,
Attention Weights: tensor([[8.6329e-01, 1.2099e-01, 1.3857e-02, 1.8474e-03, 6.4633e-06, 7.1417e-07,
         2.7652e-07, 2.5163e-08, 1.1430e-08, 7.9094e-09],
        [9.7348e-03, 3.3617e-02, 4.6466e-01, 4.8972e-01, 2.0264e-03, 2.1071e-04,
         3.1083e-05, 9.8227e-07, 1.9955e-07, 4.1241e-08],
        [9.4577e-04, 2.9370e-03, 2.1208e-02, 8.8597e-01, 5.0220e-02, 3.0102e-02,
         7.9235e-03, 5.9584e-04, 8.3439e-05, 1.2708e-05],
        [2.0957e-04, 6.1148e-04, 8.1570e-03, 7.2646e-01, 1.2784e-01, 1.0451e-01,
         3.0286e-02, 1.7203e-03, 1.7912e-04, 2.1484e-05],
        [7.6500e-05, 1.8627e-04, 2.2446e-03, 7.0364e-02, 1.0719e-01, 4.6579e-01,
         2.9744e-01, 4.7954e-02, 7.7360e-03, 1.02



Epoch: 3.86, Train Loss: 0.00, Val Loss: 4.41, Train BLEU: 0.00, Val BLEU: 12.19, Minutes Elapsed: 201.27
Sampling from val predictions...
Source: vì quân_cảnh không giữ cho môi_trường trong_sạch . <EOS> <PAD>
Reference: because mps do not keep the environment clean .
Model: <SOS> because it &apos;s &apos;t just the . . <EOS>
Attention Weights: tensor([[0.9127, 0.0810, 0.0063, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0155, 0.3470, 0.6297, 0.0074, 0.0003, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0174, 0.1827, 0.6153, 0.1425, 0.0353, 0.0064, 0.0005, 0.0000, 0.0000,
         0.0000],
        [0.0133, 0.0580, 0.6447, 0.1118, 0.1145, 0.0435, 0.0124, 0.0017, 0.0000,
         0.0000],
        [0.0001, 0.0014, 0.0118, 0.1556, 0.3856, 0.3324, 0.0998, 0.0131, 0.0002,
         0.0000],
        [0.0000, 0.0001, 0.0017, 0.0170, 0.1106, 0.4150, 0.2714, 0.1799, 0.0042,
         0.0000],
        [0.0001, 0.0002, 0.0012, 0.0059, 0.0382, 0.3281, 0.



Epoch: 3.91, Train Loss: 0.00, Val Loss: 4.42, Train BLEU: 0.00, Val BLEU: 11.39, Minutes Elapsed: 203.79
Sampling from val predictions...
Source: người có ý_tưởng chưa chắc đã có sự hiểu_biết ,
Reference: the person with the idea may not have the
Model: <SOS> people are are &apos;t who who not had ,
Attention Weights: tensor([[9.8783e-01, 1.2118e-02, 5.5632e-05, 6.2058e-07, 2.3958e-08, 1.3102e-09,
         1.4068e-10, 5.8727e-11, 3.1719e-11, 2.3479e-11],
        [1.5517e-01, 3.1474e-01, 4.7243e-01, 5.4569e-02, 3.0190e-03, 7.1909e-05,
         3.6143e-06, 7.7113e-07, 1.1330e-07, 9.3136e-09],
        [5.0296e-02, 9.2185e-02, 3.7805e-01, 3.8118e-01, 9.1052e-02, 6.1834e-03,
         7.0494e-04, 3.0355e-04, 4.6771e-05, 1.2145e-06],
        [8.8952e-02, 1.2250e-01, 1.8825e-01, 3.7273e-01, 2.0812e-01, 1.7871e-02,
         1.1463e-03, 3.6826e-04, 5.2000e-05, 2.8297e-06],
        [1.6973e-03, 4.2369e-03, 4.1606e-02, 2.5826e-01, 5.5864e-01, 1.0508e-01,
         1.5062e-02, 1.2541e-02, 2.8032e-0



Epoch: 3.96, Train Loss: 0.00, Val Loss: 4.40, Train BLEU: 0.00, Val BLEU: 11.57, Minutes Elapsed: 206.31
Sampling from val predictions...
Source: điều đó không tồn_tại <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: it doesn &apos;t exist . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> that &apos;s not . . <EOS> <EOS> . <EOS>
Attention Weights: tensor([[0.5130, 0.4130, 0.0740, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0499, 0.1414, 0.7881, 0.0206, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0406, 0.0966, 0.6625, 0.1925, 0.0078, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0072, 0.0107, 0.0725, 0.7530, 0.1566, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0035, 0.0045, 0.0366, 0.3750, 0.5805, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0113, 0.0119, 0.0476, 0.2474, 0.6818, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0191, 0.0206, 0.1191, 0.3417, 0.4994, 0.0000, 0.0000, 0.0



Epoch: 4.00, Train Loss: 0.00, Val Loss: 4.45, Train BLEU: 0.00, Val BLEU: 11.49, Minutes Elapsed: 208.44
Sampling from val predictions...
Source: cảm_ơn các bạn . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: thank you . <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Model: <SOS> thank you . <EOS> you . <EOS> you <EOS>
Attention Weights: tensor([[0.2028, 0.1538, 0.6361, 0.0072, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0772, 0.1368, 0.7056, 0.0798, 0.0007, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0283, 0.0290, 0.2362, 0.6356, 0.0710, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0150, 0.0161, 0.0742, 0.4438, 0.4508, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0065, 0.0070, 0.0267, 0.2742, 0.6856, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0144, 0.0150, 0.0698, 0.3194, 0.5814, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1107, 0.1572, 0.2247, 0.2302, 0.2772, 0.0000, 0.0000, 0.0000, 0.



Epoch: 4.05, Train Loss: 0.00, Val Loss: 4.41, Train BLEU: 0.00, Val BLEU: 12.18, Minutes Elapsed: 211.04
Sampling from val predictions...
Source: thay vào đó , slide ví_dụ này của <UNK> brown
Reference: instead , this example slide by <UNK> brown is
Model: <SOS> instead , the example of of the of of
Attention Weights: tensor([[9.5286e-01, 3.9203e-02, 7.8304e-03, 1.0273e-04, 6.7340e-07, 3.8434e-07,
         1.1890e-08, 1.7199e-09, 3.9079e-10, 4.6941e-10],
        [1.0235e-02, 2.4966e-02, 2.8899e-01, 6.5511e-01, 7.0630e-03, 1.3471e-02,
         1.6215e-04, 4.8174e-06, 1.2968e-07, 9.9153e-08],
        [4.6379e-03, 1.0723e-02, 4.6903e-02, 2.3631e-01, 3.6870e-01, 3.1163e-01,
         2.0069e-02, 1.0079e-03, 1.6095e-05, 4.4117e-06],
        [1.4874e-04, 6.5039e-04, 3.9525e-03, 3.8751e-02, 5.3841e-02, 8.2888e-01,
         6.8293e-02, 5.4107e-03, 5.8154e-05, 1.3027e-05],
        [1.0414e-04, 2.8310e-04, 2.5974e-03, 3.2229e-02, 9.7653e-03, 7.2678e-01,
         1.8147e-01, 4.5533e-02, 8.7237e-0



Epoch: 4.10, Train Loss: 0.00, Val Loss: 4.40, Train BLEU: 0.00, Val BLEU: 12.45, Minutes Elapsed: 213.68
Sampling from val predictions...
Source: sự sụp_đổ của bức tường berlin có_thể sẽ không tồn_tại
Reference: the fall of the berlin wall would maybe not
Model: <SOS> the the of of of of is that be
Attention Weights: tensor([[6.3794e-01, 3.5861e-01, 3.4140e-03, 3.7999e-05, 1.3962e-06, 3.1377e-07,
         8.8664e-08, 5.8500e-08, 1.8339e-08, 4.4118e-09],
        [9.2424e-02, 7.3086e-01, 1.3763e-01, 3.3931e-02, 4.6963e-03, 4.1258e-04,
         3.1280e-05, 8.9617e-06, 2.5382e-06, 4.3398e-07],
        [7.7969e-02, 2.8933e-01, 2.0811e-01, 2.0963e-01, 1.6403e-01, 4.1850e-02,
         6.8626e-03, 1.9147e-03, 2.8086e-04, 2.5749e-05],
        [2.3139e-02, 6.4206e-02, 1.0501e-01, 3.0509e-01, 3.8687e-01, 9.7801e-02,
         1.3537e-02, 3.6869e-03, 6.2507e-04, 3.1965e-05],
        [8.3620e-03, 6.4643e-02, 1.0267e-01, 2.6807e-01, 3.7259e-01, 1.1893e-01,
         4.8287e-02, 1.4197e-02, 2.1831e-03



Epoch: 4.14, Train Loss: 0.00, Val Loss: 4.40, Train BLEU: 0.00, Val BLEU: 12.37, Minutes Elapsed: 216.30
Sampling from val predictions...
Source: 150 năm trước , 1 nông_nô có_giá gấp 3 lần
Reference: a hundred and fifty years ago , an agricultural
Model: <SOS> 150 years years ago , , a a a
Attention Weights: tensor([[9.1920e-01, 7.9738e-02, 1.0292e-03, 2.5437e-05, 9.9366e-06, 1.0496e-06,
         3.7544e-07, 1.9516e-07, 7.9137e-08, 1.3778e-07],
        [2.3184e-01, 7.1021e-01, 5.6557e-02, 1.0636e-03, 2.5247e-04, 7.1204e-05,
         1.2102e-05, 1.7042e-06, 2.0423e-07, 2.0175e-07],
        [2.6887e-02, 4.7439e-01, 4.5014e-01, 3.8618e-02, 7.4716e-03, 2.1181e-03,
         3.6030e-04, 1.8544e-05, 6.0108e-07, 4.9440e-07],
        [7.0404e-03, 1.5794e-01, 4.3173e-01, 2.3840e-01, 1.4035e-01, 1.9944e-02,
         4.0499e-03, 5.1216e-04, 2.4575e-05, 1.3789e-05],
        [8.9895e-04, 1.5839e-02, 8.3509e-02, 1.7137e-01, 6.4040e-01, 7.6722e-02,
         9.4271e-03, 1.6688e-03, 1.0241e-04, 5.7899e



Epoch: 4.19, Train Loss: 0.00, Val Loss: 4.40, Train BLEU: 0.00, Val BLEU: 12.37, Minutes Elapsed: 218.90
Sampling from val predictions...
Source: với một cộng_đồng 10,000 người , chúng_ta có_thể có khoảng
Reference: in a community of 10,000 people , we get
Model: <SOS> in a , , , , , we can
Attention Weights: tensor([[9.8742e-01, 1.2254e-02, 2.8464e-04, 3.6559e-05, 1.3613e-06, 5.2841e-08,
         3.5269e-08, 8.0767e-09, 7.9574e-10, 2.5478e-10],
        [6.1913e-02, 5.2460e-01, 2.9742e-01, 1.0716e-01, 8.5695e-03, 1.9527e-04,
         1.3434e-04, 8.8777e-06, 6.3179e-07, 3.0768e-07],
        [1.4722e-02, 1.0836e-01, 4.4864e-01, 2.7112e-01, 1.5057e-01, 4.1743e-03,
         2.0138e-03, 3.6828e-04, 3.1224e-05, 5.9687e-06],
        [1.1279e-03, 1.2389e-02, 3.5989e-02, 8.9899e-02, 8.0186e-01, 3.9612e-02,
         1.8579e-02, 5.0434e-04, 3.7716e-05, 4.3629e-06],
        [2.5161e-04, 1.8865e-03, 5.6850e-03, 2.3490e-02, 7.3676e-01, 1.0342e-01,
         1.2571e-01, 2.6686e-03, 1.1774e-04, 8.3178



Epoch: 4.24, Train Loss: 0.00, Val Loss: 4.39, Train BLEU: 0.00, Val BLEU: 12.52, Minutes Elapsed: 221.39
Sampling from val predictions...
Source: ổ_gà , đương_nhiên , có_thể trở_thành một vấn_đề , nhưng
Reference: potholes , of course , that can become a
Model: <SOS> so , the course , , can be a
Attention Weights: tensor([[9.7777e-01, 1.4577e-02, 7.6480e-03, 2.4063e-06, 6.6184e-07, 1.9581e-08,
         2.2115e-09, 3.5008e-10, 1.8378e-10, 1.2379e-10],
        [1.1808e-01, 1.3716e-01, 7.4204e-01, 1.9272e-03, 7.2406e-04, 6.5179e-05,
         3.4055e-06, 5.8421e-07, 5.3593e-08, 1.5624e-08],
        [1.5990e-02, 2.1919e-02, 9.2611e-01, 9.2604e-03, 2.4312e-02, 2.2270e-03,
         1.7038e-04, 9.0436e-06, 1.5468e-07, 9.4559e-08],
        [3.4735e-03, 1.2460e-02, 7.4103e-01, 4.1571e-02, 1.7268e-01, 2.6080e-02,
         2.5521e-03, 1.5656e-04, 1.6845e-06, 1.6293e-06],
        [1.7073e-04, 4.6096e-04, 1.7956e-02, 2.2648e-02, 6.3173e-01, 2.5794e-01,
         6.3028e-02, 5.9149e-03, 7.5566e-05, 8



Epoch: 4.29, Train Loss: 0.00, Val Loss: 4.40, Train BLEU: 0.00, Val BLEU: 12.31, Minutes Elapsed: 223.88
Sampling from val predictions...
Source: phụ_nữ thắng <UNK> phần_trăm trong quốc_hội nhà_nước ở những cuộc
Reference: women won <UNK> percent of the national congress in
Model: <SOS> women women the the in the in in in
Attention Weights: tensor([[9.6961e-01, 3.0354e-02, 2.2570e-05, 7.6112e-06, 9.0507e-07, 9.8105e-08,
         2.3852e-08, 2.6220e-08, 7.2962e-09, 6.4652e-09],
        [1.7338e-01, 7.6750e-01, 2.2227e-02, 3.5034e-02, 1.6234e-03, 2.1210e-04,
         2.2617e-05, 5.5133e-06, 8.7608e-07, 3.4467e-07],
        [6.6884e-02, 4.2430e-01, 1.0740e-01, 3.1442e-01, 5.9846e-02, 2.3446e-02,
         3.1218e-03, 5.4040e-04, 3.7307e-05, 1.0027e-05],
        [5.2675e-03, 6.7323e-02, 7.4555e-02, 6.4879e-01, 1.2657e-01, 7.0093e-02,
         6.8254e-03, 5.2641e-04, 3.5003e-05, 1.0821e-05],
        [1.6494e-03, 2.1933e-02, 3.2784e-02, 4.9623e-01, 2.2031e-01, 1.9849e-01,
         2.5320e-02



Epoch: 4.34, Train Loss: 0.00, Val Loss: 4.39, Train BLEU: 0.00, Val BLEU: 11.91, Minutes Elapsed: 226.39
Sampling from val predictions...
Source: nóng và bụi lan_toả khắp_nơi đến_nỗi camera của tôi bị
Reference: so pervasive was the heat and the dust that
Model: <SOS> and and and and and of the of of
Attention Weights: tensor([[9.9602e-01, 3.0906e-03, 8.7391e-04, 1.2276e-05, 1.5866e-06, 2.2955e-07,
         7.5288e-08, 2.7129e-08, 3.4330e-08, 2.3218e-08],
        [7.6773e-01, 1.5886e-01, 7.0869e-02, 2.0744e-03, 4.2830e-04, 3.1785e-05,
         6.2611e-06, 5.4275e-07, 1.9118e-07, 1.1111e-07],
        [2.9497e-01, 1.5863e-01, 4.6288e-01, 7.0461e-02, 1.2206e-02, 7.8296e-04,
         6.4605e-05, 2.8286e-06, 4.9679e-07, 1.5860e-07],
        [2.0085e-01, 1.3222e-01, 4.0506e-01, 1.6343e-01, 8.7055e-02, 1.0127e-02,
         1.2090e-03, 3.8984e-05, 4.8589e-06, 1.2148e-06],
        [6.6512e-03, 1.3685e-02, 1.2604e-01, 2.7783e-01, 4.1088e-01, 1.3803e-01,
         2.5251e-02, 1.3022e-03, 2.9340e-



Epoch: 4.38, Train Loss: 0.00, Val Loss: 4.39, Train BLEU: 0.00, Val BLEU: 12.67, Minutes Elapsed: 228.89
Sampling from val predictions...
Source: hai điều quan_trọng nhất chúng_ta có là thời_gian và mối
Reference: two of the most valuable things we have are
Model: <SOS> two second the thing that we we have is
Attention Weights: tensor([[7.4137e-01, 2.5701e-01, 1.5637e-03, 5.5898e-05, 2.1890e-06, 4.9949e-07,
         4.4045e-08, 1.0104e-08, 5.0635e-09, 2.9228e-09],
        [1.3565e-02, 6.5823e-01, 2.9015e-01, 3.7075e-02, 8.7596e-04, 9.3585e-05,
         1.0601e-05, 2.1982e-06, 2.1768e-07, 7.6152e-08],
        [7.2853e-03, 2.9568e-01, 4.4813e-01, 2.1258e-01, 2.2002e-02, 1.2473e-02,
         1.7193e-03, 1.1757e-04, 7.7916e-06, 8.3519e-07],
        [3.1064e-04, 2.0254e-02, 1.2138e-01, 3.1526e-01, 3.6854e-01, 1.5175e-01,
         2.0686e-02, 1.6923e-03, 1.1461e-04, 1.4872e-05],
        [7.8978e-04, 3.4610e-02, 4.1731e-02, 2.3763e-01, 3.0825e-01, 3.4658e-01,
         2.8298e-02, 1.9370e-03,



Epoch: 4.43, Train Loss: 0.00, Val Loss: 4.35, Train BLEU: 0.00, Val BLEU: 12.94, Minutes Elapsed: 231.38
Sampling from val predictions...
Source: nhưng thay_vì hỏi sao họ lại chẳng trồng bất_cứ thứ
Reference: but instead of asking them how come they were
Model: <SOS> but instead how do they they they they ,
Attention Weights: tensor([[2.4892e-02, 9.7273e-01, 2.3654e-03, 1.5113e-05, 6.0515e-07, 2.0502e-08,
         1.1146e-08, 3.4377e-09, 3.3617e-09, 2.3195e-09],
        [1.8343e-03, 9.4953e-01, 4.7782e-02, 8.1943e-04, 2.7419e-05, 1.2951e-06,
         7.3737e-07, 1.5846e-07, 5.7236e-08, 2.7103e-08],
        [5.0550e-04, 7.7824e-02, 8.0348e-01, 1.0712e-01, 1.0697e-02, 2.9474e-04,
         7.2002e-05, 4.7444e-06, 7.0360e-07, 1.8204e-07],
        [2.5506e-04, 4.2052e-02, 4.6836e-01, 3.3499e-01, 1.4320e-01, 8.8803e-03,
         2.1196e-03, 1.3934e-04, 8.2057e-06, 1.0951e-06],
        [5.2855e-05, 4.9704e-03, 6.4966e-02, 1.8969e-01, 5.9466e-01, 1.0054e-01,
         4.1772e-02, 3.1888e-03, 1



Epoch: 4.48, Train Loss: 0.00, Val Loss: 4.35, Train BLEU: 0.00, Val BLEU: 12.94, Minutes Elapsed: 233.86
Sampling from val predictions...
Source: nhưng sư_tử rất thông_minh . <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: but lions are very clever . <EOS> <PAD> <PAD>
Model: <SOS> but , &apos;s very . . <EOS> <EOS> .
Attention Weights: tensor([[0.6406, 0.0165, 0.3423, 0.0006, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1491, 0.0283, 0.8110, 0.0113, 0.0003, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0314, 0.0093, 0.8499, 0.1037, 0.0055, 0.0002, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0137, 0.0030, 0.3658, 0.4580, 0.1462, 0.0133, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0172, 0.0030, 0.1125, 0.4776, 0.2865, 0.1031, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1142, 0.0236, 0.1605, 0.1596, 0.2931, 0.2490, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0981, 0.0157, 0.1236, 0.1179, 0.2441, 0.4005, 0.0000, 0.0000, 0



Epoch: 4.53, Train Loss: 0.00, Val Loss: 4.37, Train BLEU: 0.00, Val BLEU: 12.39, Minutes Elapsed: 236.36
Sampling from val predictions...
Source: bạn sẽ bất_ngờ với những gì mà mảnh_đất có_thể làm
Reference: you &apos;d be surprised what the soil could do
Model: <SOS> you will going to to to what of that
Attention Weights: tensor([[9.7696e-01, 2.2943e-02, 9.6363e-05, 1.0983e-06, 4.8852e-08, 5.0262e-08,
         1.7661e-08, 1.0437e-08, 3.9299e-09, 2.0487e-09],
        [5.4831e-02, 9.3197e-01, 1.3134e-02, 6.4935e-05, 1.1309e-06, 5.2221e-07,
         5.1622e-08, 1.6202e-08, 1.5214e-09, 7.6531e-10],
        [3.8985e-02, 4.8408e-01, 4.5281e-01, 2.3119e-02, 5.1202e-04, 4.2530e-04,
         6.1999e-05, 1.1980e-05, 4.5223e-07, 1.2647e-07],
        [2.4563e-03, 1.5151e-02, 6.2583e-01, 3.0331e-01, 2.5851e-02, 2.3678e-02,
         3.2906e-03, 4.0143e-04, 2.5239e-05, 7.1499e-06],
        [3.6515e-03, 1.9075e-02, 5.4079e-01, 2.9905e-01, 7.3158e-02, 4.7356e-02,
         1.3221e-02, 3.2390e-03, 3.40



Epoch: 4.58, Train Loss: 0.00, Val Loss: 4.33, Train BLEU: 0.00, Val BLEU: 12.47, Minutes Elapsed: 238.85
Sampling from val predictions...
Source: ở trường , chúng_tôi dành rất nhiều thời_gian để học
Reference: in school , we spent a lot of time
Model: <SOS> in the , we we a lot of to
Attention Weights: tensor([[8.8540e-01, 1.1386e-01, 5.9631e-04, 1.3690e-04, 1.4545e-06, 1.5564e-07,
         3.6850e-08, 1.9086e-08, 1.4902e-08, 7.0772e-09],
        [1.5737e-01, 7.4043e-01, 4.8497e-02, 5.1723e-02, 1.8363e-03, 1.2866e-04,
         9.7611e-06, 2.0285e-06, 3.1578e-07, 7.7748e-08],
        [4.4795e-02, 2.1952e-01, 6.7149e-02, 5.9941e-01, 6.3780e-02, 4.9459e-03,
         3.5585e-04, 3.7754e-05, 3.6451e-06, 3.4640e-07],
        [4.7190e-03, 1.7428e-02, 2.8828e-02, 9.2389e-01, 2.1550e-02, 2.9849e-03,
         4.7155e-04, 1.0581e-04, 2.2084e-05, 1.5493e-06],
        [2.6915e-04, 1.4225e-03, 1.8058e-03, 1.3711e-01, 8.0058e-01, 5.5078e-02,
         3.4176e-03, 2.9912e-04, 1.7589e-05, 2.2853e-06],




Epoch: 4.62, Train Loss: 0.00, Val Loss: 4.33, Train BLEU: 0.00, Val BLEU: 13.16, Minutes Elapsed: 241.38
Sampling from val predictions...
Source: đây là cuộc_sống trong sáu tháng của tôi , đã
Reference: this is six months of my life , into
Model: <SOS> this is the in my my , , ,
Attention Weights: tensor([[9.5454e-01, 4.4116e-02, 1.3241e-03, 1.6484e-05, 5.8697e-07, 6.0658e-08,
         4.7286e-09, 1.7585e-09, 7.1654e-10, 6.2534e-10],
        [9.0895e-02, 6.1984e-01, 2.8092e-01, 7.9765e-03, 3.3108e-04, 3.3498e-05,
         1.2741e-06, 1.9155e-07, 2.1444e-08, 8.9168e-09],
        [5.5682e-03, 1.9896e-02, 8.0131e-01, 1.2762e-01, 3.7609e-02, 7.3782e-03,
         5.7506e-04, 4.4128e-05, 2.2804e-06, 8.9386e-07],
        [1.3307e-03, 3.3207e-03, 3.9399e-01, 3.9377e-01, 1.6317e-01, 4.2648e-02,
         1.5371e-03, 2.1177e-04, 9.3478e-06, 2.4306e-06],
        [3.3150e-04, 6.5081e-04, 2.2977e-02, 2.2280e-01, 5.4235e-01, 1.8808e-01,
         1.8344e-02, 4.3666e-03, 9.6375e-05, 9.1249e-06],
     



Epoch: 4.67, Train Loss: 0.00, Val Loss: 4.34, Train BLEU: 0.00, Val BLEU: 12.93, Minutes Elapsed: 243.88
Sampling from val predictions...
Source: tôi gặp những bé trai này lúc 5 giờ sáng
Reference: i met these boys at five in the morning
Model: <SOS> i &apos;ve my i had five five ago years
Attention Weights: tensor([[9.3891e-01, 6.0568e-02, 5.1769e-04, 7.2957e-06, 2.5711e-07, 3.4264e-08,
         1.5215e-08, 6.1697e-09, 1.3320e-09, 6.1399e-10],
        [4.6501e-02, 8.6086e-01, 9.1847e-02, 7.5312e-04, 3.8282e-05, 2.2229e-06,
         6.2081e-07, 1.2591e-07, 8.9986e-09, 1.2936e-09],
        [1.1910e-02, 2.7908e-01, 4.4711e-01, 1.9216e-01, 5.2914e-02, 1.0553e-02,
         5.9979e-03, 2.4951e-04, 1.9030e-05, 1.0638e-06],
        [9.0696e-04, 1.2670e-02, 1.9866e-01, 2.1242e-01, 4.0978e-01, 6.5532e-02,
         9.3173e-02, 6.1736e-03, 6.1436e-04, 6.8417e-05],
        [2.8522e-05, 2.2317e-04, 1.9967e-03, 7.7013e-03, 6.1444e-02, 7.1967e-02,
         6.6837e-01, 1.4230e-01, 3.5624e-02, 1.0345e



Epoch: 4.72, Train Loss: 0.00, Val Loss: 4.31, Train BLEU: 0.00, Val BLEU: 12.94, Minutes Elapsed: 246.39
Sampling from val predictions...
Source: không lâu sau đó , khi tôi đi qua một
Reference: soon after , when i was walking past a
Model: <SOS> not , , , i went to a a
Attention Weights: tensor([[9.1142e-01, 8.7940e-02, 5.5042e-04, 8.4566e-05, 2.4134e-06, 7.9110e-07,
         1.4780e-08, 2.2348e-09, 6.0496e-10, 2.1802e-10],
        [4.1549e-02, 4.5716e-01, 3.4986e-01, 1.3388e-01, 1.2424e-02, 4.9923e-03,
         1.1872e-04, 1.5011e-05, 1.9408e-06, 1.5587e-07],
        [5.8534e-02, 1.3029e-01, 3.6770e-01, 2.7253e-01, 1.0650e-01, 6.0112e-02,
         3.1586e-03, 9.5087e-04, 2.0238e-04, 2.2271e-05],
        [8.0425e-03, 1.1824e-02, 3.8645e-02, 1.0654e-01, 1.3035e-01, 4.6583e-01,
         2.1369e-01, 2.1922e-02, 2.7429e-03, 4.0581e-04],
        [4.2246e-04, 2.8430e-04, 2.7793e-03, 2.1041e-02, 6.5358e-02, 7.6857e-01,
         1.1652e-01, 2.1175e-02, 3.0154e-03, 8.2892e-04],
        [8.940



Epoch: 4.77, Train Loss: 0.00, Val Loss: 4.38, Train BLEU: 0.00, Val BLEU: 12.57, Minutes Elapsed: 248.92
Sampling from val predictions...
Source: đó là về hiểu những người xung_quanh chúng_ta theo những
Reference: it &apos;s about understanding our neighbors in new and
Model: <SOS> it &apos;s about about people people in us that
Attention Weights: tensor([[9.4041e-01, 5.6010e-02, 3.4896e-03, 8.6894e-05, 1.3316e-06, 1.1865e-07,
         2.9440e-08, 7.5280e-09, 2.3160e-09, 8.1862e-10],
        [5.3352e-02, 5.7687e-01, 3.4335e-01, 2.5651e-02, 7.0158e-04, 7.1167e-05,
         5.1984e-06, 5.9415e-07, 1.3391e-07, 2.1961e-08],
        [3.4455e-02, 8.4769e-02, 7.6395e-01, 9.7352e-02, 1.4367e-02, 4.4334e-03,
         5.9431e-04, 5.6724e-05, 2.3877e-05, 4.1905e-06],
        [1.1236e-03, 2.6421e-03, 1.3464e-01, 5.3287e-01, 2.2527e-01, 9.5115e-02,
         7.2815e-03, 9.1549e-04, 1.2557e-04, 1.8792e-05],
        [1.6500e-04, 7.4901e-04, 3.9313e-02, 5.2240e-01, 2.8944e-01, 1.3169e-01,
         1.3



Epoch: 4.82, Train Loss: 0.00, Val Loss: 4.31, Train BLEU: 0.00, Val BLEU: 12.93, Minutes Elapsed: 251.44
Sampling from val predictions...
Source: vậy , điều đó có nghĩa_là mọi người sẽ có_thể
Reference: so what that means is that people will be
Model: <SOS> so what that means is the the would be
Attention Weights: tensor([[9.8725e-01, 9.8917e-03, 2.8292e-03, 2.4599e-05, 7.2809e-07, 1.2406e-07,
         1.1846e-08, 3.6147e-09, 2.4092e-09, 7.6161e-10],
        [9.6155e-03, 1.0276e-02, 9.4225e-01, 3.5252e-02, 2.1031e-03, 4.8578e-04,
         1.4648e-05, 1.5594e-06, 2.8425e-07, 7.9542e-08],
        [2.9575e-03, 5.1483e-03, 5.8901e-01, 2.6905e-01, 9.2465e-02, 3.9806e-02,
         1.3794e-03, 1.5754e-04, 1.9658e-05, 1.4346e-06],
        [8.9724e-04, 2.7572e-03, 6.7145e-02, 1.1167e-01, 2.3764e-01, 5.1507e-01,
         4.9440e-02, 1.3149e-02, 2.0646e-03, 1.6879e-04],
        [2.2346e-05, 5.2982e-05, 5.0932e-03, 1.7044e-02, 6.8340e-02, 2.3623e-01,
         3.9738e-01, 2.2490e-01, 4.4336e-02, 6



Epoch: 4.86, Train Loss: 0.00, Val Loss: 4.29, Train BLEU: 0.00, Val BLEU: 13.14, Minutes Elapsed: 254.00
Sampling from val predictions...
Source: nếu người đó không muốn làm , vậy_thì bạn phải
Reference: if that person doesn &apos;t want to do it
Model: <SOS> if you don &apos;t &apos;t want to , ,
Attention Weights: tensor([[9.7382e-01, 2.5455e-02, 6.6781e-04, 5.6298e-05, 1.3217e-06, 4.0501e-08,
         4.7551e-09, 2.2742e-09, 1.5844e-09, 7.9697e-10],
        [1.8344e-02, 7.2128e-01, 2.2405e-01, 3.5190e-02, 1.1080e-03, 2.8418e-05,
         6.2737e-07, 1.1579e-07, 2.8707e-08, 1.7316e-08],
        [7.6799e-04, 5.1429e-02, 2.2865e-01, 6.0106e-01, 1.1213e-01, 5.9164e-03,
         4.7134e-05, 7.1880e-06, 1.0472e-06, 3.5718e-07],
        [7.3029e-04, 1.0218e-02, 7.3622e-02, 6.2458e-01, 2.5055e-01, 3.8828e-02,
         1.2571e-03, 1.4923e-04, 3.8820e-05, 2.9441e-05],
        [4.4636e-04, 1.5366e-02, 4.3460e-02, 3.6434e-01, 4.7848e-01, 9.3921e-02,
         2.8380e-03, 8.6023e-04, 1.7425e-04,



Epoch: 4.91, Train Loss: 0.00, Val Loss: 4.31, Train BLEU: 0.00, Val BLEU: 12.64, Minutes Elapsed: 256.50
Sampling from val predictions...
Source: tôi chẳng cho họ tiền được , không gì cả
Reference: i couldn &apos;t give them money , nothing .
Model: <SOS> i i trying them them , , , i
Attention Weights: tensor([[8.9774e-01, 1.0093e-01, 1.3220e-03, 4.4527e-06, 3.1409e-07, 3.0540e-08,
         1.3878e-08, 1.2720e-08, 3.4598e-09, 2.3532e-09],
        [4.6474e-03, 9.7415e-01, 2.0949e-02, 2.4353e-04, 1.1260e-05, 2.1282e-07,
         1.9402e-08, 4.0552e-09, 9.3441e-10, 6.8465e-10],
        [1.9424e-02, 6.0350e-01, 3.0196e-01, 5.7587e-02, 1.6975e-02, 5.1804e-04,
         2.9658e-05, 5.7016e-06, 1.1990e-06, 3.0947e-07],
        [2.7081e-03, 2.5956e-02, 4.0648e-01, 2.5226e-01, 2.8329e-01, 2.7820e-02,
         9.5977e-04, 4.7575e-04, 3.7532e-05, 7.3713e-06],
        [3.4985e-05, 9.1787e-04, 3.8793e-02, 2.7719e-01, 5.1105e-01, 1.5987e-01,
         7.7593e-03, 3.8093e-03, 4.7928e-04, 9.5217e-05],




Epoch: 4.96, Train Loss: 0.00, Val Loss: 4.29, Train BLEU: 0.00, Val BLEU: 12.88, Minutes Elapsed: 259.11
Sampling from val predictions...
Source: l. a. dẫn đầu nước mỹ về diện_tích <UNK> mà
Reference: l.a. leads the united states in vacant lots that
Model: <SOS> and the of the of in the <UNK> ,
Attention Weights: tensor([[1.1633e-03, 5.0332e-04, 9.6803e-01, 3.0246e-02, 5.7753e-05, 3.4865e-06,
         2.5195e-07, 5.0653e-08, 2.0072e-08, 6.7359e-08],
        [1.2117e-03, 5.9578e-04, 2.9574e-01, 6.9615e-01, 5.9636e-03, 3.2246e-04,
         1.4191e-05, 2.2195e-06, 7.3383e-07, 2.5904e-07],
        [5.5419e-04, 7.1654e-05, 6.5331e-03, 7.2082e-01, 2.5438e-01, 1.7064e-02,
         5.1991e-04, 5.0358e-05, 2.4217e-06, 4.3603e-07],
        [5.3714e-04, 9.3086e-05, 6.1858e-03, 1.5774e-01, 3.1850e-01, 4.3774e-01,
         7.0844e-02, 8.0718e-03, 2.6298e-04, 2.5718e-05],
        [5.1021e-05, 7.0477e-06, 1.3862e-04, 2.3139e-02, 1.5952e-01, 4.8417e-01,
         2.5079e-01, 7.0131e-02, 1.1213e-02, 8.



Epoch: 5.00, Train Loss: 0.00, Val Loss: 4.35, Train BLEU: 0.00, Val BLEU: 12.74, Minutes Elapsed: 261.20
Sampling from val predictions...
Source: tôi gặp cậu_bé ở khu cứu_trợ mà tổ_chức giải_phóng nô_lệ
Reference: i met him at a shelter where free the
Model: <SOS> i &apos;ve the in the global of the the
Attention Weights: tensor([[9.1180e-01, 8.5753e-02, 2.3864e-03, 5.7657e-05, 1.5391e-06, 5.6394e-08,
         4.4682e-09, 1.9428e-09, 7.2456e-10, 1.9710e-10],
        [1.0281e-02, 8.2202e-01, 1.6188e-01, 5.7080e-03, 1.1525e-04, 1.2036e-06,
         2.8737e-08, 3.9945e-09, 8.3003e-10, 1.8086e-10],
        [1.0496e-02, 3.4952e-01, 2.6672e-01, 3.4185e-01, 2.8094e-02, 3.0839e-03,
         2.2894e-04, 1.3622e-05, 2.2606e-06, 1.4236e-07],
        [1.1394e-03, 8.5994e-03, 5.5495e-02, 4.6465e-01, 3.7833e-01, 8.5857e-02,
         5.5321e-03, 3.5075e-04, 4.7441e-05, 2.1236e-06],
        [6.6004e-04, 2.1071e-03, 1.3644e-02, 9.3719e-02, 4.2808e-01, 3.5398e-01,
         9.5470e-02, 9.9748e-03, 2.180



Epoch: 5.05, Train Loss: 0.00, Val Loss: 4.30, Train BLEU: 0.00, Val BLEU: 13.09, Minutes Elapsed: 263.70
Sampling from val predictions...
Source: chúng_tôi tình_cờ quay_lại new_york đúng một năm sau đó ,
Reference: we happened to be back in new york exactly
Model: <SOS> we we to the back to the francisco ,
Attention Weights: tensor([[9.6491e-01, 3.4966e-02, 1.2707e-04, 2.0862e-06, 1.5295e-08, 1.7973e-09,
         7.1051e-10, 1.4974e-10, 5.0128e-11, 4.4373e-11],
        [9.8690e-03, 9.7684e-01, 1.2998e-02, 2.9281e-04, 1.3109e-06, 3.1706e-08,
         1.0176e-08, 8.7197e-10, 2.0288e-10, 8.2375e-11],
        [1.4883e-02, 4.0522e-01, 5.0790e-01, 6.5963e-02, 5.4648e-03, 3.3643e-04,
         2.1333e-04, 1.1856e-05, 1.0569e-06, 8.6568e-08],
        [4.1815e-04, 1.1091e-02, 3.6501e-01, 4.8786e-01, 1.2049e-01, 9.4023e-03,
         4.9575e-03, 6.5326e-04, 1.1012e-04, 1.0089e-05],
        [7.9794e-05, 8.6407e-04, 5.7279e-02, 3.6395e-01, 4.1799e-01, 1.0199e-01,
         5.0260e-02, 6.1782e-03, 1.



Epoch: 5.10, Train Loss: 0.00, Val Loss: 4.29, Train BLEU: 0.00, Val BLEU: 13.48, Minutes Elapsed: 266.19
Sampling from val predictions...
Source: mỗi đứa con_trai từ 6 đến 9 tuổi , trong
Reference: so a boy , from six to nine years
Model: <SOS> the &apos;s &apos;s of from a , , ,
Attention Weights: tensor([[9.8775e-01, 1.2128e-02, 9.9590e-05, 2.0863e-05, 3.1163e-07, 5.0101e-08,
         1.0471e-08, 9.0763e-09, 5.5003e-09, 3.0511e-09],
        [2.5080e-01, 4.9459e-01, 2.1397e-01, 3.9195e-02, 1.0818e-03, 3.1832e-04,
         1.7756e-05, 2.8237e-05, 1.7781e-06, 5.7852e-07],
        [4.0584e-02, 1.5205e-01, 2.4513e-01, 4.4927e-01, 6.9929e-02, 3.0174e-02,
         1.9766e-03, 1.0435e-02, 4.0753e-04, 3.9619e-05],
        [2.7262e-02, 1.4237e-01, 2.7332e-01, 4.7715e-01, 5.5573e-02, 1.7941e-02,
         1.1393e-03, 5.0939e-03, 1.4252e-04, 9.3496e-06],
        [6.2242e-03, 3.5828e-02, 1.5652e-01, 6.9318e-01, 7.4349e-02, 2.8143e-02,
         8.5723e-04, 4.7045e-03, 1.8068e-04, 1.1814e-05],
    



Epoch: 5.14, Train Loss: 0.00, Val Loss: 4.29, Train BLEU: 0.00, Val BLEU: 13.29, Minutes Elapsed: 268.68
Sampling from val predictions...
Source: nhưng sư_tử rất thông_minh . <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: but lions are very clever . <EOS> <PAD> <PAD>
Model: <SOS> but it &apos;s very . . <EOS> <EOS> .
Attention Weights: tensor([[0.5348, 0.0053, 0.4591, 0.0008, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.1524, 0.0152, 0.8253, 0.0069, 0.0002, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0303, 0.0038, 0.8676, 0.0961, 0.0022, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0201, 0.0019, 0.5485, 0.3520, 0.0760, 0.0014, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0038, 0.0004, 0.0771, 0.6736, 0.2276, 0.0175, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0604, 0.0107, 0.1821, 0.2853, 0.3424, 0.1191, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0764, 0.0105, 0.0924, 0.1413, 0.3310, 0.3483, 0.0000, 0.0000, 



Epoch: 5.19, Train Loss: 0.00, Val Loss: 4.30, Train BLEU: 0.00, Val BLEU: 13.33, Minutes Elapsed: 271.16
Sampling from val predictions...
Source: à . anh ta đang sẵn_sàng . <EOS> <PAD> <PAD>
Reference: ha . he &apos;s ready . <EOS> <PAD> <PAD>
Model: <SOS> okay . he &apos;s a . <EOS> <EOS> .
Attention Weights: tensor([[0.9472, 0.0489, 0.0038, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0386, 0.6582, 0.2927, 0.0079, 0.0025, 0.0001, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0023, 0.0744, 0.8736, 0.0240, 0.0230, 0.0026, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0002, 0.0092, 0.0361, 0.0502, 0.8558, 0.0484, 0.0001, 0.0000, 0.0000,
         0.0000],
        [0.0008, 0.0074, 0.0048, 0.0127, 0.5932, 0.3753, 0.0057, 0.0001, 0.0000,
         0.0000],
        [0.0001, 0.0026, 0.0054, 0.0018, 0.0326, 0.8456, 0.1092, 0.0027, 0.0000,
         0.0000],
        [0.0020, 0.0160, 0.0744, 0.0088, 0.0383, 0.6438, 0.1848, 0.0319, 0.0000,
         0.0



Epoch: 5.24, Train Loss: 0.00, Val Loss: 4.29, Train BLEU: 0.00, Val BLEU: 13.22, Minutes Elapsed: 273.65
Sampling from val predictions...
Source: vậy_nên vào năm 1860 , họ nhìn_thấy cái công_nghệ <UNK>
Reference: so <UNK> , they are seeing this dirty technology
Model: <SOS> so said said , they the the , ,
Attention Weights: tensor([[9.8553e-01, 1.4432e-02, 4.2571e-05, 1.6092e-07, 1.5849e-08, 4.3242e-09,
         1.0832e-09, 2.7161e-10, 8.0916e-11, 4.1352e-11],
        [2.9514e-01, 5.7967e-01, 1.2367e-01, 1.2513e-03, 2.1125e-04, 4.0270e-05,
         1.0646e-05, 1.7036e-06, 4.3468e-07, 1.4829e-07],
        [3.7596e-02, 1.9989e-01, 6.1616e-01, 7.0895e-02, 5.3986e-02, 1.7886e-02,
         3.2297e-03, 3.2150e-04, 3.4137e-05, 2.4800e-06],
        [4.9774e-03, 3.2182e-02, 1.6026e-01, 8.8800e-02, 4.1738e-01, 2.3404e-01,
         5.4145e-02, 7.2902e-03, 8.8223e-04, 4.4061e-05],
        [1.4485e-05, 1.1564e-04, 4.8870e-03, 2.1064e-02, 1.2697e-01, 6.7264e-01,
         1.6622e-01, 6.6943e-03, 1.2



Epoch: 5.29, Train Loss: 0.00, Val Loss: 4.29, Train BLEU: 0.00, Val BLEU: 13.31, Minutes Elapsed: 276.12
Sampling from val predictions...
Source: mỗi bức ảnh gợi nhắc về ai đó hoặc một
Reference: a photo is a reminder of someone or something
Model: <SOS> the is of the in of people who a
Attention Weights: tensor([[9.7459e-01, 2.4798e-02, 5.6390e-04, 3.7912e-05, 4.9500e-06, 1.1727e-06,
         4.2624e-07, 1.6915e-07, 1.0535e-07, 5.8208e-08],
        [1.9537e-01, 4.6537e-01, 3.0366e-01, 2.9809e-02, 5.0714e-03, 6.1060e-04,
         8.6709e-05, 1.3218e-05, 4.3838e-06, 6.8984e-07],
        [5.2787e-02, 1.0938e-01, 2.3945e-01, 2.7658e-01, 1.4399e-01, 1.2875e-01,
         3.5651e-02, 1.1623e-02, 1.6068e-03, 1.8102e-04],
        [1.1824e-02, 3.6782e-02, 1.2744e-01, 3.2262e-01, 2.4991e-01, 1.8797e-01,
         5.4765e-02, 7.9185e-03, 7.0036e-04, 6.4143e-05],
        [4.0252e-04, 3.8724e-03, 3.0080e-02, 8.6760e-02, 2.0568e-01, 2.7380e-01,
         2.4370e-01, 1.3003e-01, 2.1026e-02, 4.6433e-03



Epoch: 5.34, Train Loss: 0.00, Val Loss: 4.30, Train BLEU: 0.00, Val BLEU: 12.92, Minutes Elapsed: 278.61
Sampling from val predictions...
Source: <UNK> tin_tưởng tôi , như một nhà_văn và một phụ_nữ
Reference: <UNK> believed in me , as a writer and
Model: <SOS> <UNK> <UNK> me like as like a and and
Attention Weights: tensor([[7.1335e-01, 2.8125e-01, 5.2888e-03, 8.0600e-05, 3.1310e-05, 4.9877e-06,
         1.1409e-06, 2.8670e-07, 2.3939e-07, 1.5209e-07],
        [4.4053e-02, 8.8482e-01, 6.6360e-02, 4.1719e-03, 5.0492e-04, 6.8169e-05,
         1.8254e-05, 1.3527e-06, 3.4441e-07, 3.0520e-07],
        [1.2261e-02, 3.9613e-01, 4.0721e-01, 1.4140e-01, 3.6311e-02, 5.7020e-03,
         9.1473e-04, 6.4493e-05, 6.7203e-06, 1.3074e-06],
        [4.4712e-04, 4.8909e-02, 1.0417e-01, 1.0150e-01, 6.6397e-01, 6.0897e-02,
         1.8190e-02, 1.6467e-03, 2.3602e-04, 2.6560e-05],
        [2.4337e-04, 2.5300e-02, 6.7041e-02, 3.0061e-02, 6.7201e-01, 1.7409e-01,
         2.7493e-02, 2.1408e-03, 1.4995e-03,



Epoch: 5.38, Train Loss: 0.00, Val Loss: 4.29, Train BLEU: 0.00, Val BLEU: 13.36, Minutes Elapsed: 281.09
Sampling from val predictions...
Source: bức này được chụp vài tuần sau sự_kiện 11/9 ,
Reference: this one was taken just weeks after 9 /
Model: <SOS> this this was a years a years the 9
Attention Weights: tensor([[9.1390e-01, 7.4322e-02, 1.1164e-02, 5.7150e-04, 3.7756e-05, 4.9785e-07,
         4.7515e-08, 6.1627e-09, 1.5651e-09, 8.4159e-10],
        [4.6700e-01, 1.1867e-01, 3.2441e-01, 8.2772e-02, 6.7822e-03, 3.2836e-04,
         3.1547e-05, 5.1277e-06, 7.7101e-07, 3.9223e-08],
        [6.5855e-02, 3.1198e-02, 4.8559e-01, 2.8721e-01, 1.2000e-01, 8.7079e-03,
         1.2344e-03, 1.8176e-04, 2.7168e-05, 1.3570e-06],
        [4.5943e-03, 2.1148e-03, 9.1001e-02, 2.6813e-01, 5.5518e-01, 6.8126e-02,
         9.5234e-03, 1.1347e-03, 1.8872e-04, 9.1821e-06],
        [6.1100e-04, 3.4315e-04, 1.1505e-02, 6.3685e-02, 5.0224e-01, 3.4429e-01,
         5.9378e-02, 1.5777e-02, 2.0253e-03, 1.4876



Epoch: 5.43, Train Loss: 0.00, Val Loss: 4.26, Train BLEU: 0.00, Val BLEU: 13.82, Minutes Elapsed: 283.61
Sampling from val predictions...
Source: và các bạn có bao_giờ tự_hỏi chính mình " nếu
Reference: and if you ever are wondering , &quot; if
Model: <SOS> and you you know the what &quot; &quot; if
Attention Weights: tensor([[1.7627e-02, 8.2347e-01, 1.5594e-01, 2.8937e-03, 6.9489e-05, 1.3703e-06,
         3.2714e-08, 6.1678e-09, 2.4494e-09, 1.3114e-09],
        [4.1257e-03, 3.5796e-01, 5.6504e-01, 6.7658e-02, 5.0298e-03, 1.8136e-04,
         2.3647e-06, 1.8239e-07, 3.5993e-08, 3.5322e-09],
        [3.5956e-05, 1.5986e-03, 5.5932e-02, 4.1835e-01, 5.0985e-01, 1.4132e-02,
         9.1097e-05, 6.2220e-06, 2.4681e-07, 1.6163e-08],
        [3.1365e-05, 7.1221e-04, 9.6771e-03, 8.8877e-02, 5.3494e-01, 3.5518e-01,
         9.4383e-03, 1.0865e-03, 5.6291e-05, 4.2160e-06],
        [7.5729e-05, 8.3919e-04, 7.6695e-03, 7.5843e-02, 3.5083e-01, 5.0649e-01,
         4.5034e-02, 1.1264e-02, 1.8772e-0



Epoch: 5.48, Train Loss: 0.00, Val Loss: 4.25, Train BLEU: 0.00, Val BLEU: 13.55, Minutes Elapsed: 286.08
Sampling from val predictions...
Source: hai điều quan_trọng nhất chúng_ta có là thời_gian và mối
Reference: two of the most valuable things we have are
Model: <SOS> two most most most that we we have is
Attention Weights: tensor([[7.3259e-01, 2.6308e-01, 4.2634e-03, 6.5064e-05, 2.5130e-06, 4.1652e-07,
         6.4716e-08, 2.3006e-08, 5.7342e-09, 3.2298e-09],
        [1.3741e-02, 5.4536e-01, 4.1109e-01, 2.9240e-02, 5.1533e-04, 4.0961e-05,
         1.1509e-05, 6.4583e-06, 4.1888e-07, 1.2392e-07],
        [1.3778e-02, 3.1500e-01, 4.6309e-01, 1.7652e-01, 1.8744e-02, 9.3885e-03,
         2.7317e-03, 7.0269e-04, 3.3032e-05, 3.6007e-06],
        [2.7586e-04, 1.8256e-02, 1.6578e-01, 4.8406e-01, 2.1476e-01, 7.9888e-02,
         2.4739e-02, 1.1608e-02, 5.5003e-04, 8.3663e-05],
        [4.8786e-04, 2.3796e-02, 5.6241e-02, 3.7643e-01, 2.8254e-01, 2.1001e-01,
         3.7822e-02, 1.1976e-02, 6



Epoch: 5.53, Train Loss: 0.00, Val Loss: 4.27, Train BLEU: 0.00, Val BLEU: 13.29, Minutes Elapsed: 288.54
Sampling from val predictions...
Source: em không nói được , nhưng em truyền_đạt niềm_vui theo
Reference: he &apos;s <UNK> , but he communicates joy in
Model: <SOS> he don &apos;t saying , but the to .
Attention Weights: tensor([[9.5321e-01, 4.6575e-02, 2.0316e-04, 1.3195e-05, 2.9349e-07, 3.6517e-08,
         6.6555e-09, 1.0601e-08, 2.4377e-09, 7.0583e-10],
        [1.2465e-02, 9.0829e-01, 7.4904e-02, 4.2423e-03, 9.9313e-05, 1.8091e-06,
         3.0432e-07, 2.2013e-07, 1.9070e-08, 3.4222e-09],
        [1.8304e-02, 7.2320e-01, 1.9153e-01, 6.1535e-02, 5.1218e-03, 2.3747e-04,
         3.0816e-05, 4.0332e-05, 2.1435e-06, 3.7987e-07],
        [5.1441e-02, 1.7014e-01, 3.6884e-01, 3.7150e-01, 2.7961e-02, 8.2541e-03,
         1.0404e-03, 7.2364e-04, 8.0293e-05, 1.3577e-05],
        [2.0863e-02, 8.5155e-02, 1.3034e-01, 5.0857e-01, 1.2516e-01, 1.1723e-01,
         8.7700e-03, 3.3993e-03, 4.0



Epoch: 5.58, Train Loss: 0.00, Val Loss: 4.24, Train BLEU: 0.00, Val BLEU: 13.37, Minutes Elapsed: 291.02
Sampling from val predictions...
Source: nó là nhựa xốp , như tôi đã nói ,
Reference: it &apos;s a porous asphalt , like i said
Model: <SOS> it &apos;s a , , but i i said
Attention Weights: tensor([[9.1883e-01, 7.4521e-02, 5.8769e-03, 7.4529e-04, 2.0742e-05, 4.6568e-06,
         1.5029e-06, 4.5711e-07, 1.1703e-07, 4.6865e-08],
        [4.7247e-02, 8.6124e-01, 8.5180e-02, 6.3040e-03, 2.2297e-05, 4.2974e-06,
         9.0730e-07, 3.4296e-07, 5.2972e-08, 6.1196e-09],
        [9.3792e-03, 3.9573e-02, 6.4511e-01, 2.9835e-01, 5.8816e-03, 1.1847e-03,
         2.2513e-04, 2.3012e-04, 5.9591e-05, 4.7371e-06],
        [7.0504e-04, 2.4206e-03, 1.5258e-01, 7.9682e-01, 4.1483e-02, 5.0310e-03,
         5.8449e-04, 2.5707e-04, 9.2591e-05, 2.1827e-05],
        [1.4272e-03, 4.2949e-03, 3.0613e-02, 1.5279e-01, 2.8480e-01, 5.0272e-01,
         1.7243e-02, 3.9308e-03, 1.8997e-03, 2.7784e-04],
        [



Epoch: 5.62, Train Loss: 0.00, Val Loss: 4.25, Train BLEU: 0.00, Val BLEU: 13.81, Minutes Elapsed: 293.52
Sampling from val predictions...
Source: đọc nó từ một người phụ_nữ châu phi , những
Reference: read it from an african woman , the damage
Model: <SOS> now it from a woman , , , the
Attention Weights: tensor([[9.6350e-01, 3.5716e-02, 7.7319e-04, 6.0623e-06, 8.1880e-08, 2.1072e-08,
         5.1887e-09, 4.2386e-09, 7.6857e-10, 3.8609e-10],
        [1.7406e-01, 5.5660e-01, 2.6327e-01, 5.9624e-03, 9.2237e-05, 1.7286e-05,
         1.2465e-06, 1.2590e-07, 2.1837e-09, 5.0639e-10],
        [5.1527e-02, 8.0515e-02, 6.1391e-01, 2.1004e-01, 3.4764e-02, 8.0128e-03,
         1.1571e-03, 6.6584e-05, 3.8546e-07, 5.7777e-08],
        [1.2008e-04, 2.5617e-04, 1.9664e-02, 2.2101e-01, 4.7469e-01, 1.9660e-01,
         8.3106e-02, 4.5129e-03, 4.0033e-05, 3.7961e-06],
        [6.8195e-05, 1.2446e-04, 9.1695e-03, 8.1123e-02, 2.3135e-01, 2.9986e-01,
         3.3623e-01, 4.0223e-02, 1.7374e-03, 1.1246e-04]



Epoch: 5.67, Train Loss: 0.00, Val Loss: 4.25, Train BLEU: 0.00, Val BLEU: 13.52, Minutes Elapsed: 296.04
Sampling from val predictions...
Source: cộng_đồng của tôi , người maasai , tin rằng chúng_tôi
Reference: my community , the maasai , we believe that
Model: <SOS> my my , , , , , believe that
Attention Weights: tensor([[9.1548e-01, 7.9990e-02, 4.5197e-03, 7.3401e-06, 1.3278e-06, 2.4497e-07,
         2.7941e-08, 9.0947e-09, 7.2969e-09, 2.7765e-09],
        [5.0389e-01, 3.7047e-01, 1.2034e-01, 4.0400e-03, 9.0819e-04, 3.3526e-04,
         1.1337e-05, 1.5724e-06, 6.7872e-07, 1.9393e-07],
        [4.4981e-02, 1.0631e-01, 4.2840e-01, 2.5555e-01, 1.2866e-01, 3.3424e-02,
         2.5446e-03, 8.5172e-05, 4.5497e-05, 3.1053e-06],
        [8.6478e-04, 2.1439e-03, 8.1465e-03, 3.5293e-02, 8.5410e-01, 9.0845e-02,
         6.8660e-03, 1.4686e-03, 2.3591e-04, 4.0005e-05],
        [1.6035e-03, 6.9032e-03, 1.8706e-02, 3.4794e-02, 5.2284e-01, 3.8285e-01,
         2.5380e-02, 5.7279e-03, 1.0976e-03, 9



Epoch: 5.72, Train Loss: 0.00, Val Loss: 4.22, Train BLEU: 0.00, Val BLEU: 13.66, Minutes Elapsed: 298.55
Sampling from val predictions...
Source: trong_suốt những năm_qua đã có sự tiến_bộ to_lớn trong quá_trình
Reference: so great progress and treatment has been made over
Model: <SOS> while the have have have are a a in
Attention Weights: tensor([[9.2538e-01, 7.3918e-02, 6.5911e-04, 4.3386e-05, 1.6783e-06, 7.5514e-08,
         2.0123e-08, 5.4420e-09, 3.2313e-09, 1.1211e-09],
        [7.8446e-02, 7.9355e-01, 1.1568e-01, 1.1813e-02, 4.5259e-04, 4.9327e-05,
         7.2179e-06, 1.3396e-06, 2.7955e-07, 6.1073e-08],
        [7.0423e-03, 2.1553e-01, 4.8287e-01, 2.2814e-01, 4.0583e-02, 1.8884e-02,
         6.4439e-03, 4.9113e-04, 1.6472e-05, 2.9586e-06],
        [8.0316e-04, 4.3971e-02, 1.5783e-01, 4.9009e-01, 1.8865e-01, 9.1543e-02,
         2.3221e-02, 3.6599e-03, 1.8535e-04, 4.7784e-05],
        [4.3411e-05, 2.8524e-03, 4.6404e-02, 2.2984e-01, 2.6235e-01, 2.7231e-01,
         1.4084e-01, 

In [None]:
# Read back what was logged for this experiment so the cells below can
# plot and summarize it.
# NOTE(review): presumably a list of per-run records (it is indexed with
# [0]['results'] further down) -- confirm against load_experiment_log.
experiment_results = load_experiment_log(
    experiment_name=EXPERIMENT_NAME,
)

In [None]:
# Plot the learning curve from the 'results' entry of the first logged record.
plot_single_learning_curve(
    experiment_results[0]['results']
)

In [None]:
# Show only the headline columns of the experiment summary table.
# NOTE(review): assumes summarize_results returns something that accepts a
# list of column names (e.g. a pandas DataFrame) -- confirm.
summarize_results(experiment_results)[[
    'best_val_loss',
    'best_val_bleu',
    'runtime',
    'total_params',
    'trainable_params',
    'dt_created',
]]

In [None]:
# reload model and test 
# Read the checkpoint file saved under MODEL_NAME. map_location=device remaps the
# stored tensors onto the device chosen at the top of the notebook (CUDA when
# available, otherwise CPU), so a GPU-trained checkpoint can be opened on a CPU box.
checkpoint = torch.load('model_checkpoints/{}.pth.tar'.format(MODEL_NAME), map_location=device)
# `model` is not built in this cell; it must already exist (from an earlier cell)
# with an architecture matching the saved weights.
# NOTE(review): this passes the loaded object straight to load_state_dict, i.e. it
# assumes the file holds a bare state_dict rather than a wrapper dict -- confirm
# against the code that writes the checkpoint.
model.load_state_dict(checkpoint)