In [1]:
import numpy as np 
import pandas as pd 
from data_processing import generate_vocab, process_data, create_dataloaders 
from model import get_pretrained_emb, EncoderDecoder, EncoderRNN, DecoderRNN, DecoderSimpleRNN, EncoderSimpleRNN, \
    Attention, DecoderAttnRNN, DecoderRNNV2, EncoderDecoderAttention
from train_eval import count_parameters, summarize_results, \
    plot_single_learning_curve, load_experiment_log
from train_eval import train_and_eval, train_and_eval_attn 
import importlib
import pickle as pkl 
import torch

In [2]:
# Experiment configuration. Every constant below is also copied into the
# `params` dict so it is stored alongside the results.

# model identification
MODEL_NAME = 'zh-seq2seq-rnn-attention'
SRC_LANG = 'zh'
TARG_LANG = 'en'

# reproducibility: seed the RNGs of the libraries imported above so that a
# Restart & Run All is repeatable.
# NOTE(review): if the project modules also draw from Python's `random`,
# seed that as well -- not visible from this notebook.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# data processing params
SRC_MAX_SENTENCE_LEN = 10   # handed to create_dataloaders and the encoder/decoder constructors
TARG_MAX_SENTENCE_LEN = 10  # handed to create_dataloaders and the decoder constructor
SRC_VOCAB_SIZE = 30000
TARG_VOCAB_SIZE = 30000     # also the decoder's targ_vocab_size

# model architecture params
NUM_LAYERS = 2
ENC_HIDDEN_DIM = 300
DEC_HIDDEN_DIM = ENC_HIDDEN_DIM  # alternative noted by the author: 2 * ENC_HIDDEN_DIM
TEACHER_FORCING_RATIO = 1
CLIP_GRAD_MAX_NORM = 1
ENC_DROPOUT = 0  # TODO: dropout is still "to actually implement"; value is only recorded in params
DEC_DROPOUT = 0  # TODO: dropout is still "to actually implement"; value is only recorded in params

# training params
BATCH_SIZE = 32  # also used as sample_limit for the single-minibatch dataset
NUM_EPOCHS = 500
LR = 0.0005
OPTIMIZER = 'Adam'
LAZY_TRAIN = True

In [3]:
# Snapshot of the configuration constants, keyed by their lower-case names.
# The dict is passed to train_and_eval_attn and kept so the settings can be
# saved with the results later. Key order matches the original literal.
params = dict(
    # identification
    model_name=MODEL_NAME,
    src_lang=SRC_LANG,
    targ_lang=TARG_LANG,
    # data processing
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
    src_vocab_size=SRC_VOCAB_SIZE,
    targ_vocab_size=TARG_VOCAB_SIZE,
    # architecture
    num_layers=NUM_LAYERS,
    enc_hidden_dim=ENC_HIDDEN_DIM,
    dec_hidden_dim=DEC_HIDDEN_DIM,
    teacher_forcing_ratio=TEACHER_FORCING_RATIO,
    clip_grad_max_norm=CLIP_GRAD_MAX_NORM,
    enc_dropout=ENC_DROPOUT,
    dec_dropout=DEC_DROPOUT,
    # training
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    learning_rate=LR,
    optimizer=OPTIMIZER,
    lazy_train=LAZY_TRAIN,
)

In [4]:
#vocab_test = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)

In [5]:
#vocab['zh']['id2token'][987]

In [6]:
#vocab['zh']['token2id']['森林']

In [7]:
#vocab['en']['token2id']['activity']

In [8]:
#vocab['en']['id2token'][987]

In [9]:
# One-time vocabulary build, left commented out because it takes a long time to run.
# The later cell that reloads "{SRC_LANG}-{TARG_LANG}-vocab.p" depends on the pickle
# written here, so uncomment and run this cell once if that file does not exist yet.
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [10]:
# Reload the cached vocabulary from its pickle file, then build three datasets
# from it: the full data, a single-minibatch sample, and a 1000-sample subset.
# Prerequisite: the pickle must already exist (see the commented-out
# generate_vocab cell above); otherwise open() raises FileNotFoundError.
vocab_filename = "{}-{}-vocab.p".format(SRC_LANG, TARG_LANG)

# The context manager closes the file handle as soon as loading finishes; the
# previous `pkl.load(open(...))` form left it open until garbage collection.
# CAUTION: unpickling can execute arbitrary code -- only load vocab files that
# were generated locally by this project.
with open(vocab_filename, "rb") as vocab_file:
    vocab = pkl.load(vocab_file)

data = process_data(SRC_LANG, TARG_LANG, vocab)
# sample_limit presumably caps the number of sentence pairs read -- confirm in data_processing.
data_minibatch = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=BATCH_SIZE)
data_minitrain = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=1000)

In [11]:
# # takes a long time to process, save to pickle for reimport in future 
# vocab = generate_vocab(SRC_LANG, TARG_LANG, SRC_VOCAB_SIZE, TARG_VOCAB_SIZE)
# vocab_filename = "{}-{}-vocab-fake.p".format(SRC_LANG, TARG_LANG)
# pkl.dump(vocab, open(vocab_filename, "wb"))

In [12]:
# vocab_filename = "{}-{}-vocab-fake.p".format(SRC_LANG, TARG_LANG)
# vocab = pkl.load(open(vocab_filename, "rb"))
# data = process_data(SRC_LANG, TARG_LANG, vocab)
# limited_data = process_data(SRC_LANG, TARG_LANG, vocab, sample_limit=BATCH_SIZE) 

In [13]:
# Build one set of dataloaders per dataset (full, single minibatch, mini-train).
# All three share the same sentence-length limits and batch size, and the
# datasets are processed in the same order as before.
loaders_full, loaders_minibatch, loaders_minitrain = [
    create_dataloaders(dataset, SRC_MAX_SENTENCE_LEN, TARG_MAX_SENTENCE_LEN, BATCH_SIZE)
    for dataset in (data, data_minibatch, data_minitrain)
]

In [14]:
# Assemble the attention seq2seq model: EncoderSimpleRNN + DecoderAttnRNN,
# wrapped in EncoderDecoderAttention.
# Construction order (encoder, then decoder, then wrapper) is kept as in the
# original so that any random initialisation happens in the same sequence.

src_vocab = vocab[SRC_LANG]
targ_vocab = vocab[TARG_LANG]

# Active encoder.
encoder = EncoderSimpleRNN(
    enc_hidden_dim=ENC_HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    pretrained_word2vec=get_pretrained_emb(src_vocab['word2vec'], src_vocab['token2id']),
)

# Active decoder (attention variant; unlike the disabled decoders below it
# also receives the source max sentence length).
decoder = DecoderAttnRNN(
    dec_hidden_dim=DEC_HIDDEN_DIM,
    enc_hidden_dim=ENC_HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    targ_vocab_size=TARG_VOCAB_SIZE,
    src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
    targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
    pretrained_word2vec=get_pretrained_emb(targ_vocab['word2vec'], targ_vocab['token2id']),
)

model = EncoderDecoderAttention(encoder, decoder, targ_vocab['token2id'])

# --- Disabled alternatives, kept for reference ---------------------------
# encoder = EncoderRNN(enc_hidden_dim=ENC_HIDDEN_DIM, num_layers=NUM_LAYERS, src_max_sentence_len=SRC_MAX_SENTENCE_LEN,
#                      pretrained_word2vec=get_pretrained_emb(vocab[SRC_LANG]['word2vec'], vocab[SRC_LANG]['token2id']))
# decoder = DecoderRNN(dec_hidden_dim=DEC_HIDDEN_DIM, enc_hidden_dim=ENC_HIDDEN_DIM, num_layers=NUM_LAYERS,
#                       targ_vocab_size=TARG_VOCAB_SIZE, targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
#                       pretrained_word2vec=get_pretrained_emb(vocab[TARG_LANG]['word2vec'], vocab[TARG_LANG]['token2id']))
# decoder = DecoderRNNV2(dec_hidden_dim=DEC_HIDDEN_DIM, enc_hidden_dim=ENC_HIDDEN_DIM, num_layers=NUM_LAYERS,
#                        targ_vocab_size=TARG_VOCAB_SIZE, targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
#                        pretrained_word2vec=get_pretrained_emb(vocab[TARG_LANG]['word2vec'], vocab[TARG_LANG]['token2id']))
# decoder = DecoderSimpleRNN(dec_hidden_dim=DEC_HIDDEN_DIM, enc_hidden_dim=ENC_HIDDEN_DIM, num_layers=NUM_LAYERS,
#                            targ_vocab_size=TARG_VOCAB_SIZE, targ_max_sentence_len=TARG_MAX_SENTENCE_LEN,
#                            pretrained_word2vec=get_pretrained_emb(vocab[TARG_LANG]['word2vec'], vocab[TARG_LANG]['token2id']))

In [15]:
# Train and evaluate the attention model; the call hands back the model and a
# results object. Intermediate metrics and one sampled prediction (with its
# attention weights) are printed, as seen in the captured output below.
#
# NOTE(review): in the captured log the lowest printed Val Loss is 8.75 at
# epoch 12 and it climbs afterwards, while Train BLEU reaches 36.15 and
# Val BLEU stays around 0.3 by epoch 108 -- looks like memorisation of the
# training samples; consider early stopping and confirm which loader
# lazy_train actually trains on.
# NOTE(review): the printed attention weights put large, equal mass on <PAD>
# source positions (e.g. ~0.18 each at epoch 48), which suggests padding is
# not masked in the attention -- confirm in the model code.
model, results = train_and_eval_attn(
    model=model,
    loaders_full=loaders_full,
    loaders_minibatch=loaders_minibatch,
    loaders_minitrain=loaders_minitrain,
    params=params,
    vocab=vocab,
    print_intermediate=True,
    save_checkpoint=True,
    save_to_log=True,
    lazy_eval=False,
    inspect_samples=1,
)

Epoch: 0.00, Train Loss: 10.13, Val Loss: 10.25, Train BLEU: 0.28, Val BLEU: 0.20
Sampling from training predictions...
Source: 我 真 喜欢 这些 东西 <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: i love that kind of stuff . <EOS> <PAD>
Model: <SOS> the the the the the the the dormant dormant
Attention Weights: tensor([[0.1627, 0.1642, 0.1642, 0.1636, 0.1666, 0.1635, 0.0038, 0.0038, 0.0038,
         0.0038],
        [0.1628, 0.1643, 0.1641, 0.1636, 0.1666, 0.1636, 0.0037, 0.0037, 0.0037,
         0.0037],
        [0.1629, 0.1642, 0.1641, 0.1636, 0.1667, 0.1639, 0.0037, 0.0037, 0.0037,
         0.0037],
        [0.1628, 0.1641, 0.1640, 0.1636, 0.1667, 0.1638, 0.0037, 0.0037, 0.0037,
         0.0037],
        [0.1628, 0.1641, 0.1640, 0.1635, 0.1667, 0.1638, 0.0038, 0.0038, 0.0038,
         0.0038],
        [0.1628, 0.1640, 0.1640, 0.1635, 0.1667, 0.1638, 0.0038, 0.0038, 0.0038,
         0.0038],
        [0.1628, 0.1640, 0.1640, 0.1635, 0.1667, 0.1638, 0.0038, 0.0038, 0.0038,
         0.0038],
        [

Epoch: 4.00, Train Loss: 8.97, Val Loss: 9.77, Train BLEU: 0.31, Val BLEU: 0.21
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[0.0957, 0.1038, 0.1073, 0.1077, 0.1068, 0.1051, 0.1016, 0.0925, 0.0798,
         0.0997],
        [0.0907, 0.1007, 0.1058, 0.1071, 0.1064, 0.1043, 0.1004, 0.0902, 0.0760,
         0.1183],
        [0.0884, 0.0986, 0.1041, 0.1056, 0.1050, 0.1031, 0.0992, 0.0892, 0.0749,
         0.1318],
        [0.0869, 0.0969, 0.1024, 0.1040, 0.1034, 0.1015, 0.0978, 0.0881, 0.0741,
         0.1448],
        [0.0860, 0.0959, 0.1013, 0.1028, 0.1023, 0.1005, 0.0969, 0.0875, 0.0737,
         0.1531],
        [0.0857, 0.0954, 0.1008, 0.1023, 0.1017, 0.0999, 0.0964, 0.0872, 0.0735,
         0.1571],
        [0.0856, 0.0952, 0.1005, 0.1020, 0.1014, 0.0997, 0.0962, 0.0871, 0.0736,
         0.1587],
        [0.0856, 0.0

Epoch: 8.00, Train Loss: 7.37, Val Loss: 9.17, Train BLEU: 0.30, Val BLEU: 0.20
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[0.0967, 0.0997, 0.0999, 0.0998, 0.1004, 0.1014, 0.1023, 0.1030, 0.1022,
         0.0947],
        [0.0848, 0.0957, 0.1015, 0.1047, 0.1060, 0.1076, 0.1077, 0.1059, 0.1010,
         0.0851],
        [0.0814, 0.0944, 0.1019, 0.1060, 0.1074, 0.1093, 0.1091, 0.1065, 0.1008,
         0.0833],
        [0.0806, 0.0941, 0.1020, 0.1064, 0.1078, 0.1097, 0.1094, 0.1065, 0.1006,
         0.0828],
        [0.0804, 0.0941, 0.1021, 0.1065, 0.1080, 0.1098, 0.1095, 0.1065, 0.1005,
         0.0826],
        [0.0804, 0.0941, 0.1021, 0.1066, 0.1080, 0.1099, 0.1095, 0.1065, 0.1004,
         0.0825],
        [0.0804, 0.0941, 0.1021, 0.1066, 0.1081, 0.1099, 0.1095, 0.1065, 0.1004,
         0.0825],
        [0.0804, 0.0941, 0.1021, 0.

Epoch: 12.00, Train Loss: 5.76, Val Loss: 8.75, Train BLEU: 0.28, Val BLEU: 0.19
Sampling from training predictions...
Source: 还有 这些 摇晃 着 旋转 转着 的 触角 <EOS> <PAD>
Reference: it &apos;s got tentacles dangling , swirling around like
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[0.0940, 0.0989, 0.1014, 0.1031, 0.1046, 0.1063, 0.1051, 0.1006, 0.0831,
         0.1029],
        [0.0380, 0.0463, 0.0498, 0.0515, 0.0521, 0.0516, 0.0506, 0.0446, 0.0304,
         0.5851],
        [0.0219, 0.0273, 0.0297, 0.0309, 0.0312, 0.0306, 0.0301, 0.0262, 0.0177,
         0.7546],
        [0.0174, 0.0219, 0.0238, 0.0248, 0.0250, 0.0245, 0.0241, 0.0210, 0.0142,
         0.8035],
        [0.0158, 0.0198, 0.0216, 0.0225, 0.0227, 0.0222, 0.0218, 0.0190, 0.0129,
         0.8218],
        [0.0150, 0.0188, 0.0204, 0.0213, 0.0215, 0.0210, 0.0207, 0.0180, 0.0122,
         0.8311],
        [0.0146, 0.0183, 0.0199, 0.0207, 0.0209, 0.0205, 0.0202, 0.0176, 0.0119,
         0.8355],
        [0

Epoch: 16.00, Train Loss: 4.80, Val Loss: 8.92, Train BLEU: 0.33, Val BLEU: 0.21
Sampling from training predictions...
Source: 是 我 最 喜欢 的 因为 它 哪 都 能动
Reference: it &apos;s one of my favorites , because it
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.0918, 0.0977, 0.1001, 0.1014, 0.1025, 0.1032, 0.1039, 0.1046, 0.1035,
         0.0913],
        [0.0824, 0.0998, 0.1044, 0.1063, 0.1075, 0.1084, 0.1093, 0.1093, 0.1026,
         0.0698],
        [0.0801, 0.0997, 0.1052, 0.1073, 0.1086, 0.1094, 0.1103, 0.1101, 0.1020,
         0.0672],
        [0.0796, 0.0994, 0.1052, 0.1074, 0.1088, 0.1096, 0.1106, 0.1103, 0.1020,
         0.0672],
        [0.0794, 0.0992, 0.1051, 0.1075, 0.1089, 0.1097, 0.1106, 0.1104, 0.1019,
         0.0672],
        [0.0794, 0.0991, 0.1051, 0.1075, 0.1089, 0.1098, 0.1107, 0.1104, 0.1019,
         0.0672],
        [0.0793, 0.0991, 0.1052, 0.1075, 0.1090, 0.1098, 0.1107, 0.1104, 0.1019,
         0.0671],
        [0.0792, 0.0991, 0.1052, 0.

Epoch: 20.00, Train Loss: 4.37, Val Loss: 9.31, Train BLEU: 0.26, Val BLEU: 0.19
Sampling from training predictions...
Source: 还有 这些 摇晃 着 旋转 转着 的 触角 <EOS> <PAD>
Reference: it &apos;s got tentacles dangling , swirling around like
Model: <SOS> it it <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0438, 0.0441, 0.0446, 0.0452, 0.0455, 0.0455, 0.0456, 0.0448, 0.0399,
         0.6010],
        [0.0187, 0.0215, 0.0223, 0.0227, 0.0229, 0.0228, 0.0228, 0.0216, 0.0151,
         0.8096],
        [0.0084, 0.0100, 0.0105, 0.0107, 0.0108, 0.0108, 0.0108, 0.0101, 0.0069,
         0.9109],
        [0.0054, 0.0065, 0.0068, 0.0070, 0.0071, 0.0070, 0.0071, 0.0066, 0.0045,
         0.9419],
        [0.0045, 0.0054, 0.0057, 0.0059, 0.0059, 0.0059, 0.0059, 0.0055, 0.0038,
         0.9514],
        [0.0042, 0.0050, 0.0053, 0.0054, 0.0055, 0.0055, 0.0055, 0.0051, 0.0035,
         0.9550],
        [0.0041, 0.0049, 0.0052, 0.0053, 0.0053, 0.0053, 0.0053, 0.0050, 0.0034,
         0.9563]

Epoch: 24.00, Train Loss: 4.14, Val Loss: 9.77, Train BLEU: 2.94, Val BLEU: 0.30
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> it the the the . . . <EOS> <EOS>
Attention Weights: tensor([[0.0775, 0.0830, 0.0857, 0.0867, 0.0869, 0.0867, 0.0862, 0.0837, 0.0675,
         0.2560],
        [0.0656, 0.0783, 0.0816, 0.0828, 0.0831, 0.0831, 0.0825, 0.0784, 0.0498,
         0.3149],
        [0.0423, 0.0530, 0.0557, 0.0567, 0.0569, 0.0569, 0.0564, 0.0530, 0.0316,
         0.5376],
        [0.0295, 0.0377, 0.0399, 0.0407, 0.0409, 0.0409, 0.0404, 0.0378, 0.0223,
         0.6699],
        [0.0218, 0.0280, 0.0298, 0.0304, 0.0305, 0.0305, 0.0301, 0.0282, 0.0166,
         0.7541],
        [0.0175, 0.0226, 0.0240, 0.0245, 0.0246, 0.0246, 0.0244, 0.0228, 0.0135,
         0.8014],
        [0.0154, 0.0199, 0.0212, 0.0216, 0.0217, 0.0217, 0.0215, 0.0201, 0.0119,
         0.8251],
        [0.0143, 0.018

Epoch: 28.00, Train Loss: 4.02, Val Loss: 10.12, Train BLEU: 2.76, Val BLEU: 0.23
Sampling from training predictions...
Source: 我们 用 的 是 深海 潜水 潜水艇 <UNK> 号 和
Reference: we use the submarine alvin and we use cameras
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.0930, 0.1000, 0.1034, 0.1045, 0.1047, 0.1036, 0.1014, 0.0971, 0.0992,
         0.0931],
        [0.0878, 0.1017, 0.1055, 0.1067, 0.1069, 0.1059, 0.1016, 0.0965, 0.1006,
         0.0869],
        [0.0839, 0.1015, 0.1062, 0.1078, 0.1082, 0.1075, 0.1030, 0.0975, 0.1009,
         0.0834],
        [0.0814, 0.1013, 0.1066, 0.1084, 0.1090, 0.1084, 0.1040, 0.0980, 0.1009,
         0.0819],
        [0.0801, 0.1012, 0.1068, 0.1088, 0.1094, 0.1089, 0.1045, 0.0982, 0.1010,
         0.0812],
        [0.0795, 0.1011, 0.1069, 0.1089, 0.1096, 0.1091, 0.1048, 0.0983, 0.1010,
         0.0807],
        [0.0793, 0.1011, 0.1070, 0.1090, 0.1096, 0.1092, 0.1049, 0.0984, 0.1010,
         0.0805],
        [0.0791, 0.1011, 0

Epoch: 32.00, Train Loss: 3.91, Val Loss: 10.39, Train BLEU: 2.72, Val BLEU: 0.24
Sampling from training predictions...
Source: 我们 这 有 不少 精彩 的 泰坦 泰坦尼克 坦尼 尼克
Reference: we &apos;ve got some of the most incredible video
Model: <SOS> it the the the the the the the the
Attention Weights: tensor([[0.0930, 0.1013, 0.1047, 0.1059, 0.1058, 0.1050, 0.1029, 0.0967, 0.0946,
         0.0900],
        [0.0893, 0.1026, 0.1063, 0.1075, 0.1076, 0.1070, 0.1048, 0.0965, 0.0935,
         0.0850],
        [0.0863, 0.1027, 0.1069, 0.1084, 0.1086, 0.1083, 0.1066, 0.0981, 0.0934,
         0.0808],
        [0.0837, 0.1025, 0.1074, 0.1091, 0.1095, 0.1092, 0.1076, 0.0991, 0.0936,
         0.0783],
        [0.0823, 0.1024, 0.1077, 0.1095, 0.1100, 0.1097, 0.1082, 0.0995, 0.0936,
         0.0771],
        [0.0817, 0.1024, 0.1079, 0.1098, 0.1102, 0.1099, 0.1084, 0.0997, 0.0936,
         0.0764],
        [0.0814, 0.1025, 0.1080, 0.1099, 0.1103, 0.1100, 0.1085, 0.0999, 0.0936,
         0.0760],
        [0.0813, 0.102

Epoch: 36.00, Train Loss: 3.80, Val Loss: 10.60, Train BLEU: 6.03, Val BLEU: 0.23
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> it &apos;s the the . . . . <EOS>
Attention Weights: tensor([[0.0563, 0.0607, 0.0625, 0.0628, 0.0625, 0.0614, 0.0596, 0.0556, 0.0441,
         0.4744],
        [0.0552, 0.0614, 0.0634, 0.0640, 0.0638, 0.0629, 0.0612, 0.0572, 0.0382,
         0.4727],
        [0.0434, 0.0498, 0.0517, 0.0521, 0.0520, 0.0512, 0.0499, 0.0465, 0.0288,
         0.5746],
        [0.0288, 0.0339, 0.0353, 0.0356, 0.0355, 0.0349, 0.0339, 0.0313, 0.0187,
         0.7122],
        [0.0183, 0.0221, 0.0230, 0.0232, 0.0231, 0.0228, 0.0221, 0.0203, 0.0119,
         0.8132],
        [0.0142, 0.0174, 0.0181, 0.0183, 0.0183, 0.0180, 0.0174, 0.0160, 0.0093,
         0.8531],
        [0.0124, 0.0154, 0.0161, 0.0163, 0.0162, 0.0159, 0.0155, 0.0141, 0.0082,
         0.8700],
        [0.0115, 0.01

Epoch: 40.00, Train Loss: 3.69, Val Loss: 10.74, Train BLEU: 6.14, Val BLEU: 0.24
Sampling from training predictions...
Source: 海洋 里 生物 的 多样 多样性 和 密度 要 比
Reference: the biodiversity and the <UNK> in the ocean is
Model: <SOS> we the the the the the the the the
Attention Weights: tensor([[0.0931, 0.0998, 0.1030, 0.1038, 0.1040, 0.1056, 0.1019, 0.1001, 0.0970,
         0.0917],
        [0.0900, 0.0996, 0.1030, 0.1041, 0.1044, 0.1048, 0.1027, 0.1012, 0.0986,
         0.0915],
        [0.0886, 0.1001, 0.1035, 0.1048, 0.1052, 0.1057, 0.1030, 0.1014, 0.0985,
         0.0892],
        [0.0868, 0.1005, 0.1043, 0.1057, 0.1060, 0.1065, 0.1036, 0.1017, 0.0981,
         0.0868],
        [0.0852, 0.1008, 0.1050, 0.1064, 0.1066, 0.1072, 0.1041, 0.1019, 0.0979,
         0.0849],
        [0.0843, 0.1010, 0.1053, 0.1067, 0.1070, 0.1075, 0.1044, 0.1022, 0.0979,
         0.0838],
        [0.0839, 0.1011, 0.1055, 0.1069, 0.1071, 0.1076, 0.1045, 0.1023, 0.0979,
         0.0832],
        [0.0837, 0.1011, 0.1

Epoch: 44.00, Train Loss: 3.59, Val Loss: 10.83, Train BLEU: 6.29, Val BLEU: 0.26
Sampling from training predictions...
Source: <UNK> 塞尔 <UNK> <UNK> 斯特 说 过 真正 的 探索
Reference: marcel proust said , &quot; the true voyage of
Model: <SOS> marcel proust the , , , , , ,
Attention Weights: tensor([[0.0805, 0.0813, 0.0856, 0.0924, 0.1170, 0.1096, 0.1121, 0.1117, 0.1090,
         0.1009],
        [0.0492, 0.0481, 0.0535, 0.0627, 0.1148, 0.1339, 0.1400, 0.1399, 0.1366,
         0.1212],
        [0.0323, 0.0312, 0.0362, 0.0463, 0.1117, 0.1491, 0.1568, 0.1561, 0.1512,
         0.1291],
        [0.0258, 0.0246, 0.0289, 0.0379, 0.1072, 0.1563, 0.1654, 0.1643, 0.1582,
         0.1315],
        [0.0238, 0.0227, 0.0266, 0.0351, 0.1046, 0.1588, 0.1687, 0.1674, 0.1606,
         0.1316],
        [0.0230, 0.0219, 0.0257, 0.0338, 0.1030, 0.1599, 0.1703, 0.1690, 0.1618,
         0.1314],
        [0.0226, 0.0216, 0.0253, 0.0332, 0.1018, 0.1606, 0.1713, 0.1699, 0.1625,
         0.1311],
        [0.0225, 0.0214

Epoch: 48.00, Train Loss: 3.48, Val Loss: 10.91, Train BLEU: 6.02, Val BLEU: 0.26
Sampling from training predictions...
Source: 我 真 喜欢 这些 东西 <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: i love that kind of stuff . <EOS> <PAD>
Model: <SOS> it &apos;s &apos;s a <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0457, 0.0496, 0.0500, 0.0490, 0.0462, 0.0371, 0.1806, 0.1806, 0.1806,
         0.1806],
        [0.0354, 0.0383, 0.0388, 0.0383, 0.0365, 0.0262, 0.1966, 0.1966, 0.1966,
         0.1966],
        [0.0351, 0.0381, 0.0384, 0.0378, 0.0356, 0.0240, 0.1977, 0.1977, 0.1977,
         0.1977],
        [0.0247, 0.0273, 0.0274, 0.0269, 0.0249, 0.0155, 0.2133, 0.2133, 0.2133,
         0.2133],
        [0.0159, 0.0180, 0.0182, 0.0177, 0.0162, 0.0097, 0.2261, 0.2261, 0.2261,
         0.2261],
        [0.0110, 0.0128, 0.0130, 0.0127, 0.0115, 0.0067, 0.2331, 0.2331, 0.2331,
         0.2331],
        [0.0087, 0.0103, 0.0105, 0.0102, 0.0092, 0.0054, 0.2364, 0.2364, 0.2364,
         0.2364],
  

Epoch: 52.00, Train Loss: 3.37, Val Loss: 10.99, Train BLEU: 6.96, Val BLEU: 0.27
Sampling from training predictions...
Source: 这 是 一种 种群 栖 动物 <EOS> <PAD> <PAD> <PAD>
Reference: it &apos;s a colonial animal . <EOS> <PAD> <PAD>
Model: <SOS> it &apos;s &apos;s a <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0515, 0.0565, 0.0570, 0.0563, 0.0548, 0.0512, 0.0402, 0.2108, 0.2108,
         0.2108],
        [0.0422, 0.0458, 0.0463, 0.0459, 0.0451, 0.0426, 0.0306, 0.2338, 0.2338,
         0.2338],
        [0.0451, 0.0488, 0.0491, 0.0486, 0.0477, 0.0444, 0.0306, 0.2286, 0.2286,
         0.2286],
        [0.0361, 0.0396, 0.0397, 0.0393, 0.0384, 0.0352, 0.0226, 0.2497, 0.2497,
         0.2497],
        [0.0237, 0.0266, 0.0267, 0.0264, 0.0258, 0.0233, 0.0142, 0.2778, 0.2778,
         0.2778],
        [0.0149, 0.0172, 0.0174, 0.0172, 0.0167, 0.0150, 0.0089, 0.2976, 0.2976,
         0.2976],
        [0.0107, 0.0126, 0.0128, 0.0127, 0.0124, 0.0110, 0.0064, 0.3071, 0.3071,
         0.3071

Epoch: 56.00, Train Loss: 3.26, Val Loss: 11.07, Train BLEU: 7.51, Val BLEU: 0.27
Sampling from training predictions...
Source: 我 真 喜欢 这些 东西 <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: i love that kind of stuff . <EOS> <PAD>
Model: <SOS> it &apos;s a a <EOS> <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0433, 0.0476, 0.0477, 0.0465, 0.0434, 0.0338, 0.1844, 0.1844, 0.1844,
         0.1844],
        [0.0343, 0.0371, 0.0372, 0.0365, 0.0346, 0.0255, 0.1987, 0.1987, 0.1987,
         0.1987],
        [0.0400, 0.0430, 0.0429, 0.0420, 0.0393, 0.0279, 0.1912, 0.1912, 0.1912,
         0.1912],
        [0.0322, 0.0350, 0.0348, 0.0340, 0.0314, 0.0207, 0.2030, 0.2030, 0.2030,
         0.2030],
        [0.0201, 0.0222, 0.0222, 0.0217, 0.0198, 0.0123, 0.2204, 0.2204, 0.2204,
         0.2204],
        [0.0114, 0.0131, 0.0131, 0.0128, 0.0116, 0.0069, 0.2328, 0.2328, 0.2328,
         0.2328],
        [0.0076, 0.0089, 0.0090, 0.0088, 0.0079, 0.0046, 0.2383, 0.2383, 0.2383,
         0.2383],
        

Epoch: 60.00, Train Loss: 3.15, Val Loss: 11.14, Train BLEU: 8.19, Val BLEU: 0.28
Sampling from training predictions...
Source: 我们 用 的 是 深海 潜水 潜水艇 <UNK> 号 和
Reference: we use the submarine alvin and we use cameras
Model: <SOS> we the the the the the the the the
Attention Weights: tensor([[0.0883, 0.0964, 0.0994, 0.1020, 0.1070, 0.1142, 0.1184, 0.1030, 0.0872,
         0.0841],
        [0.0873, 0.0961, 0.0994, 0.1029, 0.1098, 0.1212, 0.1146, 0.0952, 0.0879,
         0.0857],
        [0.0860, 0.0983, 0.1018, 0.1051, 0.1113, 0.1226, 0.1122, 0.0911, 0.0879,
         0.0836],
        [0.0869, 0.1014, 0.1049, 0.1079, 0.1130, 0.1221, 0.1062, 0.0848, 0.0897,
         0.0830],
        [0.0873, 0.1039, 0.1076, 0.1105, 0.1148, 0.1219, 0.1009, 0.0801, 0.0909,
         0.0821],
        [0.0873, 0.1054, 0.1092, 0.1121, 0.1158, 0.1216, 0.0978, 0.0774, 0.0917,
         0.0815],
        [0.0874, 0.1065, 0.1104, 0.1132, 0.1166, 0.1215, 0.0957, 0.0753, 0.0923,
         0.0812],
        [0.0873, 0.1073, 0

Epoch: 64.00, Train Loss: 3.03, Val Loss: 11.22, Train BLEU: 8.25, Val BLEU: 0.27
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> we the the the the the the the the
Attention Weights: tensor([[0.0936, 0.1028, 0.1038, 0.1034, 0.1028, 0.1016, 0.1004, 0.0992, 0.0971,
         0.0954],
        [0.0913, 0.1018, 0.1030, 0.1029, 0.1025, 0.1017, 0.1009, 0.1000, 0.0990,
         0.0969],
        [0.0898, 0.1025, 0.1037, 0.1036, 0.1031, 0.1025, 0.1017, 0.1005, 0.0989,
         0.0937],
        [0.0888, 0.1032, 0.1043, 0.1044, 0.1038, 0.1032, 0.1023, 0.1008, 0.0986,
         0.0907],
        [0.0875, 0.1037, 0.1049, 0.1051, 0.1044, 0.1039, 0.1029, 0.1012, 0.0985,
         0.0881],
        [0.0866, 0.1039, 0.1053, 0.1055, 0.1048, 0.1043, 0.1033, 0.1014, 0.0985,
         0.0863],
        [0.0859, 0.1040, 0.1055, 0.1058, 0.1051, 0.1047, 0.1036, 0.1016, 0.0986,
         0.0850],
        [0.0853, 0.1041, 0.1057, 0

Epoch: 68.00, Train Loss: 2.91, Val Loss: 11.26, Train BLEU: 9.54, Val BLEU: 0.28
Sampling from training predictions...
Source: 大家 想想 海洋 占 了 地球 球面 面积 的 75
Reference: when you think about it , the oceans are
Model: <SOS> it the the the , , , , ,
Attention Weights: tensor([[0.0951, 0.1061, 0.1074, 0.1067, 0.1062, 0.1057, 0.1042, 0.1030, 0.1008,
         0.0649],
        [0.0953, 0.1067, 0.1082, 0.1075, 0.1073, 0.1070, 0.1058, 0.1053, 0.1035,
         0.0533],
        [0.0948, 0.1087, 0.1103, 0.1094, 0.1093, 0.1091, 0.1073, 0.1059, 0.1015,
         0.0438],
        [0.0937, 0.1101, 0.1119, 0.1111, 0.1109, 0.1106, 0.1084, 0.1064, 0.0998,
         0.0371],
        [0.0927, 0.1111, 0.1127, 0.1124, 0.1122, 0.1114, 0.1094, 0.1069, 0.0987,
         0.0325],
        [0.0920, 0.1117, 0.1132, 0.1134, 0.1131, 0.1119, 0.1100, 0.1073, 0.0977,
         0.0297],
        [0.0913, 0.1123, 0.1136, 0.1141, 0.1138, 0.1122, 0.1105, 0.1075, 0.0968,
         0.0279],
        [0.0907, 0.1127, 0.1139, 0.1147, 0.

Epoch: 72.00, Train Loss: 2.80, Val Loss: 11.30, Train BLEU: 10.85, Val BLEU: 0.29
Sampling from training predictions...
Source: 泰坦 泰坦尼克 泰坦尼克号 坦尼 尼克 号 是 拿 了 不少
Reference: the truth of the matter is that the titanic
Model: <SOS> the the the the the the the the the
Attention Weights: tensor([[0.1057, 0.1120, 0.1164, 0.1214, 0.0950, 0.0923, 0.0912, 0.0898, 0.0883,
         0.0878],
        [0.1060, 0.1089, 0.1170, 0.1186, 0.0940, 0.0922, 0.0917, 0.0909, 0.0904,
         0.0904],
        [0.1011, 0.1069, 0.1157, 0.1183, 0.0959, 0.0943, 0.0938, 0.0929, 0.0920,
         0.0891],
        [0.0976, 0.1028, 0.1139, 0.1159, 0.0986, 0.0970, 0.0963, 0.0952, 0.0939,
         0.0889],
        [0.0949, 0.1003, 0.1128, 0.1155, 0.1003, 0.0987, 0.0979, 0.0967, 0.0949,
         0.0880],
        [0.0924, 0.0984, 0.1117, 0.1152, 0.1016, 0.1002, 0.0993, 0.0980, 0.0959,
         0.0874],
        [0.0903, 0.0966, 0.1104, 0.1144, 0.1027, 0.1015, 0.1006, 0.0993, 0.0970,
         0.0872],
        [0.0887, 0.0952,

Epoch: 76.00, Train Loss: 2.68, Val Loss: 11.33, Train BLEU: 10.79, Val BLEU: 0.28
Sampling from training predictions...
Source: 我 真 喜欢 这些 东西 <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: i love that kind of stuff . <EOS> <PAD>
Model: <SOS> it &apos;s a a . <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0508, 0.0557, 0.0557, 0.0544, 0.0505, 0.0381, 0.1737, 0.1737, 0.1737,
         0.1737],
        [0.0380, 0.0407, 0.0408, 0.0402, 0.0380, 0.0285, 0.1935, 0.1935, 0.1935,
         0.1935],
        [0.0440, 0.0471, 0.0470, 0.0461, 0.0429, 0.0315, 0.1853, 0.1853, 0.1853,
         0.1853],
        [0.0392, 0.0425, 0.0424, 0.0416, 0.0387, 0.0264, 0.1923, 0.1923, 0.1923,
         0.1923],
        [0.0270, 0.0298, 0.0298, 0.0293, 0.0273, 0.0172, 0.2099, 0.2099, 0.2099,
         0.2099],
        [0.0148, 0.0168, 0.0169, 0.0166, 0.0154, 0.0092, 0.2276, 0.2276, 0.2276,
         0.2276],
        [0.0084, 0.0097, 0.0098, 0.0097, 0.0089, 0.0052, 0.2371, 0.2371, 0.2371,
         0.2371],
        [0.

Epoch: 80.00, Train Loss: 2.57, Val Loss: 11.39, Train BLEU: 11.89, Val BLEU: 0.30
Sampling from training predictions...
Source: 底下 这些 都 是 <UNK> 它们 上上 上上下下 上下 下下
Reference: it &apos;s got these fishing <UNK> on the bottom
Model: <SOS> it &apos;s got got these fishing <UNK> the ,
Attention Weights: tensor([[0.1033, 0.1129, 0.1188, 0.1227, 0.1019, 0.0932, 0.0941, 0.0878, 0.0860,
         0.0793],
        [0.1071, 0.1160, 0.1218, 0.1257, 0.0947, 0.0958, 0.0921, 0.0868, 0.0855,
         0.0744],
        [0.1058, 0.1197, 0.1247, 0.1278, 0.0942, 0.0962, 0.0931, 0.0860, 0.0837,
         0.0688],
        [0.1045, 0.1236, 0.1286, 0.1300, 0.0866, 0.0982, 0.0964, 0.0879, 0.0844,
         0.0599],
        [0.1036, 0.1270, 0.1323, 0.1323, 0.0798, 0.0998, 0.0991, 0.0887, 0.0844,
         0.0529],
        [0.1023, 0.1283, 0.1336, 0.1331, 0.0771, 0.1007, 0.1009, 0.0896, 0.0847,
         0.0498],
        [0.1009, 0.1293, 0.1344, 0.1328, 0.0738, 0.1022, 0.1028, 0.0910, 0.0854,
         0.0474],
        

Epoch: 84.00, Train Loss: 2.45, Val Loss: 11.42, Train BLEU: 13.73, Val BLEU: 0.30
Sampling from training predictions...
Source: 原因 在于 我们 一直 没 把 海洋 当回事 回事 回事儿
Reference: and the problem , i think , is that
Model: <SOS> and the problem , , , , , ,
Attention Weights: tensor([[0.0958, 0.1034, 0.1049, 0.1059, 0.1045, 0.1038, 0.1044, 0.0992, 0.0860,
         0.0919],
        [0.1003, 0.1083, 0.1099, 0.1113, 0.1102, 0.1104, 0.1124, 0.1066, 0.0692,
         0.0616],
        [0.0994, 0.1126, 0.1145, 0.1158, 0.1148, 0.1151, 0.1161, 0.1075, 0.0585,
         0.0458],
        [0.0987, 0.1157, 0.1180, 0.1191, 0.1182, 0.1183, 0.1178, 0.1063, 0.0510,
         0.0370],
        [0.0973, 0.1173, 0.1200, 0.1212, 0.1204, 0.1204, 0.1190, 0.1060, 0.0466,
         0.0318],
        [0.0967, 0.1184, 0.1212, 0.1224, 0.1217, 0.1217, 0.1196, 0.1058, 0.0438,
         0.0288],
        [0.0962, 0.1190, 0.1219, 0.1231, 0.1224, 0.1224, 0.1199, 0.1055, 0.0422,
         0.0272],
        [0.0959, 0.1193, 0.1223, 0.1235, 

Epoch: 88.00, Train Loss: 2.33, Val Loss: 11.48, Train BLEU: 17.61, Val BLEU: 0.29
Sampling from training predictions...
Source: 我 真 喜欢 这些 东西 <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: i love that kind of stuff . <EOS> <PAD>
Model: <SOS> it &apos;s a a animal <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0624, 0.0666, 0.0663, 0.0646, 0.0596, 0.0444, 0.1590, 0.1590, 0.1590,
         0.1590],
        [0.0431, 0.0449, 0.0448, 0.0442, 0.0415, 0.0309, 0.1877, 0.1877, 0.1877,
         0.1877],
        [0.0496, 0.0524, 0.0520, 0.0508, 0.0470, 0.0349, 0.1783, 0.1783, 0.1783,
         0.1783],
        [0.0460, 0.0493, 0.0488, 0.0477, 0.0439, 0.0306, 0.1834, 0.1834, 0.1834,
         0.1834],
        [0.0336, 0.0368, 0.0365, 0.0357, 0.0329, 0.0209, 0.2009, 0.2009, 0.2009,
         0.2009],
        [0.0187, 0.0211, 0.0210, 0.0206, 0.0189, 0.0110, 0.2222, 0.2222, 0.2222,
         0.2222],
        [0.0096, 0.0111, 0.0111, 0.0108, 0.0099, 0.0055, 0.2355, 0.2355, 0.2355,
         0.2355],
      

Epoch: 92.00, Train Loss: 2.22, Val Loss: 11.52, Train BLEU: 19.19, Val BLEU: 0.29
Sampling from training predictions...
Source: 是 我 最 喜欢 的 因为 它 哪 都 能动
Reference: it &apos;s one of my favorites , because it
Model: <SOS> it &apos;s &apos;s one my my , , ,
Attention Weights: tensor([[0.0989, 0.1061, 0.1066, 0.1056, 0.1064, 0.1045, 0.1047, 0.1022, 0.0971,
         0.0678],
        [0.1015, 0.1052, 0.1057, 0.1054, 0.1058, 0.1048, 0.1047, 0.1034, 0.0998,
         0.0638],
        [0.0986, 0.1063, 0.1067, 0.1061, 0.1064, 0.1053, 0.1051, 0.1037, 0.0990,
         0.0628],
        [0.0968, 0.1074, 0.1081, 0.1073, 0.1077, 0.1063, 0.1061, 0.1045, 0.0990,
         0.0567],
        [0.0948, 0.1084, 0.1095, 0.1089, 0.1093, 0.1079, 0.1075, 0.1057, 0.0992,
         0.0489],
        [0.0933, 0.1087, 0.1103, 0.1098, 0.1103, 0.1088, 0.1084, 0.1066, 0.0994,
         0.0444],
        [0.0921, 0.1090, 0.1110, 0.1103, 0.1110, 0.1095, 0.1091, 0.1074, 0.0994,
         0.0412],
        [0.0913, 0.1093, 0.1115, 

Epoch: 96.00, Train Loss: 2.11, Val Loss: 11.58, Train BLEU: 24.99, Val BLEU: 0.30
Sampling from training predictions...
Source: 还有 前面 的 这个 是 推进 引擎 它 一会 一会儿
Reference: and it &apos;s got these jet thrusters up in
Model: <SOS> and it &apos;s got these jet thrusters thrusters in
Attention Weights: tensor([[0.0923, 0.0984, 0.0993, 0.0983, 0.0991, 0.0968, 0.0953, 0.0929, 0.0955,
         0.1322],
        [0.0975, 0.0987, 0.0995, 0.0989, 0.0995, 0.0984, 0.0971, 0.0951, 0.0931,
         0.1220],
        [0.0975, 0.1021, 0.1029, 0.1022, 0.1026, 0.1017, 0.1001, 0.0977, 0.0871,
         0.1061],
        [0.0972, 0.1039, 0.1046, 0.1037, 0.1043, 0.1034, 0.1018, 0.0995, 0.0854,
         0.0960],
        [0.0973, 0.1070, 0.1079, 0.1066, 0.1076, 0.1064, 0.1049, 0.1025, 0.0797,
         0.0799],
        [0.0973, 0.1109, 0.1121, 0.1109, 0.1119, 0.1106, 0.1086, 0.1053, 0.0699,
         0.0625],
        [0.0968, 0.1136, 0.1151, 0.1138, 0.1148, 0.1133, 0.1111, 0.1069, 0.0633,
         0.0514],
        [0

Epoch: 100.00, Train Loss: 1.99, Val Loss: 11.58, Train BLEU: 24.40, Val BLEU: 0.29
Sampling from training predictions...
Source: 还有 前面 的 这个 是 推进 引擎 它 一会 一会儿
Reference: and it &apos;s got these jet thrusters up in
Model: <SOS> and it &apos;s got these jet thrusters thrusters in
Attention Weights: tensor([[0.0919, 0.0984, 0.0995, 0.0981, 0.0995, 0.0966, 0.0948, 0.0926, 0.0951,
         0.1334],
        [0.0967, 0.0984, 0.0995, 0.0985, 0.0994, 0.0980, 0.0959, 0.0940, 0.0934,
         0.1262],
        [0.0966, 0.1015, 0.1026, 0.1017, 0.1022, 0.1012, 0.0987, 0.0963, 0.0880,
         0.1111],
        [0.0962, 0.1034, 0.1043, 0.1032, 0.1037, 0.1029, 0.1003, 0.0981, 0.0868,
         0.1011],
        [0.0963, 0.1067, 0.1077, 0.1062, 0.1072, 0.1062, 0.1034, 0.1012, 0.0811,
         0.0840],
        [0.0960, 0.1110, 0.1124, 0.1110, 0.1120, 0.1109, 0.1077, 0.1044, 0.0701,
         0.0644],
        [0.0952, 0.1140, 0.1158, 0.1143, 0.1153, 0.1139, 0.1105, 0.1063, 0.0628,
         0.0517],
        [

Epoch: 104.00, Train Loss: 1.88, Val Loss: 11.64, Train BLEU: 31.61, Val BLEU: 0.29
Sampling from training predictions...
Source: 海洋 的 平均 深度 是 两英里 英里 <EOS> <PAD> <PAD>
Reference: the average depth is about two miles . <EOS>
Model: <SOS> the average the is about two . <EOS> <EOS>
Attention Weights: tensor([[0.0760, 0.0789, 0.0832, 0.0812, 0.0781, 0.0680, 0.0629, 0.0431, 0.2144,
         0.2144],
        [0.0700, 0.0701, 0.0745, 0.0723, 0.0693, 0.0597, 0.0554, 0.0384, 0.2451,
         0.2451],
        [0.0738, 0.0763, 0.0806, 0.0776, 0.0751, 0.0686, 0.0591, 0.0425, 0.2232,
         0.2232],
        [0.0680, 0.0728, 0.0749, 0.0721, 0.0707, 0.0634, 0.0566, 0.0381, 0.2417,
         0.2417],
        [0.0569, 0.0635, 0.0649, 0.0622, 0.0609, 0.0523, 0.0484, 0.0294, 0.2808,
         0.2808],
        [0.0384, 0.0448, 0.0461, 0.0439, 0.0428, 0.0352, 0.0332, 0.0178, 0.3489,
         0.3489],
        [0.0203, 0.0247, 0.0254, 0.0240, 0.0232, 0.0184, 0.0176, 0.0089, 0.4187,
         0.4187],
        

Epoch: 108.00, Train Loss: 1.78, Val Loss: 11.70, Train BLEU: 36.15, Val BLEU: 0.28
Sampling from training predictions...
Source: 地球 的 大部 大部分 部分 都 是 海水 <EOS> <PAD>
Reference: most of the planet is ocean water . <EOS>
Model: <SOS> it &apos;s the planet is working water . <EOS>
Attention Weights: tensor([[0.0960, 0.0992, 0.1051, 0.1030, 0.0969, 0.0942, 0.0918, 0.0836, 0.0570,
         0.1731],
        [0.0921, 0.0909, 0.0983, 0.0954, 0.0869, 0.0850, 0.0851, 0.0810, 0.0560,
         0.2293],
        [0.0880, 0.0905, 0.0995, 0.0958, 0.0866, 0.0847, 0.0854, 0.0815, 0.0559,
         0.2321],
        [0.0778, 0.0836, 0.0915, 0.0871, 0.0795, 0.0785, 0.0785, 0.0745, 0.0489,
         0.3000],
        [0.0663, 0.0752, 0.0802, 0.0765, 0.0715, 0.0710, 0.0698, 0.0642, 0.0391,
         0.3863],
        [0.0464, 0.0565, 0.0604, 0.0578, 0.0543, 0.0539, 0.0524, 0.0466, 0.0252,
         0.5467],
        [0.0287, 0.0366, 0.0394, 0.0377, 0.0354, 0.0352, 0.0341, 0.0300, 0.0155,
         0.7074],
        [0.

Epoch: 112.00, Train Loss: 1.78, Val Loss: 11.78, Train BLEU: 32.66, Val BLEU: 0.28
Sampling from training predictions...
Source: 这 是 一只 水母 <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: here &apos;s a jelly . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> here &apos;s a jelly . . <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0662, 0.0739, 0.0723, 0.0660, 0.0454, 0.1352, 0.1352, 0.1352, 0.1352,
         0.1352],
        [0.0388, 0.0416, 0.0413, 0.0387, 0.0290, 0.1621, 0.1621, 0.1621, 0.1621,
         0.1621],
        [0.0499, 0.0544, 0.0534, 0.0491, 0.0373, 0.1512, 0.1512, 0.1512, 0.1512,
         0.1512],
        [0.0512, 0.0563, 0.0544, 0.0506, 0.0363, 0.1502, 0.1502, 0.1502, 0.1502,
         0.1502],
        [0.0332, 0.0378, 0.0363, 0.0341, 0.0220, 0.1673, 0.1673, 0.1673, 0.1673,
         0.1673],
        [0.0133, 0.0162, 0.0156, 0.0146, 0.0084, 0.1864, 0.1864, 0.1864, 0.1864,
         0.1864],
        [0.0065, 0.0082, 0.0078, 0.0072, 0.0040, 0.1933, 0.1933, 0.1933, 0.1933,
         0.1933]

Epoch: 116.00, Train Loss: 1.69, Val Loss: 11.80, Train BLEU: 30.24, Val BLEU: 0.28
Sampling from training predictions...
Source: 这儿 基本 基本上 都 没有 被 开发 发过 但是 像
Reference: it &apos;s mostly unexplored , and yet there are
Model: <SOS> and of the the and , , , ,
Attention Weights: tensor([[0.0913, 0.1008, 0.1017, 0.1025, 0.1030, 0.1022, 0.1023, 0.0985, 0.0973,
         0.1004],
        [0.0897, 0.1011, 0.1026, 0.1015, 0.1015, 0.1010, 0.1011, 0.0979, 0.0983,
         0.1054],
        [0.0873, 0.1012, 0.1031, 0.1022, 0.1023, 0.1018, 0.1019, 0.0986, 0.0989,
         0.1027],
        [0.0852, 0.1025, 0.1039, 0.1033, 0.1028, 0.1028, 0.1020, 0.0991, 0.0987,
         0.0997],
        [0.0832, 0.1037, 0.1037, 0.1045, 0.1030, 0.1042, 0.1021, 0.0997, 0.0980,
         0.0980],
        [0.0805, 0.1039, 0.1038, 0.1054, 0.1034, 0.1051, 0.1023, 0.1006, 0.0982,
         0.0969],
        [0.0785, 0.1040, 0.1042, 0.1059, 0.1038, 0.1057, 0.1026, 0.1010, 0.0983,
         0.0959],
        [0.0768, 0.1043, 0.104

Epoch: 120.00, Train Loss: 1.62, Val Loss: 11.86, Train BLEU: 38.40, Val BLEU: 0.27
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> most of the the the the the the the
Attention Weights: tensor([[0.0942, 0.1035, 0.1029, 0.1027, 0.1015, 0.1013, 0.0992, 0.0980, 0.0962,
         0.1006],
        [0.0945, 0.1041, 0.1017, 0.1007, 0.1012, 0.0998, 0.0979, 0.0985, 0.0961,
         0.1055],
        [0.0901, 0.1044, 0.1024, 0.1014, 0.1023, 0.1002, 0.0988, 0.1005, 0.0964,
         0.1035],
        [0.0886, 0.1057, 0.1027, 0.1023, 0.1023, 0.1012, 0.1001, 0.1008, 0.0971,
         0.0991],
        [0.0851, 0.1059, 0.1033, 0.1038, 0.1023, 0.1030, 0.1023, 0.1013, 0.0986,
         0.0944],
        [0.0822, 0.1060, 0.1041, 0.1048, 0.1027, 0.1042, 0.1038, 0.1019, 0.0995,
         0.0908],
        [0.0803, 0.1061, 0.1046, 0.1056, 0.1030, 0.1052, 0.1046, 0.1022, 0.0998,
         0.0886],
        [0.0797, 0.1062, 0.1048

Epoch: 124.00, Train Loss: 1.60, Val Loss: 11.78, Train BLEU: 32.83, Val BLEU: 0.44
Sampling from training predictions...
Source: <UNK> 塞尔 <UNK> <UNK> 斯特 说 过 真正 的 探索
Reference: marcel proust said , &quot; the true voyage of
Model: <SOS> marcel proust said said &quot; true voyage voyage voyage
Attention Weights: tensor([[0.1178, 0.1316, 0.1317, 0.1456, 0.0948, 0.0724, 0.0777, 0.0757, 0.0763,
         0.0763],
        [0.1072, 0.1171, 0.1169, 0.1260, 0.0950, 0.0808, 0.0886, 0.0880, 0.0888,
         0.0915],
        [0.0831, 0.0886, 0.0925, 0.1104, 0.1052, 0.0955, 0.1060, 0.1053, 0.1067,
         0.1066],
        [0.0574, 0.0608, 0.0653, 0.0835, 0.1091, 0.1164, 0.1289, 0.1282, 0.1290,
         0.1213],
        [0.0464, 0.0490, 0.0533, 0.0690, 0.1048, 0.1286, 0.1420, 0.1401, 0.1404,
         0.1265],
        [0.0426, 0.0448, 0.0490, 0.0633, 0.1016, 0.1331, 0.1469, 0.1448, 0.1451,
         0.1288],
        [0.0408, 0.0429, 0.0471, 0.0605, 0.0988, 0.1348, 0.1492, 0.1476, 0.1477,
         0.1

Epoch: 128.00, Train Loss: 1.53, Val Loss: 11.95, Train BLEU: 41.98, Val BLEU: 0.28
Sampling from training predictions...
Source: 大卫 <UNK> 这位 是 比尔 <UNK> 我 是 大卫 <UNK>
Reference: this is bill lange . i &apos;m dave gallo
Model: <SOS> this is bill lange . . dave gallo gallo
Attention Weights: tensor([[0.0986, 0.1286, 0.0902, 0.0917, 0.1077, 0.1261, 0.0833, 0.0850, 0.0827,
         0.1060],
        [0.0995, 0.1180, 0.0978, 0.1008, 0.1020, 0.1142, 0.0914, 0.0939, 0.0894,
         0.0930],
        [0.1020, 0.1079, 0.1043, 0.1088, 0.1001, 0.1081, 0.0978, 0.1016, 0.0923,
         0.0771],
        [0.0988, 0.1020, 0.1083, 0.1159, 0.0987, 0.1043, 0.1005, 0.1069, 0.0962,
         0.0684],
        [0.0963, 0.0931, 0.1149, 0.1266, 0.0948, 0.0975, 0.1053, 0.1148, 0.1000,
         0.0567],
        [0.0944, 0.0887, 0.1173, 0.1325, 0.0929, 0.0941, 0.1080, 0.1190, 0.1020,
         0.0511],
        [0.0928, 0.0864, 0.1180, 0.1353, 0.0926, 0.0926, 0.1095, 0.1215, 0.1033,
         0.0480],
        [0.0917,

Epoch: 132.00, Train Loss: 1.45, Val Loss: 11.92, Train BLEU: 44.13, Val BLEU: 0.42
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> it &apos;s all those different working things ? <EOS>
Attention Weights: tensor([[0.0854, 0.0888, 0.0890, 0.0889, 0.0880, 0.0864, 0.0847, 0.0817, 0.0628,
         0.2443],
        [0.0802, 0.0821, 0.0826, 0.0823, 0.0818, 0.0807, 0.0799, 0.0782, 0.0596,
         0.2927],
        [0.0784, 0.0837, 0.0845, 0.0841, 0.0845, 0.0834, 0.0832, 0.0818, 0.0625,
         0.2738],
        [0.0704, 0.0783, 0.0790, 0.0785, 0.0790, 0.0777, 0.0775, 0.0754, 0.0544,
         0.3298],
        [0.0634, 0.0729, 0.0733, 0.0729, 0.0730, 0.0716, 0.0711, 0.0681, 0.0455,
         0.3882],
        [0.0444, 0.0552, 0.0557, 0.0553, 0.0554, 0.0540, 0.0533, 0.0498, 0.0295,
         0.5474],
        [0.0219, 0.0291, 0.0295, 0.0294, 0.0295, 0.0289, 0.0285, 0.0265, 0.0146,
         0.7622]

Epoch: 136.00, Train Loss: 1.38, Val Loss: 11.94, Train BLEU: 50.19, Val BLEU: 0.28
Sampling from training predictions...
Source: 这 是 一种 种群 栖 动物 <EOS> <PAD> <PAD> <PAD>
Reference: it &apos;s a colonial animal . <EOS> <PAD> <PAD>
Model: <SOS> it &apos;s a colonial animal . <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0742, 0.0785, 0.0774, 0.0762, 0.0761, 0.0719, 0.0543, 0.1638, 0.1638,
         0.1638],
        [0.0586, 0.0610, 0.0610, 0.0605, 0.0601, 0.0589, 0.0447, 0.1984, 0.1984,
         0.1984],
        [0.0593, 0.0634, 0.0638, 0.0630, 0.0625, 0.0616, 0.0473, 0.1930, 0.1930,
         0.1930],
        [0.0561, 0.0610, 0.0610, 0.0605, 0.0602, 0.0591, 0.0440, 0.1994, 0.1994,
         0.1994],
        [0.0381, 0.0433, 0.0430, 0.0426, 0.0424, 0.0412, 0.0273, 0.2407, 0.2407,
         0.2407],
        [0.0130, 0.0162, 0.0159, 0.0158, 0.0159, 0.0151, 0.0086, 0.2998, 0.2998,
         0.2998],
        [0.0045, 0.0060, 0.0059, 0.0059, 0.0059, 0.0054, 0.0029, 0.3212, 0.3212,
         0.3212

Epoch: 140.00, Train Loss: 1.30, Val Loss: 11.94, Train BLEU: 55.82, Val BLEU: 0.43
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> most of the the are the the the the
Attention Weights: tensor([[0.0982, 0.1065, 0.1020, 0.1007, 0.0985, 0.0981, 0.0981, 0.0960, 0.0961,
         0.1058],
        [0.0982, 0.1076, 0.1015, 0.0988, 0.0999, 0.0960, 0.0971, 0.0965, 0.0944,
         0.1101],
        [0.0911, 0.1069, 0.1027, 0.0997, 0.1009, 0.0971, 0.0985, 0.0985, 0.0951,
         0.1095],
        [0.0881, 0.1082, 0.1030, 0.1011, 0.1012, 0.0993, 0.0999, 0.0986, 0.0963,
         0.1042],
        [0.0821, 0.1078, 0.1037, 0.1034, 0.1016, 0.1031, 0.1028, 0.0992, 0.0988,
         0.0975],
        [0.0766, 0.1074, 0.1052, 0.1051, 0.1029, 0.1056, 0.1050, 0.1003, 0.1002,
         0.0916],
        [0.0744, 0.1070, 0.1060, 0.1061, 0.1033, 0.1070, 0.1061, 0.1009, 0.1009,
         0.0883],
        [0.0734, 0.1066, 0.1063

Epoch: 144.00, Train Loss: 1.23, Val Loss: 11.96, Train BLEU: 57.85, Val BLEU: 0.57
Sampling from training predictions...
Source: 底下 这些 都 是 <UNK> 它们 上上 上上下下 上下 下下
Reference: it &apos;s got these fishing <UNK> on the bottom
Model: <SOS> it &apos;s got these fishing <UNK> on the bottom
Attention Weights: tensor([[0.1006, 0.1009, 0.1033, 0.1077, 0.1327, 0.0920, 0.0954, 0.0894, 0.0883,
         0.0897],
        [0.1042, 0.1031, 0.1051, 0.1077, 0.1203, 0.0986, 0.0950, 0.0929, 0.0899,
         0.0832],
        [0.0992, 0.1006, 0.1043, 0.1098, 0.1291, 0.0964, 0.0943, 0.0911, 0.0878,
         0.0875],
        [0.0968, 0.1045, 0.1091, 0.1159, 0.1230, 0.0961, 0.0980, 0.0906, 0.0880,
         0.0779],
        [0.0962, 0.1093, 0.1127, 0.1163, 0.1104, 0.0985, 0.1033, 0.0955, 0.0924,
         0.0654],
        [0.0932, 0.1118, 0.1142, 0.1160, 0.1063, 0.0988, 0.1081, 0.0980, 0.0946,
         0.0589],
        [0.0901, 0.1145, 0.1154, 0.1147, 0.0996, 0.1004, 0.1146, 0.1014, 0.0970,
         0.0524],
   

Epoch: 148.00, Train Loss: 1.14, Val Loss: 12.03, Train BLEU: 63.68, Val BLEU: 0.57
Sampling from training predictions...
Source: 原因 在于 我们 一直 没 把 海洋 当回事 回事 回事儿
Reference: and the problem , i think , is that
Model: <SOS> and the problem , , , , is that
Attention Weights: tensor([[0.0945, 0.1015, 0.1004, 0.0980, 0.0997, 0.0994, 0.1025, 0.0949, 0.1015,
         0.1076],
        [0.0954, 0.1037, 0.1028, 0.1011, 0.1016, 0.1028, 0.1077, 0.1007, 0.0969,
         0.0873],
        [0.0920, 0.1057, 0.1068, 0.1056, 0.1046, 0.1073, 0.1123, 0.1051, 0.0923,
         0.0684],
        [0.0901, 0.1114, 0.1131, 0.1114, 0.1113, 0.1140, 0.1159, 0.1073, 0.0774,
         0.0481],
        [0.0868, 0.1153, 0.1178, 0.1158, 0.1178, 0.1195, 0.1175, 0.1089, 0.0656,
         0.0349],
        [0.0850, 0.1170, 0.1205, 0.1189, 0.1217, 0.1223, 0.1176, 0.1098, 0.0588,
         0.0284],
        [0.0841, 0.1176, 0.1215, 0.1202, 0.1231, 0.1234, 0.1175, 0.1097, 0.0564,
         0.0266],
        [0.0836, 0.1178, 0.1218, 0.1

Epoch: 152.00, Train Loss: 1.06, Val Loss: 12.05, Train BLEU: 65.97, Val BLEU: 0.58
Sampling from training predictions...
Source: 还有 前面 的 这个 是 推进 引擎 它 一会 一会儿
Reference: and it &apos;s got these jet thrusters up in
Model: <SOS> and it &apos;s got these jet thrusters up in
Attention Weights: tensor([[0.0896, 0.0917, 0.0907, 0.0888, 0.0906, 0.0893, 0.0857, 0.0871, 0.1082,
         0.1782],
        [0.0932, 0.0913, 0.0901, 0.0893, 0.0901, 0.0911, 0.0858, 0.0870, 0.1096,
         0.1724],
        [0.0927, 0.0922, 0.0909, 0.0901, 0.0905, 0.0922, 0.0862, 0.0870, 0.1107,
         0.1675],
        [0.0900, 0.0931, 0.0922, 0.0905, 0.0913, 0.0926, 0.0862, 0.0864, 0.1114,
         0.1661],
        [0.0895, 0.0980, 0.0980, 0.0957, 0.0970, 0.0979, 0.0909, 0.0900, 0.1055,
         0.1376],
        [0.0898, 0.1044, 0.1051, 0.1021, 0.1050, 0.1046, 0.0987, 0.0966, 0.0924,
         0.1013],
        [0.0877, 0.1095, 0.1108, 0.1065, 0.1118, 0.1092, 0.1055, 0.1020, 0.0810,
         0.0761],
        [0.0853,

Epoch: 156.00, Train Loss: 0.99, Val Loss: 12.12, Train BLEU: 70.79, Val BLEU: 0.58
Sampling from training predictions...
Source: 是 我 最 喜欢 的 因为 它 哪 都 能动
Reference: it &apos;s one of my favorites , because it
Model: <SOS> it &apos;s one of my favorites , because because
Attention Weights: tensor([[0.0997, 0.1057, 0.1057, 0.1040, 0.1067, 0.1033, 0.1048, 0.1014, 0.0968,
         0.0719],
        [0.1036, 0.1058, 0.1055, 0.1049, 0.1068, 0.1041, 0.1037, 0.1003, 0.0962,
         0.0692],
        [0.0991, 0.1053, 0.1060, 0.1046, 0.1062, 0.1037, 0.1029, 0.1006, 0.0971,
         0.0744],
        [0.0941, 0.1062, 0.1082, 0.1058, 0.1072, 0.1049, 0.1038, 0.1024, 0.0981,
         0.0693],
        [0.0921, 0.1071, 0.1091, 0.1071, 0.1084, 0.1064, 0.1053, 0.1035, 0.0982,
         0.0629],
        [0.0891, 0.1079, 0.1105, 0.1081, 0.1102, 0.1076, 0.1078, 0.1054, 0.0986,
         0.0549],
        [0.0850, 0.1098, 0.1125, 0.1094, 0.1128, 0.1089, 0.1112, 0.1073, 0.0982,
         0.0449],
        [0.0825, 0

Epoch: 160.00, Train Loss: 0.92, Val Loss: 12.20, Train BLEU: 75.68, Val BLEU: 0.59
Sampling from training predictions...
Source: 还有 这些 摇晃 着 旋转 转着 的 触角 <EOS> <PAD>
Reference: it &apos;s got tentacles dangling , swirling around like
Model: <SOS> it &apos;s got tentacles dangling , swirling around like
Attention Weights: tensor([[0.0968, 0.1002, 0.1008, 0.1013, 0.1017, 0.1150, 0.0841, 0.0834, 0.0637,
         0.1530],
        [0.0969, 0.0958, 0.0955, 0.0955, 0.0963, 0.1053, 0.0789, 0.0809, 0.0598,
         0.1950],
        [0.0960, 0.0976, 0.0981, 0.0977, 0.0987, 0.1112, 0.0809, 0.0828, 0.0675,
         0.1695],
        [0.0852, 0.0926, 0.0959, 0.0938, 0.0967, 0.1090, 0.0752, 0.0767, 0.0597,
         0.2151],
        [0.0809, 0.0892, 0.0908, 0.0902, 0.0912, 0.1007, 0.0746, 0.0744, 0.0535,
         0.2544],
        [0.0674, 0.0810, 0.0814, 0.0826, 0.0813, 0.0917, 0.0663, 0.0642, 0.0385,
         0.3455],
        [0.0406, 0.0551, 0.0556, 0.0572, 0.0551, 0.0646, 0.0439, 0.0417, 0.0211,
    

Epoch: 164.00, Train Loss: 0.85, Val Loss: 12.26, Train BLEU: 78.32, Val BLEU: 0.55
Sampling from training predictions...
Source: 底下 这些 都 是 <UNK> 它们 上上 上上下下 上下 下下
Reference: it &apos;s got these fishing <UNK> on the bottom
Model: <SOS> it &apos;s got these fishing <UNK> on the bottom
Attention Weights: tensor([[0.0970, 0.0994, 0.1016, 0.1067, 0.1360, 0.0920, 0.0969, 0.0893, 0.0886,
         0.0924],
        [0.1005, 0.1010, 0.1022, 0.1052, 0.1254, 0.0972, 0.0966, 0.0926, 0.0904,
         0.0888],
        [0.0958, 0.0980, 0.1010, 0.1066, 0.1342, 0.0962, 0.0956, 0.0907, 0.0875,
         0.0944],
        [0.0933, 0.1013, 0.1049, 0.1124, 0.1286, 0.0976, 0.0993, 0.0906, 0.0878,
         0.0842],
        [0.0930, 0.1056, 0.1078, 0.1129, 0.1142, 0.1007, 0.1051, 0.0965, 0.0931,
         0.0711],
        [0.0897, 0.1080, 0.1095, 0.1127, 0.1112, 0.1003, 0.1097, 0.0990, 0.0958,
         0.0643],
        [0.0856, 0.1107, 0.1107, 0.1108, 0.1046, 0.1013, 0.1175, 0.1032, 0.0988,
         0.0568],
   

Epoch: 168.00, Train Loss: 0.79, Val Loss: 12.36, Train BLEU: 79.62, Val BLEU: 0.41
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> but see all those different working things ? <EOS>
Attention Weights: tensor([[0.0938, 0.0966, 0.0958, 0.0959, 0.0950, 0.0924, 0.0890, 0.0838, 0.0646,
         0.1931],
        [0.0881, 0.0872, 0.0860, 0.0860, 0.0853, 0.0833, 0.0815, 0.0787, 0.0616,
         0.2623],
        [0.0863, 0.0895, 0.0901, 0.0901, 0.0899, 0.0879, 0.0870, 0.0845, 0.0677,
         0.2271],
        [0.0752, 0.0835, 0.0857, 0.0855, 0.0852, 0.0828, 0.0824, 0.0791, 0.0596,
         0.2810],
        [0.0707, 0.0800, 0.0818, 0.0818, 0.0813, 0.0791, 0.0784, 0.0745, 0.0520,
         0.3206],
        [0.0523, 0.0642, 0.0661, 0.0663, 0.0653, 0.0629, 0.0617, 0.0567, 0.0338,
         0.4707],
        [0.0255, 0.0341, 0.0359, 0.0361, 0.0353, 0.0340, 0.0333, 0.0301, 0.0163,
         0.7194],
 

Epoch: 172.00, Train Loss: 0.74, Val Loss: 12.40, Train BLEU: 82.08, Val BLEU: 0.41
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> most of the animals are are the the the
Attention Weights: tensor([[0.0969, 0.1076, 0.1044, 0.1019, 0.1027, 0.0963, 0.0987, 0.0992, 0.0925,
         0.0998],
        [0.1004, 0.1092, 0.1030, 0.1001, 0.1028, 0.0940, 0.0974, 0.0984, 0.0905,
         0.1041],
        [0.0929, 0.1089, 0.1040, 0.1005, 0.1036, 0.0947, 0.0991, 0.1003, 0.0914,
         0.1045],
        [0.0863, 0.1090, 0.1041, 0.1011, 0.1045, 0.0973, 0.1011, 0.1019, 0.0940,
         0.1007],
        [0.0772, 0.1052, 0.1054, 0.1036, 0.1059, 0.1024, 0.1039, 0.1032, 0.0981,
         0.0952],
        [0.0687, 0.1024, 0.1082, 0.1060, 0.1080, 0.1066, 0.1064, 0.1054, 0.1013,
         0.0869],
        [0.0667, 0.1009, 0.1082, 0.1068, 0.1078, 0.1084, 0.1074, 0.1057, 0.1030,
         0.0850],
        [0.0654, 0.0991, 0.

Epoch: 176.00, Train Loss: 0.68, Val Loss: 12.46, Train BLEU: 87.31, Val BLEU: 0.46
Sampling from training predictions...
Source: 原因 在于 我们 一直 没 把 海洋 当回事 回事 回事儿
Reference: and the problem , i think , is that
Model: <SOS> and the problem , i , , is that
Attention Weights: tensor([[0.0934, 0.1029, 0.1018, 0.1007, 0.0996, 0.1016, 0.1077, 0.0924, 0.0968,
         0.1033],
        [0.0932, 0.1035, 0.1018, 0.1020, 0.0988, 0.1027, 0.1105, 0.0960, 0.0974,
         0.0940],
        [0.0902, 0.1046, 0.1060, 0.1069, 0.1018, 0.1070, 0.1132, 0.1004, 0.0950,
         0.0749],
        [0.0882, 0.1097, 0.1123, 0.1126, 0.1089, 0.1142, 0.1180, 0.1030, 0.0805,
         0.0527],
        [0.0831, 0.1128, 0.1170, 0.1161, 0.1170, 0.1207, 0.1210, 0.1057, 0.0694,
         0.0373],
        [0.0806, 0.1137, 0.1188, 0.1182, 0.1218, 0.1234, 0.1218, 0.1077, 0.0633,
         0.0308],
        [0.0791, 0.1139, 0.1196, 0.1190, 0.1233, 0.1246, 0.1219, 0.1076, 0.0618,
         0.0292],
        [0.0782, 0.1141, 0.1198, 0.1

Epoch: 180.00, Train Loss: 0.63, Val Loss: 12.54, Train BLEU: 89.93, Val BLEU: 0.46
Sampling from training predictions...
Source: 大家 想想 海洋 占 了 地球 球面 面积 的 75
Reference: when you think about it , the oceans are
Model: <SOS> when you think about it , the oceans are
Attention Weights: tensor([[0.0920, 0.1086, 0.1084, 0.1068, 0.1079, 0.1074, 0.1038, 0.1057, 0.0969,
         0.0626],
        [0.0928, 0.1074, 0.1064, 0.1059, 0.1068, 0.1046, 0.1050, 0.1081, 0.1004,
         0.0626],
        [0.0878, 0.1091, 0.1106, 0.1070, 0.1090, 0.1091, 0.1064, 0.1074, 0.0973,
         0.0563],
        [0.0838, 0.1109, 0.1125, 0.1102, 0.1103, 0.1113, 0.1091, 0.1080, 0.0954,
         0.0486],
        [0.0810, 0.1108, 0.1144, 0.1129, 0.1126, 0.1138, 0.1108, 0.1091, 0.0946,
         0.0399],
        [0.0777, 0.1113, 0.1142, 0.1154, 0.1144, 0.1144, 0.1126, 0.1090, 0.0943,
         0.0367],
        [0.0749, 0.1119, 0.1146, 0.1174, 0.1156, 0.1154, 0.1141, 0.1088, 0.0930,
         0.0345],
        [0.0727, 0.1116, 

Epoch: 184.00, Train Loss: 0.59, Val Loss: 12.61, Train BLEU: 92.25, Val BLEU: 0.47
Sampling from training predictions...
Source: 我们 得用 非常 特殊 的 仪器 才能 能到 到达 那个
Reference: we have to have a very special technology to
Model: <SOS> we have to have a very special technology to
Attention Weights: tensor([[0.0887, 0.1170, 0.0953, 0.1029, 0.0977, 0.1067, 0.1030, 0.1198, 0.0847,
         0.0843],
        [0.0912, 0.1156, 0.0951, 0.1027, 0.0968, 0.1062, 0.1035, 0.1161, 0.0859,
         0.0869],
        [0.0921, 0.1170, 0.0948, 0.1009, 0.0964, 0.1050, 0.1026, 0.1196, 0.0862,
         0.0855],
        [0.0898, 0.1126, 0.0976, 0.1011, 0.0989, 0.1055, 0.1019, 0.1178, 0.0891,
         0.0857],
        [0.0889, 0.1082, 0.0992, 0.1021, 0.1003, 0.1062, 0.1025, 0.1167, 0.0899,
         0.0860],
        [0.0858, 0.1036, 0.1018, 0.1041, 0.1027, 0.1078, 0.1024, 0.1157, 0.0907,
         0.0854],
        [0.0801, 0.1012, 0.1053, 0.1069, 0.1061, 0.1096, 0.1032, 0.1136, 0.0901,
         0.0839],
        [0.0721

Epoch: 188.00, Train Loss: 0.55, Val Loss: 12.67, Train BLEU: 96.15, Val BLEU: 0.33
Sampling from training predictions...
Source: 我们 得用 非常 特殊 的 仪器 才能 能到 到达 那个
Reference: we have to have a very special technology to
Model: <SOS> we have to have a very special technology to
Attention Weights: tensor([[0.0883, 0.1179, 0.0954, 0.1032, 0.0973, 0.1064, 0.1030, 0.1204, 0.0845,
         0.0836],
        [0.0906, 0.1167, 0.0951, 0.1030, 0.0965, 0.1060, 0.1038, 0.1165, 0.0858,
         0.0861],
        [0.0916, 0.1176, 0.0948, 0.1010, 0.0960, 0.1048, 0.1029, 0.1203, 0.0862,
         0.0849],
        [0.0892, 0.1130, 0.0976, 0.1013, 0.0986, 0.1053, 0.1022, 0.1183, 0.0892,
         0.0853],
        [0.0884, 0.1086, 0.0992, 0.1022, 0.1000, 0.1061, 0.1027, 0.1172, 0.0900,
         0.0856],
        [0.0854, 0.1040, 0.1018, 0.1040, 0.1024, 0.1078, 0.1027, 0.1162, 0.0907,
         0.0850],
        [0.0797, 0.1014, 0.1055, 0.1068, 0.1057, 0.1097, 0.1032, 0.1142, 0.0902,
         0.0835],
        [0.0715

Epoch: 192.00, Train Loss: 0.51, Val Loss: 12.71, Train BLEU: 95.38, Val BLEU: 0.47
Sampling from training predictions...
Source: 我 真 喜欢 这些 东西 <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: i love that kind of stuff . <EOS> <PAD>
Model: <SOS> i love that kind of stuff . <EOS> <EOS>
Attention Weights: tensor([[0.0591, 0.0639, 0.0622, 0.0587, 0.0556, 0.0395, 0.1653, 0.1653, 0.1653,
         0.1653],
        [0.0369, 0.0398, 0.0399, 0.0385, 0.0371, 0.0286, 0.1948, 0.1948, 0.1948,
         0.1948],
        [0.0477, 0.0524, 0.0525, 0.0503, 0.0478, 0.0396, 0.1774, 0.1774, 0.1774,
         0.1774],
        [0.0485, 0.0547, 0.0550, 0.0523, 0.0494, 0.0372, 0.1757, 0.1757, 0.1757,
         0.1757],
        [0.0426, 0.0494, 0.0495, 0.0471, 0.0448, 0.0303, 0.1841, 0.1841, 0.1841,
         0.1841],
        [0.0277, 0.0337, 0.0341, 0.0329, 0.0317, 0.0197, 0.2051, 0.2051, 0.2051,
         0.2051],
        [0.0125, 0.0160, 0.0166, 0.0160, 0.0155, 0.0088, 0.2286, 0.2286, 0.2286,
         0.2286],
        [0.

Epoch: 196.00, Train Loss: 0.47, Val Loss: 12.77, Train BLEU: 96.88, Val BLEU: 0.47
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> but see all those different working things ? <EOS>
Attention Weights: tensor([[0.0924, 0.0975, 0.0965, 0.0962, 0.0947, 0.0917, 0.0868, 0.0824, 0.0586,
         0.2032],
        [0.0865, 0.0872, 0.0860, 0.0854, 0.0844, 0.0821, 0.0797, 0.0775, 0.0581,
         0.2730],
        [0.0870, 0.0903, 0.0908, 0.0907, 0.0902, 0.0880, 0.0859, 0.0833, 0.0653,
         0.2285],
        [0.0735, 0.0830, 0.0858, 0.0852, 0.0848, 0.0820, 0.0810, 0.0777, 0.0571,
         0.2898],
        [0.0695, 0.0796, 0.0822, 0.0817, 0.0812, 0.0786, 0.0774, 0.0736, 0.0495,
         0.3266],
        [0.0517, 0.0641, 0.0669, 0.0667, 0.0661, 0.0635, 0.0617, 0.0567, 0.0313,
         0.4713],
        [0.0233, 0.0316, 0.0339, 0.0338, 0.0335, 0.0321, 0.0313, 0.0283, 0.0138,
         0.7385],
 

Epoch: 200.00, Train Loss: 0.44, Val Loss: 12.82, Train BLEU: 97.18, Val BLEU: 0.47
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> this turns out to be the longest creature in
Attention Weights: tensor([[0.0916, 0.0989, 0.0986, 0.1156, 0.1255, 0.0867, 0.0824, 0.0789, 0.0465,
         0.1753],
        [0.0889, 0.0918, 0.0919, 0.1087, 0.1155, 0.0820, 0.0774, 0.0768, 0.0518,
         0.2152],
        [0.0877, 0.0941, 0.0937, 0.1104, 0.1166, 0.0798, 0.0757, 0.0770, 0.0565,
         0.2085],
        [0.0819, 0.0918, 0.0916, 0.1052, 0.1175, 0.0742, 0.0725, 0.0734, 0.0566,
         0.2354],
        [0.0797, 0.0913, 0.0902, 0.1025, 0.1131, 0.0738, 0.0731, 0.0727, 0.0513,
         0.2522],
        [0.0718, 0.0871, 0.0849, 0.0964, 0.1028, 0.0703, 0.0696, 0.0685, 0.0412,
         0.3074],
        [0.0504, 0.0677, 0.0651, 0.0750, 0.0786, 0.0551, 0.0545, 0.0536, 0.0259,
         0.4740],
        [0.

Epoch: 204.00, Train Loss: 0.41, Val Loss: 12.85, Train BLEU: 97.88, Val BLEU: 0.47
Sampling from training predictions...
Source: 泰坦 泰坦尼克 泰坦尼克号 坦尼 尼克 号 是 拿 了 不少
Reference: the truth of the matter is that the titanic
Model: <SOS> the truth of the matter is that the titanic
Attention Weights: tensor([[0.1011, 0.1167, 0.1209, 0.1371, 0.0892, 0.0859, 0.0879, 0.0873, 0.0863,
         0.0875],
        [0.1081, 0.1210, 0.1238, 0.1362, 0.0861, 0.0849, 0.0869, 0.0855, 0.0835,
         0.0840],
        [0.1101, 0.1224, 0.1208, 0.1362, 0.0855, 0.0853, 0.0863, 0.0852, 0.0837,
         0.0845],
        [0.1057, 0.1153, 0.1144, 0.1248, 0.0873, 0.0887, 0.0912, 0.0910, 0.0900,
         0.0916],
        [0.1003, 0.1087, 0.1129, 0.1187, 0.0896, 0.0915, 0.0948, 0.0947, 0.0939,
         0.0948],
        [0.0920, 0.1001, 0.1102, 0.1120, 0.0912, 0.0958, 0.1007, 0.1003, 0.0993,
         0.0985],
        [0.0838, 0.0936, 0.1052, 0.1062, 0.0911, 0.1009, 0.1069, 0.1060, 0.1042,
         0.1020],
        [0.0821

Epoch: 208.00, Train Loss: 0.38, Val Loss: 12.89, Train BLEU: 97.88, Val BLEU: 0.83
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> this turns out to be the longest creature in
Attention Weights: tensor([[0.0909, 0.0983, 0.0977, 0.1183, 0.1259, 0.0866, 0.0818, 0.0782, 0.0456,
         0.1768],
        [0.0881, 0.0912, 0.0910, 0.1098, 0.1156, 0.0815, 0.0767, 0.0762, 0.0510,
         0.2188],
        [0.0875, 0.0938, 0.0930, 0.1120, 0.1165, 0.0796, 0.0752, 0.0767, 0.0555,
         0.2102],
        [0.0817, 0.0916, 0.0911, 0.1071, 0.1185, 0.0743, 0.0724, 0.0734, 0.0561,
         0.2338],
        [0.0798, 0.0913, 0.0901, 0.1043, 0.1149, 0.0742, 0.0732, 0.0728, 0.0513,
         0.2481],
        [0.0722, 0.0870, 0.0843, 0.0976, 0.1045, 0.0708, 0.0698, 0.0687, 0.0414,
         0.3037],
        [0.0501, 0.0669, 0.0634, 0.0748, 0.0792, 0.0551, 0.0543, 0.0534, 0.0260,
         0.4769],
        [0.

Epoch: 212.00, Train Loss: 0.35, Val Loss: 12.92, Train BLEU: 97.88, Val BLEU: 0.83
Sampling from training predictions...
Source: 其实 它们 都 是 由 单独 的 动物 结合 合在
Reference: these are all individual animals banding together to make
Model: <SOS> these are all individual animals banding together to make
Attention Weights: tensor([[0.0914, 0.1014, 0.1025, 0.1038, 0.1048, 0.1049, 0.1034, 0.1043, 0.1021,
         0.0814],
        [0.0917, 0.1006, 0.1012, 0.1020, 0.1029, 0.1030, 0.1020, 0.1053, 0.1046,
         0.0869],
        [0.0903, 0.1006, 0.1022, 0.1040, 0.1045, 0.1051, 0.1029, 0.1041, 0.1036,
         0.0827],
        [0.0884, 0.1021, 0.1038, 0.1054, 0.1054, 0.1060, 0.1035, 0.1017, 0.1014,
         0.0821],
        [0.0873, 0.1052, 0.1070, 0.1088, 0.1087, 0.1094, 0.1068, 0.1030, 0.1021,
         0.0616],
        [0.0843, 0.1073, 0.1097, 0.1119, 0.1115, 0.1126, 0.1099, 0.1041, 0.1018,
         0.0470],
        [0.0808, 0.1081, 0.1110, 0.1134, 0.1129, 0.1137, 0.1111, 0.1045, 0.1017,
         0

Epoch: 216.00, Train Loss: 0.33, Val Loss: 12.95, Train BLEU: 98.94, Val BLEU: 0.84
Sampling from training predictions...
Source: 其实 它们 都 是 由 单独 的 动物 结合 合在
Reference: these are all individual animals banding together to make
Model: <SOS> these are all individual animals banding together to make
Attention Weights: tensor([[0.0910, 0.1011, 0.1022, 0.1040, 0.1052, 0.1051, 0.1038, 0.1039, 0.1019,
         0.0819],
        [0.0915, 0.1003, 0.1008, 0.1021, 0.1032, 0.1030, 0.1022, 0.1049, 0.1042,
         0.0879],
        [0.0901, 0.1004, 0.1020, 0.1042, 0.1049, 0.1053, 0.1034, 0.1037, 0.1029,
         0.0831],
        [0.0883, 0.1018, 0.1035, 0.1055, 0.1058, 0.1061, 0.1039, 0.1014, 0.1007,
         0.0831],
        [0.0873, 0.1049, 0.1067, 0.1087, 0.1089, 0.1094, 0.1071, 0.1026, 0.1013,
         0.0632],
        [0.0845, 0.1071, 0.1093, 0.1116, 0.1117, 0.1125, 0.1102, 0.1036, 0.1010,
         0.0484],
        [0.0812, 0.1079, 0.1105, 0.1130, 0.1129, 0.1137, 0.1114, 0.1039, 0.1012,
         0

Epoch: 220.00, Train Loss: 0.30, Val Loss: 12.99, Train BLEU: 98.94, Val BLEU: 0.99
Sampling from training predictions...
Source: 这 是 一只 水母 <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: here &apos;s a jelly . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> here &apos;s a jelly . <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0478, 0.0544, 0.0524, 0.0491, 0.0286, 0.1535, 0.1535, 0.1535, 0.1535,
         0.1535],
        [0.0316, 0.0348, 0.0346, 0.0333, 0.0234, 0.1685, 0.1685, 0.1685, 0.1685,
         0.1685],
        [0.0428, 0.0472, 0.0465, 0.0448, 0.0352, 0.1567, 0.1567, 0.1567, 0.1567,
         0.1567],
        [0.0425, 0.0473, 0.0460, 0.0449, 0.0313, 0.1576, 0.1576, 0.1576, 0.1576,
         0.1576],
        [0.0194, 0.0229, 0.0221, 0.0218, 0.0124, 0.1803, 0.1803, 0.1803, 0.1803,
         0.1803],
        [0.0021, 0.0027, 0.0028, 0.0027, 0.0013, 0.1977, 0.1977, 0.1977, 0.1977,
         0.1977],
        [0.0014, 0.0018, 0.0018, 0.0018, 0.0008, 0.1985, 0.1985, 0.1985, 0.1985,
         0.1

Epoch: 224.00, Train Loss: 0.28, Val Loss: 13.03, Train BLEU: 98.94, Val BLEU: 0.98
Sampling from training predictions...
Source: 大卫 <UNK> 通过 潜水 潜水艇 拍下 的 影片 把 我们
Reference: with vibrant video clips captured by submarines , david
Model: <SOS> with vibrant video clips captured by submarines , david
Attention Weights: tensor([[0.1060, 0.1195, 0.1005, 0.1060, 0.1382, 0.0867, 0.0874, 0.0872, 0.0839,
         0.0846],
        [0.1069, 0.1135, 0.1059, 0.1113, 0.1317, 0.0878, 0.0868, 0.0872, 0.0830,
         0.0860],
        [0.1102, 0.1237, 0.1015, 0.1096, 0.1351, 0.0845, 0.0850, 0.0845, 0.0812,
         0.0846],
        [0.0998, 0.1198, 0.1016, 0.1099, 0.1358, 0.0891, 0.0884, 0.0868, 0.0843,
         0.0844],
        [0.0894, 0.1049, 0.1021, 0.1105, 0.1369, 0.0959, 0.0941, 0.0913, 0.0889,
         0.0861],
        [0.0834, 0.1016, 0.0996, 0.1084, 0.1401, 0.0991, 0.0975, 0.0933, 0.0913,
         0.0857],
        [0.0792, 0.1041, 0.0950, 0.1038, 0.1408, 0.1003, 0.0998, 0.0959, 0.0941,
        

Epoch: 228.00, Train Loss: 0.26, Val Loss: 13.06, Train BLEU: 100.00, Val BLEU: 0.98
Sampling from training predictions...
Source: 海洋 里 生物 的 多样 多样性 和 密度 要 比
Reference: the biodiversity and the <UNK> in the ocean is
Model: <SOS> the biodiversity and the <UNK> in the ocean is
Attention Weights: tensor([[0.1001, 0.1007, 0.1118, 0.1064, 0.1162, 0.1323, 0.0830, 0.0848, 0.0804,
         0.0843],
        [0.1024, 0.1011, 0.1113, 0.1036, 0.1161, 0.1272, 0.0838, 0.0850, 0.0819,
         0.0875],
        [0.1000, 0.1007, 0.1118, 0.1042, 0.1173, 0.1292, 0.0831, 0.0846, 0.0813,
         0.0879],
        [0.0941, 0.0990, 0.1123, 0.1053, 0.1176, 0.1308, 0.0855, 0.0865, 0.0827,
         0.0861],
        [0.0897, 0.0980, 0.1120, 0.1063, 0.1165, 0.1287, 0.0882, 0.0888, 0.0855,
         0.0864],
        [0.0843, 0.0969, 0.1105, 0.1074, 0.1133, 0.1260, 0.0919, 0.0921, 0.0901,
         0.0876],
        [0.0782, 0.0953, 0.1087, 0.1087, 0.1120, 0.1256, 0.0956, 0.0954, 0.0934,
         0.0869],
        [0.07

Epoch: 232.00, Train Loss: 0.24, Val Loss: 13.10, Train BLEU: 100.00, Val BLEU: 0.98
Sampling from training predictions...
Source: 我们 用 的 是 深海 潜水 潜水艇 <UNK> 号 和
Reference: we use the submarine alvin and we use cameras
Model: <SOS> we use the submarine alvin and we use cameras
Attention Weights: tensor([[0.0870, 0.0942, 0.0982, 0.0982, 0.1094, 0.1035, 0.1179, 0.1147, 0.0817,
         0.0952],
        [0.0857, 0.0922, 0.0951, 0.0971, 0.1095, 0.1040, 0.1208, 0.1111, 0.0834,
         0.1012],
        [0.0807, 0.0878, 0.0916, 0.0944, 0.1111, 0.1072, 0.1320, 0.1193, 0.0785,
         0.0973],
        [0.0772, 0.0902, 0.0965, 0.0980, 0.1139, 0.1093, 0.1286, 0.1156, 0.0783,
         0.0924],
        [0.0797, 0.0937, 0.0991, 0.1009, 0.1140, 0.1096, 0.1186, 0.1053, 0.0839,
         0.0952],
        [0.0785, 0.0954, 0.1008, 0.1017, 0.1135, 0.1099, 0.1173, 0.1051, 0.0839,
         0.0939],
        [0.0778, 0.0950, 0.1006, 0.1018, 0.1117, 0.1119, 0.1179, 0.1091, 0.0831,
         0.0910],
        [0.0

Epoch: 236.00, Train Loss: 0.22, Val Loss: 13.15, Train BLEU: 100.00, Val BLEU: 0.99
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> this turns out to be the longest creature in
Attention Weights: tensor([[0.0897, 0.0988, 0.0977, 0.1193, 0.1239, 0.0861, 0.0820, 0.0770, 0.0438,
         0.1817],
        [0.0859, 0.0900, 0.0892, 0.1096, 0.1131, 0.0803, 0.0764, 0.0744, 0.0490,
         0.2321],
        [0.0853, 0.0923, 0.0908, 0.1113, 0.1143, 0.0788, 0.0751, 0.0752, 0.0534,
         0.2235],
        [0.0809, 0.0914, 0.0902, 0.1065, 0.1194, 0.0746, 0.0727, 0.0733, 0.0559,
         0.2352],
        [0.0799, 0.0919, 0.0896, 0.1038, 0.1176, 0.0756, 0.0743, 0.0739, 0.0523,
         0.2411],
        [0.0728, 0.0871, 0.0833, 0.0969, 0.1080, 0.0730, 0.0714, 0.0701, 0.0427,
         0.2948],
        [0.0476, 0.0626, 0.0587, 0.0700, 0.0798, 0.0544, 0.0527, 0.0516, 0.0253,
         0.4972],
        [0

Epoch: 240.00, Train Loss: 0.21, Val Loss: 13.18, Train BLEU: 100.00, Val BLEU: 0.99
Sampling from training predictions...
Source: 我们 用 的 是 深海 潜水 潜水艇 <UNK> 号 和
Reference: we use the submarine alvin and we use cameras
Model: <SOS> we use the submarine alvin and we use cameras
Attention Weights: tensor([[0.0869, 0.0938, 0.0977, 0.0978, 0.1100, 0.1039, 0.1181, 0.1161, 0.0813,
         0.0945],
        [0.0852, 0.0916, 0.0946, 0.0971, 0.1099, 0.1046, 0.1217, 0.1123, 0.0828,
         0.1002],
        [0.0803, 0.0873, 0.0912, 0.0947, 0.1116, 0.1077, 0.1326, 0.1202, 0.0779,
         0.0964],
        [0.0769, 0.0897, 0.0961, 0.0982, 0.1142, 0.1098, 0.1292, 0.1165, 0.0778,
         0.0917],
        [0.0793, 0.0931, 0.0984, 0.1009, 0.1142, 0.1101, 0.1194, 0.1064, 0.0835,
         0.0947],
        [0.0781, 0.0947, 0.1000, 0.1015, 0.1136, 0.1104, 0.1182, 0.1067, 0.0835,
         0.0934],
        [0.0773, 0.0937, 0.0991, 0.1009, 0.1114, 0.1121, 0.1198, 0.1122, 0.0827,
         0.0908],
        [0.0

Epoch: 244.00, Train Loss: 0.19, Val Loss: 13.21, Train BLEU: 100.00, Val BLEU: 0.98
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> this turns out to be the longest creature in
Attention Weights: tensor([[0.0900, 0.0989, 0.0977, 0.1199, 0.1252, 0.0862, 0.0817, 0.0766, 0.0438,
         0.1799],
        [0.0858, 0.0897, 0.0889, 0.1098, 0.1147, 0.0801, 0.0759, 0.0740, 0.0488,
         0.2324],
        [0.0852, 0.0918, 0.0902, 0.1113, 0.1156, 0.0784, 0.0744, 0.0747, 0.0532,
         0.2251],
        [0.0808, 0.0912, 0.0898, 0.1067, 0.1208, 0.0744, 0.0721, 0.0731, 0.0560,
         0.2349],
        [0.0801, 0.0920, 0.0892, 0.1037, 0.1187, 0.0758, 0.0741, 0.0742, 0.0527,
         0.2394],
        [0.0732, 0.0871, 0.0830, 0.0969, 0.1093, 0.0734, 0.0715, 0.0706, 0.0431,
         0.2918],
        [0.0474, 0.0620, 0.0578, 0.0691, 0.0802, 0.0544, 0.0524, 0.0517, 0.0253,
         0.4998],
        [0

Epoch: 248.00, Train Loss: 0.18, Val Loss: 13.24, Train BLEU: 100.00, Val BLEU: 0.82
Sampling from training predictions...
Source: 大多 大多数 多数 地震 和 火山 喷发 也 都 发生
Reference: most of the earthquakes and volcanoes are in the
Model: <SOS> most of the earthquakes and volcanoes are in the
Attention Weights: tensor([[0.0965, 0.1482, 0.1027, 0.0991, 0.0907, 0.1095, 0.0905, 0.0832, 0.0874,
         0.0921],
        [0.0967, 0.1406, 0.1056, 0.1008, 0.0929, 0.1123, 0.0889, 0.0822, 0.0862,
         0.0936],
        [0.0935, 0.1415, 0.1036, 0.1001, 0.0930, 0.1162, 0.0881, 0.0818, 0.0869,
         0.0953],
        [0.0855, 0.1383, 0.1007, 0.1002, 0.0948, 0.1169, 0.0912, 0.0858, 0.0911,
         0.0956],
        [0.0781, 0.1232, 0.1029, 0.1049, 0.0977, 0.1178, 0.0971, 0.0905, 0.0942,
         0.0937],
        [0.0702, 0.1202, 0.1031, 0.1082, 0.0991, 0.1167, 0.1007, 0.0930, 0.0957,
         0.0932],
        [0.0656, 0.1176, 0.1045, 0.1112, 0.1007, 0.1149, 0.1050, 0.0949, 0.0961,
         0.0894],
       

Epoch: 252.00, Train Loss: 0.17, Val Loss: 13.29, Train BLEU: 100.00, Val BLEU: 0.83
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> but see all those different working things ? <EOS>
Attention Weights: tensor([[0.0919, 0.0979, 0.0971, 0.0971, 0.0961, 0.0943, 0.0871, 0.0825, 0.0556,
         0.2004],
        [0.0819, 0.0843, 0.0835, 0.0830, 0.0824, 0.0808, 0.0769, 0.0748, 0.0540,
         0.2985],
        [0.0839, 0.0886, 0.0895, 0.0897, 0.0899, 0.0885, 0.0852, 0.0820, 0.0617,
         0.2409],
        [0.0710, 0.0824, 0.0856, 0.0855, 0.0852, 0.0830, 0.0812, 0.0776, 0.0556,
         0.2928],
        [0.0688, 0.0802, 0.0824, 0.0822, 0.0817, 0.0799, 0.0779, 0.0741, 0.0501,
         0.3227],
        [0.0515, 0.0638, 0.0657, 0.0657, 0.0652, 0.0635, 0.0614, 0.0569, 0.0329,
         0.4733],
        [0.0212, 0.0282, 0.0297, 0.0296, 0.0295, 0.0288, 0.0281, 0.0260, 0.0138,
         0.7651],


Epoch: 256.00, Train Loss: 0.16, Val Loss: 13.32, Train BLEU: 100.00, Val BLEU: 0.84
Sampling from training predictions...
Source: 我 真 喜欢 这些 东西 <EOS> <PAD> <PAD> <PAD> <PAD>
Reference: i love that kind of stuff . <EOS> <PAD>
Model: <SOS> i love that kind of stuff . <EOS> <EOS>
Attention Weights: tensor([[0.0569, 0.0612, 0.0591, 0.0569, 0.0539, 0.0373, 0.1687, 0.1687, 0.1687,
         0.1687],
        [0.0326, 0.0353, 0.0351, 0.0343, 0.0334, 0.0250, 0.2011, 0.2011, 0.2011,
         0.2011],
        [0.0458, 0.0504, 0.0498, 0.0484, 0.0460, 0.0382, 0.1803, 0.1803, 0.1803,
         0.1803],
        [0.0487, 0.0559, 0.0551, 0.0530, 0.0497, 0.0370, 0.1752, 0.1752, 0.1752,
         0.1752],
        [0.0421, 0.0504, 0.0492, 0.0474, 0.0446, 0.0294, 0.1842, 0.1842, 0.1842,
         0.1842],
        [0.0259, 0.0323, 0.0320, 0.0317, 0.0305, 0.0187, 0.2072, 0.2072, 0.2072,
         0.2072],
        [0.0101, 0.0132, 0.0136, 0.0137, 0.0135, 0.0077, 0.2321, 0.2321, 0.2321,
         0.2321],
        [0

Epoch: 260.00, Train Loss: 0.15, Val Loss: 13.35, Train BLEU: 100.00, Val BLEU: 0.84
Sampling from training predictions...
Source: 大卫 <UNK> 通过 潜水 潜水艇 拍下 的 影片 把 我们
Reference: with vibrant video clips captured by submarines , david
Model: <SOS> with vibrant video clips captured by submarines , david
Attention Weights: tensor([[0.1060, 0.1247, 0.1012, 0.1057, 0.1430, 0.0842, 0.0854, 0.0847, 0.0816,
         0.0836],
        [0.1069, 0.1196, 0.1068, 0.1112, 0.1383, 0.0849, 0.0840, 0.0838, 0.0802,
         0.0843],
        [0.1114, 0.1309, 0.1018, 0.1090, 0.1398, 0.0820, 0.0824, 0.0809, 0.0784,
         0.0833],
        [0.0999, 0.1272, 0.1027, 0.1096, 0.1398, 0.0868, 0.0859, 0.0831, 0.0817,
         0.0832],
        [0.0898, 0.1112, 0.1037, 0.1100, 0.1421, 0.0930, 0.0914, 0.0876, 0.0860,
         0.0852],
        [0.0836, 0.1082, 0.1012, 0.1082, 0.1466, 0.0957, 0.0945, 0.0893, 0.0880,
         0.0846],
        [0.0798, 0.1129, 0.0955, 0.1028, 0.1485, 0.0959, 0.0959, 0.0915, 0.0905,
       

Epoch: 264.00, Train Loss: 0.14, Val Loss: 13.37, Train BLEU: 100.00, Val BLEU: 0.84
Sampling from training predictions...
Source: 大多 大多数 多数 地震 和 火山 喷发 也 都 发生
Reference: most of the earthquakes and volcanoes are in the
Model: <SOS> most of the earthquakes and volcanoes are in the
Attention Weights: tensor([[0.0970, 0.1527, 0.1022, 0.0987, 0.0903, 0.1084, 0.0894, 0.0828, 0.0869,
         0.0916],
        [0.0963, 0.1451, 0.1055, 0.1004, 0.0926, 0.1112, 0.0879, 0.0819, 0.0859,
         0.0931],
        [0.0928, 0.1463, 0.1029, 0.0998, 0.0926, 0.1155, 0.0872, 0.0815, 0.0869,
         0.0947],
        [0.0843, 0.1423, 0.1003, 0.1000, 0.0944, 0.1164, 0.0902, 0.0852, 0.0911,
         0.0957],
        [0.0767, 0.1267, 0.1033, 0.1047, 0.0972, 0.1178, 0.0961, 0.0896, 0.0940,
         0.0938],
        [0.0691, 0.1258, 0.1035, 0.1076, 0.0982, 0.1164, 0.0992, 0.0916, 0.0949,
         0.0937],
        [0.0645, 0.1239, 0.1051, 0.1107, 0.0995, 0.1141, 0.1035, 0.0933, 0.0949,
         0.0904],
       

Epoch: 268.00, Train Loss: 0.13, Val Loss: 13.40, Train BLEU: 100.00, Val BLEU: 0.84
Sampling from training predictions...
Source: 我们 得用 非常 特殊 的 仪器 才能 能到 到达 那个
Reference: we have to have a very special technology to
Model: <SOS> we have to have a very special technology to
Attention Weights: tensor([[0.0813, 0.1226, 0.0923, 0.1014, 0.0969, 0.1052, 0.1020, 0.1307, 0.0847,
         0.0828],
        [0.0825, 0.1236, 0.0915, 0.1006, 0.0956, 0.1045, 0.1025, 0.1270, 0.0867,
         0.0855],
        [0.0852, 0.1225, 0.0913, 0.0980, 0.0953, 0.1031, 0.1031, 0.1296, 0.0876,
         0.0842],
        [0.0834, 0.1149, 0.0957, 0.1000, 0.0977, 0.1026, 0.1022, 0.1280, 0.0913,
         0.0843],
        [0.0817, 0.1107, 0.0977, 0.1017, 0.0996, 0.1032, 0.1035, 0.1257, 0.0919,
         0.0843],
        [0.0790, 0.1062, 0.1000, 0.1028, 0.1013, 0.1040, 0.1034, 0.1265, 0.0933,
         0.0836],
        [0.0726, 0.1033, 0.1037, 0.1053, 0.1041, 0.1053, 0.1034, 0.1292, 0.0918,
         0.0813],
        [0.065

Epoch: 272.00, Train Loss: 0.12, Val Loss: 13.42, Train BLEU: 100.00, Val BLEU: 0.84
Sampling from training predictions...
Source: 和 我们 合作 的 人们 帮 我们 找到 了 新
Reference: people that have partnered with us have given us
Model: <SOS> people that have partnered with us have given us
Attention Weights: tensor([[0.0822, 0.0975, 0.1067, 0.1030, 0.1077, 0.1006, 0.1062, 0.0976, 0.0907,
         0.1078],
        [0.0842, 0.0978, 0.1076, 0.1017, 0.1075, 0.0992, 0.1055, 0.0989, 0.0900,
         0.1077],
        [0.0832, 0.0967, 0.1075, 0.1030, 0.1077, 0.0990, 0.1059, 0.1006, 0.0903,
         0.1062],
        [0.0823, 0.0977, 0.1065, 0.1029, 0.1066, 0.1001, 0.1052, 0.1022, 0.0940,
         0.1025],
        [0.0803, 0.0979, 0.1041, 0.1035, 0.1063, 0.1022, 0.1041, 0.1034, 0.0973,
         0.1011],
        [0.0764, 0.0972, 0.1027, 0.1042, 0.1070, 0.1044, 0.1040, 0.1039, 0.0995,
         0.1007],
        [0.0736, 0.0955, 0.1022, 0.1045, 0.1074, 0.1046, 0.1052, 0.1061, 0.1009,
         0.1000],
        [0

Epoch: 276.00, Train Loss: 0.11, Val Loss: 13.44, Train BLEU: 100.00, Val BLEU: 0.95
Sampling from training predictions...
Source: 还有 前面 的 这个 是 推进 引擎 它 一会 一会儿
Reference: and it &apos;s got these jet thrusters up in
Model: <SOS> and it &apos;s got these jet thrusters up in
Attention Weights: tensor([[0.0830, 0.0869, 0.0820, 0.0825, 0.0818, 0.0837, 0.0787, 0.0828, 0.1201,
         0.2184],
        [0.0807, 0.0836, 0.0775, 0.0778, 0.0767, 0.0811, 0.0745, 0.0793, 0.1200,
         0.2488],
        [0.0793, 0.0853, 0.0800, 0.0814, 0.0803, 0.0851, 0.0779, 0.0810, 0.1182,
         0.2314],
        [0.0771, 0.0819, 0.0775, 0.0783, 0.0779, 0.0820, 0.0749, 0.0766, 0.1198,
         0.2540],
        [0.0793, 0.0881, 0.0841, 0.0834, 0.0843, 0.0871, 0.0802, 0.0810, 0.1170,
         0.2155],
        [0.0821, 0.0937, 0.0884, 0.0875, 0.0888, 0.0914, 0.0859, 0.0880, 0.1115,
         0.1827],
        [0.0791, 0.0971, 0.0916, 0.0909, 0.0939, 0.0945, 0.0921, 0.0941, 0.1098,
         0.1570],
        [0.0756

Epoch: 280.00, Train Loss: 0.10, Val Loss: 13.47, Train BLEU: 100.00, Val BLEU: 0.95
Sampling from training predictions...
Source: 大家 想想 海洋 占 了 地球 球面 面积 的 75
Reference: when you think about it , the oceans are
Model: <SOS> when you think about it , the oceans are
Attention Weights: tensor([[0.0879, 0.1069, 0.1100, 0.1046, 0.1094, 0.1088, 0.1028, 0.1092, 0.0961,
         0.0642],
        [0.0892, 0.1058, 0.1086, 0.1029, 0.1077, 0.1068, 0.1023, 0.1073, 0.0973,
         0.0721],
        [0.0840, 0.1060, 0.1115, 0.1054, 0.1101, 0.1114, 0.1035, 0.1065, 0.0939,
         0.0678],
        [0.0801, 0.1062, 0.1116, 0.1100, 0.1112, 0.1134, 0.1061, 0.1057, 0.0899,
         0.0658],
        [0.0788, 0.1049, 0.1135, 0.1117, 0.1126, 0.1157, 0.1075, 0.1079, 0.0896,
         0.0578],
        [0.0762, 0.1049, 0.1135, 0.1118, 0.1138, 0.1158, 0.1079, 0.1093, 0.0912,
         0.0555],
        [0.0732, 0.1045, 0.1106, 0.1145, 0.1142, 0.1149, 0.1114, 0.1089, 0.0925,
         0.0553],
        [0.0716, 0.1023,

Epoch: 284.00, Train Loss: 0.10, Val Loss: 13.50, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> this turns out to be the longest creature in
Attention Weights: tensor([[0.0898, 0.0991, 0.0955, 0.1200, 0.1261, 0.0852, 0.0805, 0.0764, 0.0447,
         0.1826],
        [0.0849, 0.0890, 0.0861, 0.1090, 0.1171, 0.0785, 0.0743, 0.0732, 0.0492,
         0.2386],
        [0.0845, 0.0907, 0.0871, 0.1097, 0.1180, 0.0768, 0.0727, 0.0737, 0.0535,
         0.2332],
        [0.0803, 0.0908, 0.0879, 0.1054, 0.1251, 0.0741, 0.0715, 0.0732, 0.0572,
         0.2345],
        [0.0801, 0.0926, 0.0885, 0.1028, 0.1235, 0.0760, 0.0742, 0.0748, 0.0537,
         0.2339],
        [0.0740, 0.0875, 0.0830, 0.0965, 0.1165, 0.0736, 0.0710, 0.0699, 0.0440,
         0.2841],
        [0.0459, 0.0594, 0.0556, 0.0668, 0.0832, 0.0522, 0.0495, 0.0489, 0.0250,
         0.5135],
        [0

Epoch: 288.00, Train Loss: 0.09, Val Loss: 13.52, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 泰坦 泰坦尼克 泰坦尼克号 坦尼 尼克 号 是 拿 了 不少
Reference: the truth of the matter is that the titanic
Model: <SOS> the truth of the matter is that the titanic
Attention Weights: tensor([[0.0991, 0.1236, 0.1259, 0.1499, 0.0862, 0.0831, 0.0849, 0.0836, 0.0809,
         0.0829],
        [0.1045, 0.1276, 0.1297, 0.1513, 0.0824, 0.0809, 0.0846, 0.0818, 0.0776,
         0.0795],
        [0.1050, 0.1274, 0.1255, 0.1492, 0.0815, 0.0812, 0.0855, 0.0835, 0.0794,
         0.0818],
        [0.1007, 0.1186, 0.1202, 0.1375, 0.0837, 0.0853, 0.0898, 0.0891, 0.0858,
         0.0893],
        [0.0944, 0.1175, 0.1230, 0.1381, 0.0846, 0.0853, 0.0901, 0.0897, 0.0865,
         0.0909],
        [0.0884, 0.1170, 0.1266, 0.1420, 0.0846, 0.0856, 0.0903, 0.0896, 0.0862,
         0.0899],
        [0.0819, 0.1127, 0.1256, 0.1410, 0.0850, 0.0876, 0.0925, 0.0919, 0.0884,
         0.0933],
        [0.080

Epoch: 292.00, Train Loss: 0.09, Val Loss: 13.54, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 我们 用 的 是 深海 潜水 潜水艇 <UNK> 号 和
Reference: we use the submarine alvin and we use cameras
Model: <SOS> we use the submarine alvin and we use cameras
Attention Weights: tensor([[0.0876, 0.0941, 0.0961, 0.0958, 0.1105, 0.1046, 0.1213, 0.1191, 0.0792,
         0.0917],
        [0.0855, 0.0924, 0.0941, 0.0962, 0.1109, 0.1055, 0.1259, 0.1152, 0.0796,
         0.0948],
        [0.0814, 0.0891, 0.0917, 0.0951, 0.1130, 0.1076, 0.1339, 0.1211, 0.0755,
         0.0916],
        [0.0771, 0.0904, 0.0954, 0.0978, 0.1148, 0.1090, 0.1306, 0.1217, 0.0755,
         0.0876],
        [0.0791, 0.0929, 0.0971, 0.1003, 0.1150, 0.1107, 0.1217, 0.1106, 0.0814,
         0.0912],
        [0.0777, 0.0939, 0.0978, 0.1004, 0.1135, 0.1103, 0.1215, 0.1131, 0.0815,
         0.0903],
        [0.0772, 0.0923, 0.0955, 0.0986, 0.1097, 0.1104, 0.1247, 0.1197, 0.0816,
         0.0902],
        [0.0

Epoch: 296.00, Train Loss: 0.08, Val Loss: 13.56, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 海洋 的 平均 深度 是 两英里 英里 <EOS> <PAD> <PAD>
Reference: the average depth is about two miles . <EOS>
Model: <SOS> the average depth is about two miles . <EOS>
Attention Weights: tensor([[0.0793, 0.0871, 0.0945, 0.0897, 0.0840, 0.0839, 0.0704, 0.0404, 0.1853,
         0.1853],
        [0.0680, 0.0707, 0.0775, 0.0728, 0.0671, 0.0656, 0.0575, 0.0384, 0.2412,
         0.2412],
        [0.0736, 0.0784, 0.0835, 0.0809, 0.0757, 0.0794, 0.0656, 0.0487, 0.2071,
         0.2071],
        [0.0691, 0.0765, 0.0801, 0.0788, 0.0755, 0.0784, 0.0667, 0.0457, 0.2147,
         0.2147],
        [0.0545, 0.0632, 0.0662, 0.0657, 0.0635, 0.0653, 0.0546, 0.0334, 0.2668,
         0.2668],
        [0.0346, 0.0424, 0.0449, 0.0445, 0.0433, 0.0450, 0.0368, 0.0195, 0.3445,
         0.3445],
        [0.0163, 0.0209, 0.0221, 0.0218, 0.0212, 0.0239, 0.0192, 0.0097, 0.4225,
         0.4225],
     

Epoch: 300.00, Train Loss: 0.08, Val Loss: 13.58, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> most of the animals are in the oceans .
Attention Weights: tensor([[0.0972, 0.1115, 0.1081, 0.1027, 0.1073, 0.0905, 0.0960, 0.1014, 0.0860,
         0.0993],
        [0.1042, 0.1156, 0.1076, 0.1006, 0.1075, 0.0867, 0.0949, 0.1014, 0.0824,
         0.0992],
        [0.0978, 0.1135, 0.1081, 0.1015, 0.1088, 0.0890, 0.0952, 0.1026, 0.0841,
         0.0994],
        [0.0879, 0.1109, 0.1086, 0.1029, 0.1100, 0.0921, 0.0957, 0.1059, 0.0888,
         0.0970],
        [0.0764, 0.1021, 0.1100, 0.1050, 0.1100, 0.0990, 0.0986, 0.1074, 0.0953,
         0.0963],
        [0.0693, 0.0964, 0.1121, 0.1072, 0.1098, 0.1029, 0.0986, 0.1101, 0.0999,
         0.0938],
        [0.0688, 0.0918, 0.1098, 0.1075, 0.1075, 0.1064, 0.0991, 0.1090, 0.1042,
         0.0960],
        [0.0711, 0.0931, 0

Epoch: 304.00, Train Loss: 0.07, Val Loss: 13.60, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: <UNK> 塞尔 <UNK> <UNK> 斯特 说 过 真正 的 探索
Reference: marcel proust said , &quot; the true voyage of
Model: <SOS> marcel proust said , &quot; the true voyage of
Attention Weights: tensor([[0.1056, 0.1165, 0.1201, 0.1337, 0.1271, 0.0765, 0.0786, 0.0786, 0.0770,
         0.0864],
        [0.0991, 0.1086, 0.1100, 0.1195, 0.1268, 0.0835, 0.0882, 0.0855, 0.0840,
         0.0949],
        [0.0924, 0.1000, 0.1035, 0.1166, 0.1301, 0.0851, 0.0902, 0.0901, 0.0897,
         0.1024],
        [0.0676, 0.0741, 0.0793, 0.1005, 0.1434, 0.1027, 0.1089, 0.1041, 0.1051,
         0.1143],
        [0.0523, 0.0576, 0.0624, 0.0847, 0.1370, 0.1171, 0.1261, 0.1172, 0.1209,
         0.1246],
        [0.0639, 0.0711, 0.0762, 0.1006, 0.1346, 0.1074, 0.1156, 0.1060, 0.1105,
         0.1141],
        [0.0585, 0.0653, 0.0717, 0.0980, 0.1341, 0.1092, 0.1184, 0.1101, 0.1153,
         0.1194],
   

Epoch: 308.00, Train Loss: 0.07, Val Loss: 13.62, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 是 我 最 喜欢 的 因为 它 哪 都 能动
Reference: it &apos;s one of my favorites , because it
Model: <SOS> it &apos;s one of my favorites , because it
Attention Weights: tensor([[0.0924, 0.1045, 0.1039, 0.1066, 0.1078, 0.1080, 0.1056, 0.1013, 0.1014,
         0.0685],
        [0.0974, 0.1069, 0.1037, 0.1058, 0.1072, 0.1062, 0.1033, 0.0988, 0.0982,
         0.0726],
        [0.0954, 0.1057, 0.1037, 0.1043, 0.1063, 0.1042, 0.1016, 0.0982, 0.0960,
         0.0847],
        [0.0904, 0.1072, 0.1070, 0.1057, 0.1081, 0.1054, 0.1031, 0.1000, 0.0969,
         0.0763],
        [0.0883, 0.1063, 0.1070, 0.1066, 0.1084, 0.1072, 0.1044, 0.1012, 0.0990,
         0.0717],
        [0.0852, 0.1059, 0.1079, 0.1075, 0.1094, 0.1084, 0.1057, 0.1020, 0.1000,
         0.0681],
        [0.0813, 0.1077, 0.1087, 0.1074, 0.1103, 0.1080, 0.1089, 0.1032, 0.1003,
         0.0642],
        [0.0801, 0.109

Epoch: 312.00, Train Loss: 0.07, Val Loss: 13.64, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> most of the animals are in the oceans .
Attention Weights: tensor([[0.0970, 0.1117, 0.1080, 0.1027, 0.1074, 0.0901, 0.0962, 0.1014, 0.0860,
         0.0994],
        [0.1041, 0.1158, 0.1078, 0.1006, 0.1077, 0.0860, 0.0952, 0.1016, 0.0823,
         0.0989],
        [0.0979, 0.1138, 0.1082, 0.1015, 0.1089, 0.0884, 0.0955, 0.1028, 0.0840,
         0.0989],
        [0.0877, 0.1109, 0.1090, 0.1029, 0.1101, 0.0916, 0.0959, 0.1063, 0.0887,
         0.0969],
        [0.0761, 0.1016, 0.1104, 0.1050, 0.1100, 0.0987, 0.0988, 0.1078, 0.0953,
         0.0964],
        [0.0693, 0.0959, 0.1122, 0.1071, 0.1097, 0.1026, 0.0986, 0.1106, 0.0999,
         0.0941],
        [0.0690, 0.0913, 0.1098, 0.1074, 0.1074, 0.1062, 0.0990, 0.1094, 0.1039,
         0.0966],
        [0.0716, 0.0929, 0

Epoch: 316.00, Train Loss: 0.06, Val Loss: 13.66, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 我们 将 用 一些 影片 来讲 讲述 一些 深海 海里
Reference: and we &apos;re going to tell you some stories
Model: <SOS> and we &apos;re going to tell you some stories
Attention Weights: tensor([[0.0795, 0.0914, 0.0989, 0.1070, 0.1021, 0.1015, 0.1050, 0.1049, 0.1072,
         0.1027],
        [0.0787, 0.0894, 0.0956, 0.1051, 0.1012, 0.1022, 0.1054, 0.1073, 0.1094,
         0.1058],
        [0.0779, 0.0908, 0.0985, 0.1087, 0.1039, 0.1026, 0.1049, 0.1045, 0.1064,
         0.1019],
        [0.0753, 0.0909, 0.1000, 0.1091, 0.1039, 0.1025, 0.1051, 0.1053, 0.1074,
         0.1003],
        [0.0759, 0.0905, 0.0985, 0.1052, 0.1031, 0.1031, 0.1060, 0.1078, 0.1099,
         0.0999],
        [0.0776, 0.0918, 0.0988, 0.1038, 0.1020, 0.1011, 0.1054, 0.1083, 0.1102,
         0.1010],
        [0.0746, 0.0899, 0.0974, 0.1018, 0.1033, 0.1029, 0.1070, 0.1105, 0.1126,
         0.1001],
        [0.

Epoch: 320.00, Train Loss: 0.06, Val Loss: 13.68, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> this turns out to be the longest creature in
Attention Weights: tensor([[0.0894, 0.0993, 0.0956, 0.1200, 0.1263, 0.0848, 0.0796, 0.0755, 0.0448,
         0.1847],
        [0.0848, 0.0891, 0.0858, 0.1084, 0.1180, 0.0784, 0.0737, 0.0725, 0.0490,
         0.2402],
        [0.0842, 0.0908, 0.0869, 0.1096, 0.1184, 0.0761, 0.0716, 0.0730, 0.0532,
         0.2361],
        [0.0803, 0.0914, 0.0878, 0.1051, 0.1259, 0.0742, 0.0715, 0.0734, 0.0577,
         0.2327],
        [0.0804, 0.0932, 0.0882, 0.1025, 0.1250, 0.0763, 0.0742, 0.0754, 0.0547,
         0.2301],
        [0.0748, 0.0881, 0.0825, 0.0963, 0.1186, 0.0737, 0.0708, 0.0704, 0.0456,
         0.2793],
        [0.0453, 0.0585, 0.0540, 0.0659, 0.0843, 0.0515, 0.0487, 0.0488, 0.0255,
         0.5175],
        [0

Epoch: 324.00, Train Loss: 0.06, Val Loss: 13.70, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 大卫 <UNK> 这位 是 比尔 <UNK> 我 是 大卫 <UNK>
Reference: this is bill lange . i &apos;m dave gallo
Model: <SOS> this is bill lange . i &apos;m dave gallo
Attention Weights: tensor([[0.0884, 0.1233, 0.0904, 0.0820, 0.1276, 0.1338, 0.0803, 0.0762, 0.0795,
         0.1185],
        [0.0894, 0.1174, 0.0964, 0.0867, 0.1273, 0.1242, 0.0824, 0.0796, 0.0836,
         0.1131],
        [0.0949, 0.1073, 0.0985, 0.0946, 0.1265, 0.1200, 0.0836, 0.0847, 0.0898,
         0.1000],
        [0.0879, 0.1083, 0.0970, 0.0953, 0.1364, 0.1256, 0.0790, 0.0815, 0.0893,
         0.0998],
        [0.0858, 0.1047, 0.1023, 0.1025, 0.1385, 0.1242, 0.0807, 0.0844, 0.0922,
         0.0847],
        [0.0777, 0.1170, 0.0943, 0.0954, 0.1541, 0.1378, 0.0710, 0.0751, 0.0873,
         0.0903],
        [0.0786, 0.1163, 0.0938, 0.0936, 0.1603, 0.1366, 0.0710, 0.0749, 0.0872,
         0.0877],
        [0.07

Epoch: 328.00, Train Loss: 0.06, Val Loss: 13.72, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 我们 将 用 一些 影片 来讲 讲述 一些 深海 海里
Reference: and we &apos;re going to tell you some stories
Model: <SOS> and we &apos;re going to tell you some stories
Attention Weights: tensor([[0.0793, 0.0916, 0.0990, 0.1069, 0.1018, 0.1015, 0.1049, 0.1051, 0.1074,
         0.1025],
        [0.0784, 0.0895, 0.0958, 0.1050, 0.1010, 0.1023, 0.1052, 0.1075, 0.1096,
         0.1056],
        [0.0777, 0.0910, 0.0987, 0.1087, 0.1037, 0.1026, 0.1046, 0.1047, 0.1066,
         0.1017],
        [0.0749, 0.0910, 0.1004, 0.1092, 0.1037, 0.1025, 0.1050, 0.1056, 0.1076,
         0.1001],
        [0.0757, 0.0906, 0.0986, 0.1051, 0.1030, 0.1030, 0.1061, 0.1081, 0.1101,
         0.0996],
        [0.0774, 0.0918, 0.0989, 0.1037, 0.1018, 0.1010, 0.1056, 0.1087, 0.1103,
         0.1009],
        [0.0743, 0.0898, 0.0974, 0.1017, 0.1033, 0.1028, 0.1071, 0.1109, 0.1128,
         0.0999],
        [0.

Epoch: 332.00, Train Loss: 0.05, Val Loss: 13.74, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 还有 这些 摇晃 着 旋转 转着 的 触角 <EOS> <PAD>
Reference: it &apos;s got tentacles dangling , swirling around like
Model: <SOS> it &apos;s got tentacles dangling , swirling around like
Attention Weights: tensor([[0.0987, 0.1060, 0.1051, 0.1063, 0.1044, 0.1312, 0.0820, 0.0767, 0.0546,
         0.1351],
        [0.0953, 0.0967, 0.0948, 0.0956, 0.0933, 0.1192, 0.0757, 0.0726, 0.0519,
         0.2048],
        [0.0960, 0.0994, 0.0981, 0.1007, 0.0971, 0.1244, 0.0795, 0.0756, 0.0646,
         0.1646],
        [0.0820, 0.0920, 0.0918, 0.0939, 0.0914, 0.1123, 0.0705, 0.0671, 0.0538,
         0.2453],
        [0.0799, 0.0895, 0.0872, 0.0902, 0.0873, 0.1049, 0.0724, 0.0691, 0.0489,
         0.2706],
        [0.0713, 0.0832, 0.0776, 0.0827, 0.0775, 0.1022, 0.0670, 0.0613, 0.0405,
         0.3367],
        [0.0399, 0.0514, 0.0484, 0.0518, 0.0486, 0.0684, 0.0401, 0.0363, 0.0199,
   

Epoch: 336.00, Train Loss: 0.05, Val Loss: 13.75, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 还有 前面 的 这个 是 推进 引擎 它 一会 一会儿
Reference: and it &apos;s got these jet thrusters up in
Model: <SOS> and it &apos;s got these jet thrusters up in
Attention Weights: tensor([[0.0826, 0.0878, 0.0823, 0.0822, 0.0817, 0.0834, 0.0787, 0.0821, 0.1189,
         0.2203],
        [0.0797, 0.0844, 0.0774, 0.0771, 0.0761, 0.0802, 0.0741, 0.0786, 0.1200,
         0.2524],
        [0.0778, 0.0856, 0.0792, 0.0800, 0.0792, 0.0837, 0.0771, 0.0800, 0.1198,
         0.2376],
        [0.0758, 0.0813, 0.0754, 0.0760, 0.0758, 0.0796, 0.0735, 0.0753, 0.1206,
         0.2668],
        [0.0780, 0.0872, 0.0817, 0.0807, 0.0818, 0.0845, 0.0787, 0.0802, 0.1196,
         0.2277],
        [0.0806, 0.0925, 0.0856, 0.0845, 0.0856, 0.0886, 0.0839, 0.0866, 0.1148,
         0.1974],
        [0.0777, 0.0952, 0.0877, 0.0871, 0.0893, 0.0909, 0.0892, 0.0921, 0.1153,
         0.1756],
        [0.0741

Epoch: 340.00, Train Loss: 0.05, Val Loss: 13.76, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 这儿 基本 基本上 都 没有 被 开发 发过 但是 像
Reference: it &apos;s mostly unexplored , and yet there are
Model: <SOS> it &apos;s mostly unexplored , and yet there are
Attention Weights: tensor([[0.0928, 0.1086, 0.1117, 0.1005, 0.1085, 0.0983, 0.1088, 0.0874, 0.0955,
         0.0878],
        [0.0945, 0.1120, 0.1154, 0.0965, 0.1090, 0.0955, 0.1095, 0.0851, 0.0947,
         0.0876],
        [0.0928, 0.1108, 0.1155, 0.0983, 0.1084, 0.0966, 0.1085, 0.0879, 0.0940,
         0.0872],
        [0.0863, 0.1081, 0.1163, 0.1000, 0.1089, 0.0984, 0.1085, 0.0906, 0.0963,
         0.0866],
        [0.0818, 0.1056, 0.1151, 0.1003, 0.1085, 0.0994, 0.1084, 0.0936, 0.1001,
         0.0872],
        [0.0735, 0.1008, 0.1135, 0.1016, 0.1092, 0.1012, 0.1099, 0.0971, 0.1064,
         0.0868],
        [0.0738, 0.0990, 0.1095, 0.1022, 0.1072, 0.1019, 0.1082, 0.0993, 0.1090,
         0.0899],
       

Epoch: 344.00, Train Loss: 0.05, Val Loss: 13.78, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 其实 它们 都 是 由 单独 的 动物 结合 合在
Reference: these are all individual animals banding together to make
Model: <SOS> these are all individual animals banding together to make
Attention Weights: tensor([[0.0898, 0.0993, 0.1004, 0.1015, 0.1028, 0.1014, 0.1008, 0.1021, 0.1005,
         0.1014],
        [0.0899, 0.0973, 0.0970, 0.0975, 0.0988, 0.0969, 0.0975, 0.1031, 0.1021,
         0.1198],
        [0.0884, 0.0984, 0.0987, 0.1011, 0.1023, 0.1012, 0.1007, 0.1043, 0.1008,
         0.1042],
        [0.0858, 0.0985, 0.0989, 0.1014, 0.1022, 0.1014, 0.1006, 0.1003, 0.0987,
         0.1122],
        [0.0843, 0.1015, 0.1016, 0.1040, 0.1047, 0.1043, 0.1035, 0.1022, 0.1018,
         0.0922],
        [0.0830, 0.1049, 0.1044, 0.1069, 0.1075, 0.1077, 0.1065, 0.1026, 0.1028,
         0.0738],
        [0.0798, 0.1037, 0.1027, 0.1040, 0.1047, 0.1048, 0.1050, 0.1027, 0.1048,
         

Epoch: 348.00, Train Loss: 0.05, Val Loss: 13.80, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 深海 海中 的 生命 大卫 <UNK> <EOS> <PAD> <PAD> <PAD>
Reference: life in the deep oceans <EOS> <PAD> <PAD> <PAD>
Model: <SOS> life in the deep oceans <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0722, 0.0812, 0.0743, 0.0764, 0.0713, 0.0673, 0.0306, 0.1756, 0.1756,
         0.1756],
        [0.0592, 0.0622, 0.0557, 0.0589, 0.0561, 0.0704, 0.0339, 0.2012, 0.2012,
         0.2012],
        [0.0691, 0.0743, 0.0669, 0.0727, 0.0723, 0.0922, 0.0447, 0.1692, 0.1692,
         0.1692],
        [0.0517, 0.0569, 0.0537, 0.0565, 0.0538, 0.0640, 0.0323, 0.2103, 0.2103,
         0.2103],
        [0.0258, 0.0298, 0.0290, 0.0295, 0.0266, 0.0290, 0.0139, 0.2721, 0.2721,
         0.2721],
        [0.0059, 0.0072, 0.0074, 0.0077, 0.0069, 0.0068, 0.0030, 0.3184, 0.3184,
         0.3184],
        [0.0028, 0.0031, 0.0033, 0.0035, 0.0032, 0.0038, 0.0015, 0.3263, 0.3263,
         0.

Epoch: 352.00, Train Loss: 0.04, Val Loss: 13.82, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 我们 将 用 一些 影片 来讲 讲述 一些 深海 海里
Reference: and we &apos;re going to tell you some stories
Model: <SOS> and we &apos;re going to tell you some stories
Attention Weights: tensor([[0.0793, 0.0918, 0.0993, 0.1066, 0.1017, 0.1014, 0.1051, 0.1055, 0.1075,
         0.1021],
        [0.0779, 0.0895, 0.0958, 0.1045, 0.1011, 0.1023, 0.1059, 0.1081, 0.1098,
         0.1051],
        [0.0775, 0.0911, 0.0988, 0.1083, 0.1037, 0.1024, 0.1053, 0.1052, 0.1067,
         0.1011],
        [0.0746, 0.0908, 0.1005, 0.1090, 0.1037, 0.1022, 0.1058, 0.1062, 0.1077,
         0.0995],
        [0.0753, 0.0902, 0.0985, 0.1050, 0.1032, 0.1028, 0.1072, 0.1087, 0.1101,
         0.0990],
        [0.0770, 0.0915, 0.0988, 0.1035, 0.1018, 0.1006, 0.1066, 0.1093, 0.1104,
         0.1004],
        [0.0736, 0.0893, 0.0973, 0.1017, 0.1035, 0.1025, 0.1082, 0.1115, 0.1129,
         0.0994],
        [0.

Epoch: 356.00, Train Loss: 0.04, Val Loss: 13.83, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> most of the animals are in the oceans .
Attention Weights: tensor([[0.0981, 0.1115, 0.1078, 0.1028, 0.1073, 0.0890, 0.0960, 0.1020, 0.0862,
         0.0992],
        [0.1047, 0.1155, 0.1079, 0.1006, 0.1079, 0.0849, 0.0950, 0.1028, 0.0823,
         0.0985],
        [0.0985, 0.1135, 0.1082, 0.1016, 0.1091, 0.0875, 0.0952, 0.1043, 0.0839,
         0.0981],
        [0.0882, 0.1110, 0.1095, 0.1029, 0.1103, 0.0901, 0.0952, 0.1078, 0.0881,
         0.0968],
        [0.0757, 0.1009, 0.1110, 0.1053, 0.1101, 0.0977, 0.0983, 0.1092, 0.0949,
         0.0968],
        [0.0700, 0.0948, 0.1122, 0.1073, 0.1096, 0.1014, 0.0977, 0.1117, 0.0993,
         0.0960],
        [0.0697, 0.0909, 0.1103, 0.1079, 0.1073, 0.1048, 0.0980, 0.1100, 0.1027,
         0.0986],
        [0.0731, 0.0926, 0

Epoch: 360.00, Train Loss: 0.04, Val Loss: 13.85, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 大部 大部分 部分 的 动物 也 都 生活 在 海洋
Reference: most of the animals are in the oceans .
Model: <SOS> most of the animals are in the oceans .
Attention Weights: tensor([[0.0982, 0.1116, 0.1078, 0.1028, 0.1073, 0.0889, 0.0960, 0.1021, 0.0862,
         0.0991],
        [0.1048, 0.1155, 0.1079, 0.1006, 0.1078, 0.0848, 0.0949, 0.1029, 0.0823,
         0.0985],
        [0.0985, 0.1135, 0.1082, 0.1016, 0.1092, 0.0874, 0.0951, 0.1044, 0.0839,
         0.0981],
        [0.0881, 0.1110, 0.1096, 0.1030, 0.1104, 0.0900, 0.0951, 0.1079, 0.0880,
         0.0969],
        [0.0757, 0.1009, 0.1110, 0.1054, 0.1102, 0.0976, 0.0982, 0.1093, 0.0948,
         0.0968],
        [0.0699, 0.0947, 0.1123, 0.1073, 0.1098, 0.1013, 0.0976, 0.1118, 0.0992,
         0.0961],
        [0.0696, 0.0908, 0.1103, 0.1079, 0.1075, 0.1047, 0.0979, 0.1101, 0.1025,
         0.0988],
        [0.0731, 0.0926, 0

Epoch: 364.00, Train Loss: 0.04, Val Loss: 13.87, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 我们 用 的 是 深海 潜水 潜水艇 <UNK> 号 和
Reference: we use the submarine alvin and we use cameras
Model: <SOS> we use the submarine alvin and we use cameras
Attention Weights: tensor([[0.0890, 0.0948, 0.0950, 0.0951, 0.1097, 0.1028, 0.1227, 0.1193, 0.0798,
         0.0917],
        [0.0856, 0.0931, 0.0945, 0.0967, 0.1118, 0.1055, 0.1272, 0.1143, 0.0788,
         0.0925],
        [0.0822, 0.0903, 0.0922, 0.0956, 0.1135, 0.1071, 0.1346, 0.1196, 0.0748,
         0.0901],
        [0.0764, 0.0904, 0.0956, 0.0983, 0.1163, 0.1096, 0.1345, 0.1209, 0.0730,
         0.0849],
        [0.0792, 0.0927, 0.0964, 0.0993, 0.1155, 0.1111, 0.1264, 0.1105, 0.0799,
         0.0890],
        [0.0778, 0.0931, 0.0968, 0.0999, 0.1143, 0.1110, 0.1259, 0.1139, 0.0795,
         0.0878],
        [0.0772, 0.0906, 0.0935, 0.0969, 0.1097, 0.1104, 0.1307, 0.1225, 0.0800,
         0.0885],
        [0.0

Epoch: 368.00, Train Loss: 0.04, Val Loss: 13.88, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 是 我 最 喜欢 的 因为 它 哪 都 能动
Reference: it &apos;s one of my favorites , because it
Model: <SOS> it &apos;s one of my favorites , because it
Attention Weights: tensor([[0.0923, 0.1048, 0.1036, 0.1064, 0.1078, 0.1082, 0.1060, 0.1018, 0.1011,
         0.0682],
        [0.0973, 0.1077, 0.1035, 0.1057, 0.1068, 0.1062, 0.1033, 0.0990, 0.0980,
         0.0725],
        [0.0953, 0.1063, 0.1033, 0.1042, 0.1059, 0.1041, 0.1015, 0.0980, 0.0958,
         0.0855],
        [0.0899, 0.1079, 0.1065, 0.1057, 0.1077, 0.1051, 0.1030, 0.0996, 0.0964,
         0.0780],
        [0.0878, 0.1070, 0.1067, 0.1068, 0.1083, 0.1071, 0.1044, 0.1011, 0.0986,
         0.0723],
        [0.0844, 0.1061, 0.1075, 0.1074, 0.1092, 0.1084, 0.1057, 0.1020, 0.0996,
         0.0696],
        [0.0798, 0.1072, 0.1077, 0.1070, 0.1096, 0.1084, 0.1091, 0.1033, 0.1001,
         0.0678],
        [0.0792, 0.108

Epoch: 372.00, Train Loss: 0.04, Val Loss: 13.90, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 其实 它们 都 是 由 单独 的 动物 结合 合在
Reference: these are all individual animals banding together to make
Model: <SOS> these are all individual animals banding together to make
Attention Weights: tensor([[0.0904, 0.0995, 0.1010, 0.1010, 0.1022, 0.1007, 0.1003, 0.1013, 0.1002,
         0.1034],
        [0.0903, 0.0974, 0.0977, 0.0970, 0.0982, 0.0963, 0.0970, 0.1024, 0.1016,
         0.1220],
        [0.0887, 0.0984, 0.0994, 0.1004, 0.1016, 0.1005, 0.0999, 0.1037, 0.0999,
         0.1075],
        [0.0857, 0.0982, 0.0990, 0.1008, 0.1016, 0.1008, 0.0998, 0.1001, 0.0979,
         0.1161],
        [0.0839, 0.1011, 0.1016, 0.1036, 0.1042, 0.1038, 0.1031, 0.1021, 0.1012,
         0.0953],
        [0.0827, 0.1046, 0.1046, 0.1066, 0.1070, 0.1073, 0.1061, 0.1025, 0.1022,
         0.0765],
        [0.0795, 0.1032, 0.1027, 0.1032, 0.1036, 0.1040, 0.1043, 0.1029, 0.1041,
         

Epoch: 376.00, Train Loss: 0.04, Val Loss: 13.92, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 海洋 的 平均 深度 是 两英里 英里 <EOS> <PAD> <PAD>
Reference: the average depth is about two miles . <EOS>
Model: <SOS> the average depth is about two miles . <EOS>
Attention Weights: tensor([[0.0779, 0.0863, 0.0943, 0.0893, 0.0850, 0.0852, 0.0701, 0.0390, 0.1865,
         0.1865],
        [0.0675, 0.0709, 0.0787, 0.0732, 0.0681, 0.0676, 0.0576, 0.0381, 0.2392,
         0.2392],
        [0.0744, 0.0800, 0.0862, 0.0830, 0.0781, 0.0823, 0.0658, 0.0485, 0.2009,
         0.2009],
        [0.0696, 0.0768, 0.0811, 0.0798, 0.0763, 0.0803, 0.0660, 0.0454, 0.2124,
         0.2124],
        [0.0541, 0.0620, 0.0655, 0.0655, 0.0628, 0.0668, 0.0530, 0.0331, 0.2686,
         0.2686],
        [0.0343, 0.0417, 0.0443, 0.0446, 0.0429, 0.0457, 0.0356, 0.0190, 0.3460,
         0.3460],
        [0.0167, 0.0210, 0.0220, 0.0223, 0.0213, 0.0253, 0.0191, 0.0100, 0.4212,
         0.4212],
     

Epoch: 380.00, Train Loss: 0.03, Val Loss: 13.93, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 这儿 基本 基本上 都 没有 被 开发 发过 但是 像
Reference: it &apos;s mostly unexplored , and yet there are
Model: <SOS> it &apos;s mostly unexplored , and yet there are
Attention Weights: tensor([[0.0919, 0.1075, 0.1121, 0.0999, 0.1090, 0.0975, 0.1092, 0.0876, 0.0965,
         0.0887],
        [0.0923, 0.1100, 0.1159, 0.0959, 0.1103, 0.0946, 0.1106, 0.0846, 0.0973,
         0.0887],
        [0.0916, 0.1096, 0.1155, 0.0974, 0.1091, 0.0955, 0.1094, 0.0873, 0.0964,
         0.0882],
        [0.0846, 0.1072, 0.1173, 0.0994, 0.1096, 0.0972, 0.1094, 0.0898, 0.0982,
         0.0873],
        [0.0799, 0.1046, 0.1167, 0.0997, 0.1089, 0.0982, 0.1091, 0.0930, 0.1020,
         0.0880],
        [0.0710, 0.0995, 0.1154, 0.1007, 0.1100, 0.0999, 0.1110, 0.0966, 0.1087,
         0.0872],
        [0.0724, 0.0980, 0.1112, 0.1005, 0.1081, 0.1000, 0.1091, 0.0983, 0.1110,
         0.0912],
       

Epoch: 384.00, Train Loss: 0.03, Val Loss: 13.95, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 海洋 里 生物 的 多样 多样性 和 密度 要 比
Reference: the biodiversity and the <UNK> in the ocean is
Model: <SOS> the biodiversity and the <UNK> in the ocean is
Attention Weights: tensor([[0.1016, 0.0990, 0.1129, 0.1059, 0.1159, 0.1421, 0.0821, 0.0822, 0.0773,
         0.0811],
        [0.1025, 0.0985, 0.1122, 0.1026, 0.1145, 0.1431, 0.0830, 0.0817, 0.0779,
         0.0839],
        [0.1013, 0.0982, 0.1116, 0.1028, 0.1165, 0.1435, 0.0822, 0.0812, 0.0772,
         0.0854],
        [0.0952, 0.0974, 0.1136, 0.1045, 0.1193, 0.1437, 0.0833, 0.0822, 0.0779,
         0.0830],
        [0.0883, 0.0954, 0.1144, 0.1071, 0.1191, 0.1447, 0.0853, 0.0836, 0.0800,
         0.0822],
        [0.0838, 0.0933, 0.1123, 0.1089, 0.1168, 0.1451, 0.0886, 0.0858, 0.0836,
         0.0817],
        [0.0803, 0.0897, 0.1109, 0.1103, 0.1164, 0.1461, 0.0919, 0.0879, 0.0864,
         0.0802],
        [0.08

Epoch: 388.00, Train Loss: 0.03, Val Loss: 13.96, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 海洋 里 生物 的 多样 多样性 和 密度 要 比
Reference: the biodiversity and the <UNK> in the ocean is
Model: <SOS> the biodiversity and the <UNK> in the ocean is
Attention Weights: tensor([[0.1017, 0.0990, 0.1129, 0.1059, 0.1158, 0.1422, 0.0821, 0.0822, 0.0773,
         0.0811],
        [0.1024, 0.0984, 0.1123, 0.1025, 0.1145, 0.1434, 0.0830, 0.0817, 0.0779,
         0.0839],
        [0.1012, 0.0982, 0.1116, 0.1027, 0.1165, 0.1439, 0.0821, 0.0812, 0.0772,
         0.0854],
        [0.0951, 0.0974, 0.1136, 0.1044, 0.1194, 0.1440, 0.0832, 0.0821, 0.0778,
         0.0830],
        [0.0881, 0.0953, 0.1144, 0.1070, 0.1192, 0.1451, 0.0852, 0.0835, 0.0800,
         0.0822],
        [0.0836, 0.0932, 0.1124, 0.1088, 0.1169, 0.1455, 0.0886, 0.0858, 0.0836,
         0.0818],
        [0.0801, 0.0895, 0.1110, 0.1101, 0.1165, 0.1466, 0.0919, 0.0879, 0.0864,
         0.0802],
        [0.08

Epoch: 392.00, Train Loss: 0.03, Val Loss: 13.97, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> but see all those different working things ? <EOS>
Attention Weights: tensor([[0.0910, 0.0994, 0.0994, 0.0987, 0.0976, 0.0944, 0.0868, 0.0794, 0.0541,
         0.1991],
        [0.0796, 0.0850, 0.0846, 0.0835, 0.0826, 0.0797, 0.0751, 0.0709, 0.0516,
         0.3073],
        [0.0832, 0.0897, 0.0906, 0.0906, 0.0902, 0.0880, 0.0838, 0.0788, 0.0603,
         0.2448],
        [0.0707, 0.0844, 0.0872, 0.0870, 0.0859, 0.0833, 0.0810, 0.0764, 0.0566,
         0.2875],
        [0.0695, 0.0829, 0.0845, 0.0841, 0.0828, 0.0805, 0.0779, 0.0728, 0.0517,
         0.3132],
        [0.0528, 0.0669, 0.0671, 0.0667, 0.0656, 0.0639, 0.0611, 0.0557, 0.0344,
         0.4657],
        [0.0203, 0.0275, 0.0285, 0.0285, 0.0281, 0.0279, 0.0269, 0.0247, 0.0140,
         0.7738],


Epoch: 396.00, Train Loss: 0.03, Val Loss: 13.99, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 还有 这些 摇晃 着 旋转 转着 的 触角 <EOS> <PAD>
Reference: it &apos;s got tentacles dangling , swirling around like
Model: <SOS> it &apos;s got tentacles dangling , swirling around like
Attention Weights: tensor([[0.0993, 0.1062, 0.1058, 0.1062, 0.1050, 0.1332, 0.0825, 0.0768, 0.0541,
         0.1308],
        [0.0953, 0.0966, 0.0951, 0.0948, 0.0938, 0.1207, 0.0763, 0.0727, 0.0511,
         0.2037],
        [0.0954, 0.0990, 0.0982, 0.0995, 0.0973, 0.1256, 0.0797, 0.0755, 0.0650,
         0.1647],
        [0.0808, 0.0917, 0.0918, 0.0931, 0.0916, 0.1137, 0.0707, 0.0668, 0.0537,
         0.2462],
        [0.0797, 0.0900, 0.0877, 0.0904, 0.0879, 0.1075, 0.0731, 0.0690, 0.0491,
         0.2656],
        [0.0716, 0.0839, 0.0785, 0.0827, 0.0783, 0.1054, 0.0680, 0.0616, 0.0413,
         0.3286],
        [0.0395, 0.0512, 0.0485, 0.0514, 0.0486, 0.0698, 0.0402, 0.0359, 0.0199,
   

Epoch: 400.00, Train Loss: 0.03, Val Loss: 14.00, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 这 是 一只 水母 <EOS> <PAD> <PAD> <PAD> <PAD> <PAD>
Reference: here &apos;s a jelly . <EOS> <PAD> <PAD> <PAD>
Model: <SOS> here &apos;s a jelly . <EOS> <EOS> <EOS> <EOS>
Attention Weights: tensor([[0.0396, 0.0472, 0.0444, 0.0415, 0.0234, 0.1608, 0.1608, 0.1608, 0.1608,
         0.1608],
        [0.0272, 0.0311, 0.0304, 0.0295, 0.0198, 0.1724, 0.1724, 0.1724, 0.1724,
         0.1724],
        [0.0396, 0.0456, 0.0442, 0.0429, 0.0343, 0.1587, 0.1587, 0.1587, 0.1587,
         0.1587],
        [0.0383, 0.0442, 0.0419, 0.0421, 0.0289, 0.1609, 0.1609, 0.1609, 0.1609,
         0.1609],
        [0.0152, 0.0180, 0.0173, 0.0183, 0.0101, 0.1842, 0.1842, 0.1842, 0.1842,
         0.1842],
        [0.0013, 0.0016, 0.0017, 0.0019, 0.0008, 0.1985, 0.1985, 0.1985, 0.1985,
         0.1985],
        [0.0011, 0.0013, 0.0014, 0.0015, 0.0007, 0.1988, 0.1988, 0.1988, 0.1988,
         0.

Epoch: 404.00, Train Loss: 0.03, Val Loss: 14.01, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 还有 这些 摇晃 着 旋转 转着 的 触角 <EOS> <PAD>
Reference: it &apos;s got tentacles dangling , swirling around like
Model: <SOS> it &apos;s got tentacles dangling , swirling around like
Attention Weights: tensor([[0.0993, 0.1062, 0.1059, 0.1063, 0.1050, 0.1334, 0.0823, 0.0767, 0.0540,
         0.1308],
        [0.0952, 0.0966, 0.0951, 0.0948, 0.0937, 0.1208, 0.0761, 0.0726, 0.0510,
         0.2040],
        [0.0953, 0.0991, 0.0982, 0.0995, 0.0972, 0.1256, 0.0796, 0.0755, 0.0651,
         0.1649],
        [0.0807, 0.0917, 0.0918, 0.0930, 0.0914, 0.1137, 0.0707, 0.0668, 0.0537,
         0.2465],
        [0.0797, 0.0901, 0.0877, 0.0904, 0.0879, 0.1078, 0.0731, 0.0691, 0.0492,
         0.2649],
        [0.0717, 0.0841, 0.0786, 0.0827, 0.0783, 0.1057, 0.0680, 0.0617, 0.0414,
         0.3276],
        [0.0395, 0.0512, 0.0486, 0.0513, 0.0485, 0.0699, 0.0401, 0.0359, 0.0199,
   

Epoch: 408.00, Train Loss: 0.03, Val Loss: 14.03, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 海洋 的 平均 深度 是 两英里 英里 <EOS> <PAD> <PAD>
Reference: the average depth is about two miles . <EOS>
Model: <SOS> the average depth is about two miles . <EOS>
Attention Weights: tensor([[0.0779, 0.0864, 0.0949, 0.0898, 0.0857, 0.0857, 0.0696, 0.0386, 0.1857,
         0.1857],
        [0.0671, 0.0705, 0.0789, 0.0733, 0.0680, 0.0678, 0.0570, 0.0376, 0.2399,
         0.2399],
        [0.0746, 0.0802, 0.0871, 0.0836, 0.0786, 0.0830, 0.0653, 0.0479, 0.1998,
         0.1998],
        [0.0698, 0.0769, 0.0817, 0.0803, 0.0765, 0.0810, 0.0654, 0.0449, 0.2118,
         0.2118],
        [0.0540, 0.0619, 0.0657, 0.0658, 0.0627, 0.0674, 0.0523, 0.0327, 0.2688,
         0.2688],
        [0.0344, 0.0417, 0.0445, 0.0450, 0.0429, 0.0461, 0.0351, 0.0188, 0.3457,
         0.3457],
        [0.0167, 0.0209, 0.0220, 0.0223, 0.0212, 0.0255, 0.0188, 0.0099, 0.4213,
         0.4213],
     

Epoch: 412.00, Train Loss: 0.03, Val Loss: 14.04, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 其实 它们 都 是 由 单独 的 动物 结合 合在
Reference: these are all individual animals banding together to make
Model: <SOS> these are all individual animals banding together to make
Attention Weights: tensor([[0.0905, 0.0992, 0.1008, 0.1007, 0.1020, 0.1005, 0.1000, 0.1009, 0.1000,
         0.1055],
        [0.0905, 0.0973, 0.0975, 0.0966, 0.0979, 0.0960, 0.0970, 0.1022, 0.1015,
         0.1234],
        [0.0888, 0.0980, 0.0992, 0.1000, 0.1013, 0.1000, 0.0998, 0.1032, 0.0995,
         0.1102],
        [0.0857, 0.0978, 0.0989, 0.1007, 0.1015, 0.1003, 0.0993, 0.0994, 0.0973,
         0.1192],
        [0.0837, 0.1009, 0.1018, 0.1037, 0.1043, 0.1032, 0.1028, 0.1016, 0.1005,
         0.0974],
        [0.0825, 0.1043, 0.1048, 0.1067, 0.1070, 0.1068, 0.1057, 0.1021, 0.1016,
         0.0785],
        [0.0795, 0.1026, 0.1029, 0.1030, 0.1032, 0.1033, 0.1037, 0.1025, 0.1035,
         

Epoch: 416.00, Train Loss: 0.03, Val Loss: 14.05, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 和 我们 合作 的 人们 帮 我们 找到 了 新
Reference: people that have partnered with us have given us
Model: <SOS> people that have partnered with us have given us
Attention Weights: tensor([[0.0808, 0.0977, 0.1090, 0.1026, 0.1079, 0.0993, 0.1078, 0.0970, 0.0901,
         0.1078],
        [0.0805, 0.0976, 0.1092, 0.1017, 0.1073, 0.0981, 0.1068, 0.0992, 0.0903,
         0.1093],
        [0.0795, 0.0965, 0.1093, 0.1029, 0.1075, 0.0982, 0.1076, 0.1011, 0.0906,
         0.1067],
        [0.0799, 0.0981, 0.1074, 0.1029, 0.1060, 0.0995, 0.1060, 0.1024, 0.0945,
         0.1032],
        [0.0776, 0.0981, 0.1049, 0.1041, 0.1062, 0.1015, 0.1053, 0.1040, 0.0977,
         0.1006],
        [0.0730, 0.0968, 0.1035, 0.1045, 0.1073, 0.1037, 0.1059, 0.1055, 0.1003,
         0.0995],
        [0.0693, 0.0937, 0.1025, 0.1041, 0.1082, 0.1036, 0.1079, 0.1092, 0.1023,
         0.0993],
        [0

Epoch: 420.00, Train Loss: 0.03, Val Loss: 14.07, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 还有 前面 的 这个 是 推进 引擎 它 一会 一会儿
Reference: and it &apos;s got these jet thrusters up in
Model: <SOS> and it &apos;s got these jet thrusters up in
Attention Weights: tensor([[0.0830, 0.0883, 0.0823, 0.0818, 0.0813, 0.0838, 0.0786, 0.0799, 0.1185,
         0.2226],
        [0.0787, 0.0843, 0.0768, 0.0757, 0.0753, 0.0798, 0.0734, 0.0763, 0.1223,
         0.2575],
        [0.0766, 0.0854, 0.0783, 0.0784, 0.0780, 0.0829, 0.0765, 0.0780, 0.1223,
         0.2436],
        [0.0744, 0.0800, 0.0732, 0.0734, 0.0734, 0.0777, 0.0720, 0.0729, 0.1219,
         0.2810],
        [0.0766, 0.0860, 0.0793, 0.0781, 0.0792, 0.0827, 0.0776, 0.0781, 0.1222,
         0.2402],
        [0.0790, 0.0909, 0.0831, 0.0817, 0.0826, 0.0865, 0.0824, 0.0841, 0.1180,
         0.2118],
        [0.0760, 0.0931, 0.0843, 0.0838, 0.0855, 0.0884, 0.0876, 0.0890, 0.1202,
         0.1921],
        [0.0726

Epoch: 424.00, Train Loss: 0.02, Val Loss: 14.07, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 海洋 的 平均 深度 是 两英里 英里 <EOS> <PAD> <PAD>
Reference: the average depth is about two miles . <EOS>
Model: <SOS> the average depth is about two miles . <EOS>
Attention Weights: tensor([[0.0779, 0.0866, 0.0951, 0.0899, 0.0858, 0.0859, 0.0694, 0.0384, 0.1855,
         0.1855],
        [0.0669, 0.0706, 0.0789, 0.0733, 0.0680, 0.0680, 0.0569, 0.0374, 0.2400,
         0.2400],
        [0.0746, 0.0805, 0.0874, 0.0839, 0.0788, 0.0833, 0.0652, 0.0477, 0.1993,
         0.1993],
        [0.0697, 0.0769, 0.0818, 0.0805, 0.0766, 0.0812, 0.0652, 0.0448, 0.2116,
         0.2116],
        [0.0539, 0.0618, 0.0658, 0.0659, 0.0627, 0.0676, 0.0521, 0.0326, 0.2689,
         0.2689],
        [0.0345, 0.0420, 0.0448, 0.0454, 0.0432, 0.0466, 0.0352, 0.0188, 0.3448,
         0.3448],
        [0.0168, 0.0211, 0.0221, 0.0225, 0.0213, 0.0258, 0.0189, 0.0099, 0.4208,
         0.4208],
     

Epoch: 428.00, Train Loss: 0.02, Val Loss: 14.09, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 原来 它 是 海洋 洋中 最长 的 生物 <EOS> <PAD>
Reference: this turns out to be the longest creature in
Model: <SOS> this turns out to be the longest creature in
Attention Weights: tensor([[0.0881, 0.0992, 0.0948, 0.1213, 0.1294, 0.0836, 0.0781, 0.0735, 0.0441,
         0.1880],
        [0.0838, 0.0892, 0.0853, 0.1083, 0.1225, 0.0781, 0.0728, 0.0708, 0.0475,
         0.2418],
        [0.0828, 0.0906, 0.0865, 0.1096, 0.1214, 0.0746, 0.0699, 0.0710, 0.0519,
         0.2417],
        [0.0792, 0.0924, 0.0880, 0.1060, 0.1325, 0.0743, 0.0711, 0.0728, 0.0573,
         0.2265],
        [0.0802, 0.0942, 0.0885, 0.1040, 0.1324, 0.0770, 0.0741, 0.0754, 0.0541,
         0.2202],
        [0.0762, 0.0900, 0.0840, 0.0994, 0.1291, 0.0751, 0.0712, 0.0711, 0.0470,
         0.2570],
        [0.0445, 0.0573, 0.0529, 0.0671, 0.0911, 0.0520, 0.0479, 0.0478, 0.0251,
         0.5144],
        [0

Epoch: 432.00, Train Loss: 0.02, Val Loss: 14.10, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 地球 的 大部 大部分 部分 都 是 海水 <EOS> <PAD>
Reference: most of the planet is ocean water . <EOS>
Model: <SOS> most of the planet is ocean water . <EOS>
Attention Weights: tensor([[0.1025, 0.1113, 0.1222, 0.1180, 0.1035, 0.0864, 0.0906, 0.0886, 0.0517,
         0.1252],
        [0.1062, 0.1096, 0.1205, 0.1133, 0.0941, 0.0761, 0.0818, 0.0848, 0.0506,
         0.1629],
        [0.0979, 0.1065, 0.1176, 0.1093, 0.0904, 0.0734, 0.0797, 0.0835, 0.0530,
         0.1886],
        [0.0782, 0.0915, 0.1021, 0.0966, 0.0792, 0.0676, 0.0736, 0.0762, 0.0479,
         0.2872],
        [0.0661, 0.0791, 0.0869, 0.0869, 0.0747, 0.0667, 0.0686, 0.0694, 0.0452,
         0.3567],
        [0.0435, 0.0532, 0.0589, 0.0644, 0.0585, 0.0519, 0.0514, 0.0523, 0.0291,
         0.5369],
        [0.0130, 0.0167, 0.0189, 0.0221, 0.0208, 0.0185, 0.0179, 0.0190, 0.0093,
         0.8437],
        [0.0057

Epoch: 436.00, Train Loss: 0.02, Val Loss: 14.11, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> but see all those different working things ? <EOS>
Attention Weights: tensor([[0.0908, 0.0995, 0.0998, 0.0990, 0.0980, 0.0947, 0.0871, 0.0790, 0.0531,
         0.1990],
        [0.0789, 0.0848, 0.0847, 0.0835, 0.0826, 0.0797, 0.0751, 0.0704, 0.0507,
         0.3096],
        [0.0831, 0.0898, 0.0909, 0.0908, 0.0905, 0.0882, 0.0839, 0.0786, 0.0598,
         0.2443],
        [0.0707, 0.0846, 0.0874, 0.0872, 0.0860, 0.0834, 0.0809, 0.0762, 0.0566,
         0.2871],
        [0.0699, 0.0837, 0.0851, 0.0847, 0.0833, 0.0809, 0.0783, 0.0729, 0.0518,
         0.3095],
        [0.0530, 0.0673, 0.0674, 0.0669, 0.0658, 0.0640, 0.0612, 0.0556, 0.0343,
         0.4645],
        [0.0201, 0.0272, 0.0282, 0.0281, 0.0277, 0.0276, 0.0266, 0.0243, 0.0138,
         0.7764],


Epoch: 440.00, Train Loss: 0.02, Val Loss: 14.12, Train BLEU: 100.00, Val BLEU: 0.28
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> but see all those different working things ? <EOS>
Attention Weights: tensor([[0.0908, 0.0995, 0.0998, 0.0990, 0.0980, 0.0947, 0.0871, 0.0791, 0.0531,
         0.1990],
        [0.0789, 0.0848, 0.0847, 0.0835, 0.0826, 0.0797, 0.0751, 0.0704, 0.0506,
         0.3098],
        [0.0831, 0.0898, 0.0909, 0.0909, 0.0905, 0.0882, 0.0840, 0.0786, 0.0597,
         0.2442],
        [0.0707, 0.0846, 0.0874, 0.0872, 0.0860, 0.0834, 0.0809, 0.0762, 0.0566,
         0.2870],
        [0.0699, 0.0837, 0.0852, 0.0847, 0.0833, 0.0809, 0.0783, 0.0729, 0.0518,
         0.3092],
        [0.0530, 0.0673, 0.0674, 0.0669, 0.0658, 0.0640, 0.0612, 0.0556, 0.0343,
         0.4645],
        [0.0200, 0.0272, 0.0281, 0.0281, 0.0277, 0.0275, 0.0266, 0.0243, 0.0137,
         0.7767],


Epoch: 444.00, Train Loss: 0.02, Val Loss: 14.13, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 海洋 里 生物 的 多样 多样性 和 密度 要 比
Reference: the biodiversity and the <UNK> in the ocean is
Model: <SOS> the biodiversity and the <UNK> in the ocean is
Attention Weights: tensor([[0.1015, 0.0991, 0.1140, 0.1062, 0.1156, 0.1437, 0.0812, 0.0814, 0.0765,
         0.0807],
        [0.1020, 0.0984, 0.1132, 0.1022, 0.1147, 0.1460, 0.0821, 0.0809, 0.0770,
         0.0836],
        [0.1011, 0.0982, 0.1123, 0.1023, 0.1167, 0.1470, 0.0811, 0.0801, 0.0762,
         0.0851],
        [0.0946, 0.0975, 0.1148, 0.1043, 0.1200, 0.1472, 0.0817, 0.0807, 0.0767,
         0.0826],
        [0.0874, 0.0951, 0.1151, 0.1067, 0.1198, 0.1491, 0.0836, 0.0822, 0.0791,
         0.0820],
        [0.0828, 0.0928, 0.1123, 0.1083, 0.1173, 0.1501, 0.0872, 0.0847, 0.0829,
         0.0816],
        [0.0795, 0.0888, 0.1104, 0.1090, 0.1166, 0.1524, 0.0905, 0.0868, 0.0858,
         0.0802],
        [0.08

Epoch: 448.00, Train Loss: 0.02, Val Loss: 14.14, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 看到 这些 在 动 的 东西 了 吗 <EOS> <PAD>
Reference: but see all those different working things ? <EOS>
Model: <SOS> but see all those different working things ? <EOS>
Attention Weights: tensor([[0.0908, 0.0995, 0.0998, 0.0990, 0.0979, 0.0946, 0.0871, 0.0791, 0.0530,
         0.1992],
        [0.0788, 0.0848, 0.0846, 0.0834, 0.0825, 0.0795, 0.0750, 0.0703, 0.0506,
         0.3106],
        [0.0831, 0.0899, 0.0910, 0.0910, 0.0905, 0.0881, 0.0840, 0.0787, 0.0597,
         0.2440],
        [0.0707, 0.0847, 0.0875, 0.0873, 0.0860, 0.0834, 0.0809, 0.0762, 0.0566,
         0.2867],
        [0.0700, 0.0839, 0.0853, 0.0849, 0.0834, 0.0810, 0.0784, 0.0730, 0.0518,
         0.3084],
        [0.0531, 0.0674, 0.0676, 0.0670, 0.0659, 0.0640, 0.0613, 0.0557, 0.0343,
         0.4638],
        [0.0200, 0.0272, 0.0281, 0.0281, 0.0277, 0.0275, 0.0266, 0.0243, 0.0137,
         0.7767],


Epoch: 452.00, Train Loss: 0.02, Val Loss: 14.16, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 还有 前面 的 这个 是 推进 引擎 它 一会 一会儿
Reference: and it &apos;s got these jet thrusters up in
Model: <SOS> and it &apos;s got these jet thrusters up in
Attention Weights: tensor([[0.0833, 0.0881, 0.0820, 0.0818, 0.0810, 0.0836, 0.0780, 0.0794, 0.1191,
         0.2238],
        [0.0789, 0.0843, 0.0766, 0.0758, 0.0751, 0.0797, 0.0731, 0.0760, 0.1232,
         0.2571],
        [0.0768, 0.0854, 0.0781, 0.0785, 0.0778, 0.0828, 0.0762, 0.0778, 0.1232,
         0.2433],
        [0.0745, 0.0798, 0.0727, 0.0732, 0.0730, 0.0774, 0.0717, 0.0726, 0.1225,
         0.2825],
        [0.0766, 0.0857, 0.0789, 0.0778, 0.0786, 0.0821, 0.0773, 0.0779, 0.1230,
         0.2420],
        [0.0789, 0.0903, 0.0822, 0.0811, 0.0818, 0.0857, 0.0818, 0.0837, 0.1193,
         0.2152],
        [0.0760, 0.0925, 0.0834, 0.0832, 0.0846, 0.0874, 0.0871, 0.0885, 0.1220,
         0.1952],
        [0.0726

Epoch: 456.00, Train Loss: 0.02, Val Loss: 14.16, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 和 我们 合作 的 人们 帮 我们 找到 了 新
Reference: people that have partnered with us have given us
Model: <SOS> people that have partnered with us have given us
Attention Weights: tensor([[0.0808, 0.0978, 0.1092, 0.1026, 0.1079, 0.0994, 0.1079, 0.0968, 0.0901,
         0.1075],
        [0.0799, 0.0975, 0.1096, 0.1018, 0.1073, 0.0981, 0.1072, 0.0989, 0.0903,
         0.1094],
        [0.0793, 0.0964, 0.1095, 0.1030, 0.1074, 0.0982, 0.1078, 0.1010, 0.0907,
         0.1066],
        [0.0797, 0.0981, 0.1075, 0.1029, 0.1059, 0.0995, 0.1061, 0.1023, 0.0948,
         0.1031],
        [0.0773, 0.0980, 0.1049, 0.1041, 0.1062, 0.1015, 0.1055, 0.1040, 0.0979,
         0.1006],
        [0.0726, 0.0966, 0.1034, 0.1045, 0.1073, 0.1037, 0.1061, 0.1057, 0.1006,
         0.0995],
        [0.0688, 0.0934, 0.1023, 0.1040, 0.1083, 0.1035, 0.1081, 0.1095, 0.1026,
         0.0994],
        [0

Epoch: 460.00, Train Loss: 0.02, Val Loss: 14.17, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 其实 实地 地球 上 最长 的 山脉 都 在 海洋
Reference: and in the oceans , there are the longest
Model: <SOS> and in the oceans , there are the longest
Attention Weights: tensor([[0.0814, 0.1024, 0.1146, 0.0996, 0.1039, 0.1016, 0.1077, 0.0960, 0.0941,
         0.0987],
        [0.0835, 0.1042, 0.1139, 0.0957, 0.1027, 0.0993, 0.1086, 0.0929, 0.0958,
         0.1034],
        [0.0801, 0.1028, 0.1140, 0.0967, 0.1030, 0.0997, 0.1088, 0.0940, 0.0970,
         0.1038],
        [0.0747, 0.1001, 0.1123, 0.0993, 0.1041, 0.1015, 0.1085, 0.0986, 0.1001,
         0.1007],
        [0.0729, 0.0994, 0.1092, 0.1025, 0.1034, 0.1037, 0.1075, 0.1031, 0.0988,
         0.0996],
        [0.0667, 0.0977, 0.1119, 0.1033, 0.1052, 0.1042, 0.1098, 0.1048, 0.0992,
         0.0972],
        [0.0681, 0.0962, 0.1093, 0.1036, 0.1037, 0.1039, 0.1084, 0.1063, 0.1005,
         0.1000],
        [0.0657, 0.0952

Epoch: 464.00, Train Loss: 0.02, Val Loss: 14.19, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 底下 这些 都 是 <UNK> 它们 上上 上上下下 上下 下下
Reference: it &apos;s got these fishing <UNK> on the bottom
Model: <SOS> it &apos;s got these fishing <UNK> on the bottom
Attention Weights: tensor([[0.0866, 0.0905, 0.0879, 0.0978, 0.1554, 0.0983, 0.1156, 0.0868, 0.0857,
         0.0955],
        [0.0858, 0.0894, 0.0874, 0.0996, 0.1530, 0.1003, 0.1128, 0.0854, 0.0861,
         0.1003],
        [0.0826, 0.0867, 0.0862, 0.1018, 0.1602, 0.0991, 0.1074, 0.0840, 0.0852,
         0.1067],
        [0.0811, 0.0904, 0.0883, 0.1030, 0.1462, 0.1037, 0.1116, 0.0878, 0.0891,
         0.0989],
        [0.0828, 0.0938, 0.0889, 0.1018, 0.1362, 0.1058, 0.1156, 0.0929, 0.0933,
         0.0889],
        [0.0792, 0.0926, 0.0885, 0.1003, 0.1415, 0.1014, 0.1187, 0.0944, 0.0942,
         0.0891],
        [0.0730, 0.0918, 0.0878, 0.0949, 0.1363, 0.1013, 0.1306, 0.1023, 0.1001,
         0.0818],
  

Epoch: 468.00, Train Loss: 0.02, Val Loss: 14.20, Train BLEU: 100.00, Val BLEU: 0.29
Sampling from training predictions...
Source: 大卫 <UNK> 这位 是 比尔 <UNK> 我 是 大卫 <UNK>
Reference: this is bill lange . i &apos;m dave gallo
Model: <SOS> this is bill lange . i &apos;m dave gallo
Attention Weights: tensor([[0.0895, 0.1238, 0.0898, 0.0814, 0.1320, 0.1320, 0.0779, 0.0745, 0.0784,
         0.1208],
        [0.0898, 0.1179, 0.0948, 0.0859, 0.1333, 0.1224, 0.0790, 0.0773, 0.0823,
         0.1173],
        [0.0963, 0.1081, 0.0961, 0.0944, 0.1330, 0.1179, 0.0802, 0.0819, 0.0891,
         0.1032],
        [0.0883, 0.1100, 0.0942, 0.0941, 0.1434, 0.1244, 0.0751, 0.0776, 0.0883,
         0.1046],
        [0.0872, 0.1061, 0.0998, 0.1018, 0.1444, 0.1228, 0.0775, 0.0813, 0.0914,
         0.0878],
        [0.0768, 0.1222, 0.0908, 0.0933, 0.1636, 0.1392, 0.0652, 0.0698, 0.0840,
         0.0952],
        [0.0793, 0.1214, 0.0903, 0.0912, 0.1692, 0.1364, 0.0649, 0.0695, 0.0839,
         0.0939],
        [0.07

KeyboardInterrupt: 

In [None]:
# Show the first rows of the experiment log summary, restricted to the
# run timestamp, the main training hyperparameters and the validation loss.
summary_columns = ['dt_created', 'num_epochs', 'learning_rate', 'clip_grad_max_norm', 'val_loss']
experiment_summary = summarize_results(load_experiment_log())
experiment_summary[summary_columns].head()

In [None]:
# Plot the learning curve for `results`.
# NOTE(review): `results` is not defined in this part of the notebook; it is
# presumably produced by the training cell above, whose output ends in
# KeyboardInterrupt -- confirm it is actually assigned before this cell runs.
# NOTE(review): the training log above shows Train BLEU 100.00 against
# Val BLEU ~0.28 while Val Loss keeps rising (13.82 at epoch 352 -> 14.20 at
# epoch 468), i.e. the model appears to be memorizing the training samples.
plot_single_learning_curve(results)

In [None]:
# Recorded metrics line (presumably the final epoch of the run plotted here -- confirm):
# Epoch: 199.00, Train Loss: 0.32, Val Loss: 13.19, Train BLEU: 98.94, Val BLEU: 0.27
plot_single_learning_curve(results)

In [None]:
# Variant with the attention energies computed in switched operand order:
#   energies = v_broadcast.bmm(torch.tanh(self.attn(concat)).transpose(1, 2))
# Recorded metrics line (presumably from that variant's run -- confirm):
# Epoch: 199.00, Train Loss: 0.63, Val Loss: 12.82, Train BLEU: 92.05, Val BLEU: 0.38
# NOTE(review): in the sampled attention weights printed above, source <PAD>
# positions receive large, identical weights (e.g. 0.1988 each at epoch 400),
# so padding is presumably not masked before the softmax -- verify in the
# attention module.
plot_single_learning_curve(results)

In [None]:
# Print the first 20 entries of the source-language id -> token table.
# zip() against range(20) stops after 20 items, so the rest of the vocabulary
# is never iterated just to be filtered out by an index check.
for i, token in zip(range(20), vocab[SRC_LANG]['id2token']):
    print("{}: {}".format(i, token))

In [None]:
# Print the first 20 entries of the target-language id -> token table.
# zip() against range(20) stops after 20 items, so the rest of the vocabulary
# is never iterated just to be filtered out by an index check.
for i, token in zip(range(20), vocab[TARG_LANG]['id2token']):
    print("{}: {}".format(i, token))

In [None]:
import torch

# Scratch experiment: how slicing along dim 0 and view() reshape a tensor.
x = torch.arange(3 * 5 * 10).view(3, 5, 10)   # values 0..149 laid out as (3, 5, 10)
y = x[1:]                                     # drop the first block along dim 0 -> (2, 5, 10)
z = y.view(-1, 10)                            # merge the two leading dims -> (10, 10)

# Same output order as computing and printing each tensor in turn.
for tensor in (x, y, z):
    print(tensor)

In [None]:
# Scratch experiment: flattening a tensor before and after swapping its dims.
t = torch.arange(2 * 5).view(5, 2)    # values 0..9 as 5 rows of 2
u = t.contiguous().view(-1)           # flatten in the original element order
v = t.permute(1, 0)                   # swap the two dims -> shape (2, 5)
w = v.contiguous().view(-1)           # flatten the permuted layout (copy made by contiguous())

# Printed in the order t, u, v, w.
for tensor in (t, u, v, w):
    print(tensor)

In [None]:
# Scratch experiment: let view() infer the leading dimension from -1.
a = torch.arange(2 * 1 * 300)   # 600 consecutive integers
print(a)

b = a.view(-1, 1, 300)          # -1 resolves to 2, giving shape (2, 1, 300)
print(b.shape)

In [None]:
# Take the first batch from the training loader and turn each row of source
# token ids back into a space-joined string of tokens.
for i, (src_idxs, targ_idxs, src_lens, targ_lens) in enumerate(full_loaders['train']):
    # Disabled debug prints: i, src_idxs / targ_idxs (and their .size()),
    # src_lens, targ_lens.
    id2token = vocab[SRC_LANG]['id2token']
    test_tensor = src_idxs
    list_of_lists = test_tensor.numpy().astype(int).tolist()

    def to_token(l):
        """Look up every id in `l` in `id2token` and join the tokens with spaces."""
        return ' '.join(id2token[idx] for idx in l)

    list_of_lists_tokens = list(map(to_token, list_of_lists))
    break  # only the first batch is inspected