In [1]:
# https://github.com/bentrevett/pytorch-seq2seq/blob/master/1%20-%20Sequence%20to%20Sequence%20Learning%20with%20Neural%20Networks.ipynb
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import TranslationDataset, Multi30k
from torchtext.data import Field, BucketIterator, ReversibleField
from nltk.translate.bleu_score import sentence_bleu
import spacy

import random
import math
import os

In [2]:
# One seed shared by every random number generator this notebook relies on.
SEED = 1

# Ask cuDNN to use deterministic kernels so GPU runs are repeatable too.
torch.backends.cudnn.deterministic = True

# Seed PyTorch's generator and Python's `random` module
# (the latter drives the teacher-forcing draws in the Seq2Seq model).
torch.manual_seed(SEED)
random.seed(SEED)

In [3]:
# Load the spaCy pipelines by their full package names. The 'de' / 'en'
# shortcut links only exist on spaCy 2.x installs and were removed in spaCy 3,
# whereas the package names resolve on both.
# NOTE(review): assumes the small models (the ones `spacy download de/en`
# installs) are the ones available in this environment -- confirm.
spacy_de = spacy.load('de_core_news_sm')
spacy_en = spacy.load('en_core_web_sm')

In [4]:
def tokenize_de(text):
    """
    Split a German string into spaCy tokens and return the token texts
    as a list of strings in reversed order (last token first)
    """
    tokens = [token.text for token in spacy_de.tokenizer(text)]
    tokens.reverse()
    return tokens

def tokenize_en(text):
    """
    Split an English string into spaCy tokens and return the token texts
    as a list of strings, in their original order
    """
    tokens = []
    for token in spacy_en.tokenizer(text):
        tokens.append(token.text)
    return tokens

In [5]:
# Source (German) and target (English) fields use identical settings:
# lower-cased text wrapped in <sos> ... <eos>; only the tokenizer differs.
field_options = dict(init_token='<sos>', eos_token='<eos>', lower=True)
SRC = Field(tokenize=tokenize_de, **field_options)
TRG = Field(tokenize=tokenize_en, **field_options)

# Multi30k ships as parallel .de / .en files; pair each extension with its field.
splits = Multi30k.splits(exts=('.de', '.en'), fields=(SRC, TRG))
train_data, valid_data, test_data = splits

print(f"Number of training examples: {len(train_data.examples)}")
print(f"Number of validation examples: {len(valid_data.examples)}")
print(f"Number of testing examples: {len(test_data.examples)}")

Number of training examples: 29000
Number of validation examples: 1014
Number of testing examples: 1000


In [6]:
# Sanity check: dump the first training pair as a dict of token lists.
first_example = train_data.examples[0]
print(vars(first_example))

{'src': ['.', 'büsche', 'vieler', 'nähe', 'der', 'in', 'freien', 'im', 'sind', 'männer', 'weiße', 'junge', 'zwei'], 'trg': ['two', 'young', ',', 'white', 'males', 'are', 'outside', 'near', 'many', 'bushes', '.']}


In [7]:
# Build both vocabularies from the training split only, with a minimum
# token frequency of 2.
for field in (SRC, TRG):
    field.build_vocab(train_data, min_freq=2)

print(f"Unique tokens in source (de) vocabulary: {len(SRC.vocab)}")
print(f"Unique tokens in target (en) vocabulary: {len(TRG.vocab)}")

# Spot-check the string -> index mapping for two common words ...
print(TRG.vocab.stoi["two"])
print(TRG.vocab.stoi["young"])

# ... and the index -> string mapping by decoding a hand-copied index sequence.
trg_arr = [   4,   14,    6,    4,  314,  109,   10,   32,   11, 2763,   20,    4,  94,   15,   13,    4,  531,   12,  121,   18,  413,  344,   44,    5,3]
for token_id in trg_arr:
    print(TRG.vocab.itos[token_id])



Unique tokens in source (de) vocabulary: 7853
Unique tokens in target (en) vocabulary: 5893
16
24
a
woman
in
a
tank
top
is
sitting
and
knitting
at
a
table
,
with
a
bottle
of
something
to
drink
beside
her
.
<eos>


In [8]:
BATCH_SIZE = 128

# Prefer the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

# One iterator per split, all with the same batch size and target device.
all_splits = (train_data, valid_data, test_data)
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    all_splits,
    batch_size=BATCH_SIZE,
    device=device,
)

cpu


In [9]:
class Encoder(nn.Module):
    """Embedding + multi-layer LSTM that encodes a source batch into its final states.

    Args:
        input_dim: number of rows in the embedding table (source vocabulary size).
        emb_dim: width of each embedding vector.
        hid_dim: hidden size of the LSTM.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability; applied to the embeddings and also
            passed to ``nn.LSTM`` as its ``dropout`` argument.
    """

    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()

        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_dim, emb_dim)

        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)

        # `self.dropout` is the dropout *module*. The raw probability is not
        # stored under this name, because that assignment would be overwritten
        # here anyway.
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        """Encode ``src`` and return the LSTM's final ``(hidden, cell)`` states.

        Args:
            src: token indices, [sent len, batch size].

        Returns:
            hidden: [n layers * n directions, batch size, hid dim]
            cell:   [n layers * n directions, batch size, hid dim]
        """
        #src = [sent len, batch size]

        embedded = self.dropout(self.embedding(src))

        #embedded = [sent len, batch size, emb dim]

        # The per-step outputs (top layer only) are not needed here; only the
        # final states are handed on to the caller.
        _, (hidden, cell) = self.rnn(embedded)

        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]

        return hidden, cell

In [10]:
class Decoder(nn.Module):
    """Embedding + multi-layer LSTM + linear layer that decodes one time step.

    Args:
        output_dim: number of rows in the embedding table and width of the
            prediction (target vocabulary size).
        emb_dim: width of each embedding vector.
        hid_dim: hidden size of the LSTM.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability; applied to the embeddings and also
            passed to ``nn.LSTM`` as its ``dropout`` argument.
    """

    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()

        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.output_dim = output_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(output_dim, emb_dim)

        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)

        self.out = nn.Linear(hid_dim, output_dim)

        # `self.dropout` is the dropout *module*. The raw probability is not
        # stored under this name, because that assignment would be overwritten
        # here anyway.
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, cell):
        """Advance the decoder by one token.

        Args:
            input: current token indices, [batch size].
            hidden: previous hidden state, [n layers, batch size, hid dim].
            cell: previous cell state, [n layers, batch size, hid dim].

        Returns:
            prediction: unnormalised scores, [batch size, output dim].
            hidden, cell: updated LSTM states, same shapes as the inputs.
        """
        #input = [batch size]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]

        #n directions is always 1 in the decoder, therefore:
        #hidden = [n layers, batch size, hid dim]
        #cell = [n layers, batch size, hid dim]

        # Add a leading length-1 "sentence" axis so the LSTM sees one time step.
        input = input.unsqueeze(0)

        #input = [1, batch size]

        embedded = self.dropout(self.embedding(input))

        #embedded = [1, batch size, emb dim]

        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))

        #sent len and n directions are always 1 in the decoder, therefore:
        #output = [1, batch size, hid dim]
        #hidden = [n layers, batch size, hid dim]
        #cell = [n layers, batch size, hid dim]

        prediction = self.out(output.squeeze(0))

        #prediction = [batch size, output dim]

        return prediction, hidden, cell

In [11]:
class Seq2Seq(nn.Module):
    """Couples an encoder and a decoder and unrolls the decoder over the target.

    The encoder's final (hidden, cell) states seed the decoder, which is then
    stepped once per target position, with optional teacher forcing.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device

        # The encoder states are fed straight into the decoder LSTM, so the
        # two must agree on hidden size and depth.
        assert encoder.hid_dim == decoder.hid_dim, "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, "Encoder and decoder must have equal number of layers!"

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Return decoder scores for every target position.

        src = [sent len, batch size]
        trg = [sent len, batch size]
        teacher_forcing_ratio is the probability of feeding the ground-truth
        token (rather than the model's own best guess) as the next input,
        e.g. 0.75 means ground-truth inputs are used 75% of the time.

        The result is [trg sent len, batch size, trg vocab size]; row 0 is
        never written and stays all zeros.
        """
        max_len, batch_size = trg.shape[0], trg.shape[1]
        trg_vocab_size = self.decoder.output_dim

        # Buffer that collects the decoder scores of every step.
        outputs = torch.zeros(max_len, batch_size, trg_vocab_size, device=self.device)

        # The encoder's last states initialise the decoder.
        hidden, cell = self.encoder(src)

        # Decoding starts from the first target row (the <sos> tokens).
        decoder_input = trg[0, :]

        for step in range(1, max_len):
            step_scores, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[step] = step_scores

            # One random draw per step decides between ground truth and prediction.
            use_ground_truth = random.random() < teacher_forcing_ratio
            _, best_tokens = step_scores.max(1)

            if use_ground_truth:
                decoder_input = trg[step]
            else:
                decoder_input = best_tokens

        return outputs

In [12]:
# Vocabulary sizes fix the width of the embedding tables and of the output layer.
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)

# Hyper-parameters (the encoder and decoder must share HID_DIM and N_LAYERS).
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(
    input_dim=INPUT_DIM,
    emb_dim=ENC_EMB_DIM,
    hid_dim=HID_DIM,
    n_layers=N_LAYERS,
    dropout=ENC_DROPOUT,
)
dec = Decoder(
    output_dim=OUTPUT_DIM,
    emb_dim=DEC_EMB_DIM,
    hid_dim=HID_DIM,
    n_layers=N_LAYERS,
    dropout=DEC_DROPOUT,
)

model = Seq2Seq(enc, dec, device).to(device)

optimizer = optim.Adam(model.parameters())

# Padding positions in the target must not contribute to the loss.
pad_idx = TRG.vocab.stoi['<pad>']
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)


In [13]:
def train(model, iterator, optimizer, criterion, clip, log_every=1):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: called as ``model(src, trg)``; must return scores indexed as
            [sent len, batch size, output dim].
        iterator: sized iterable of batches exposing ``.src`` and ``.trg``.
        optimizer: optimizer over ``model``'s parameters.
        criterion: loss called as ``criterion(scores_2d, targets_1d)``.
        clip: maximum gradient norm passed to ``clip_grad_norm_``.
        log_every: print the batch counter every ``log_every`` batches. The
            default of 1 keeps the original one-line-per-batch log; pass
            ``None`` or ``0`` to silence it.

    Returns:
        Sum of the batch losses divided by ``len(iterator)``.
    """
    model.train()

    epoch_loss = 0

    for i, batch in enumerate(iterator):
        if log_every and i % log_every == 0:
            print("train iterator ", i)

        src = batch.src
        trg = batch.trg
        optimizer.zero_grad()

        output = model(src, trg)
        #trg = [sent len, batch size]
        #output = [sent len, batch size, output dim]

        # Skip position 0 (the <sos> row, for which the model emits nothing)
        # and flatten so the criterion sees one row per target token:
        #trg = [(sent len - 1) * batch size]
        #output = [(sent len - 1) * batch size, output dim]
        loss = criterion(output[1:].view(-1, output.shape[2]), trg[1:].view(-1))

        loss.backward()

        # Clip before stepping so a single bad batch cannot blow up the weights.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)

In [14]:
import torch
def process_final(t, itos=None):
    """Turn a matrix of token indices into one token list per sentence.

    Each column of ``t`` is read top to bottom as one sentence. Reading stops
    at the first ``'<pad>'`` (which is dropped) or the first ``'<eos>'``
    (which is kept as the last token).

    Args:
        t: 2-D integer tensor indexed as ``t[position, sentence]``; may live
            on any device.
        itos: index -> token-string list. Defaults to ``TRG.vocab.itos``.

    Returns:
        A list with one list of token strings per column of ``t``.
    """
    if itos is None:
        itos = TRG.vocab.itos

    sentences = []
    # Transposing and then calling `tolist()` yields one plain-int list per
    # sentence. Unlike `.numpy()` it works for CUDA tensors, and unlike
    # `len(t[0])` it does not fail when `t` has zero rows.
    for column in t.t().tolist():
        sentence = []
        for index in column:
            token = itos[index]
            if token == '<pad>':
                break
            sentence.append(token)
            if token == '<eos>':
                break
        sentences.append(sentence)
    return sentences


def process_output(output, itos=None, verbose=True):
    """Greedy-decode model scores into one token list per sentence.

    Args:
        output: scores indexed as ``output[position, sentence, vocab entry]``.
        itos: index -> token-string list, forwarded to ``process_final``
            (which falls back to ``TRG.vocab.itos`` when it is ``None``).
        verbose: when true (the default), print the matrix of chosen indices
            before converting it.

    Returns:
        A list with one list of token strings per sentence.
    """
    # A single reduction over the vocabulary axis replaces the per-element
    # Python loop; the indices are moved to the CPU before printing/decoding.
    final_tensor = output.max(2)[1].cpu()
    if verbose:
        print("final tensor ")
        print(final_tensor)
    return process_final(final_tensor, itos)




In [15]:
def evaluate(model, iterator, criterion, testing):
    """Compute the mean per-batch loss without teacher forcing.

    Args:
        model: called as ``model(src, trg, 0)``; must return scores indexed as
            [sent len, batch size, output dim].
        iterator: sized iterable of batches exposing ``.src`` and ``.trg``.
        criterion: loss called as ``criterion(scores_2d, targets_1d)``.
        testing: when true, additionally decode every sentence, print each
            reference/candidate pair and print the average sentence BLEU.

    Returns:
        Sum of the batch losses divided by ``len(iterator)``.
    """
    model.eval()

    epoch_loss = 0
    bleu_score = 0
    count_pair = 0
    with torch.no_grad():

        for i, batch in enumerate(iterator):

            src = batch.src
            trg = batch.trg

            output = model(src, trg, 0) #turn off teacher forcing
            if testing:
                # Position 0 is the <sos> row, for which the model emits nothing.
                sent_out = process_output(output[1:])
                # Decode the targets from a CPU copy so the index -> token
                # conversion does not depend on which device the batch is on.
                sent_trg = process_final(trg[1:].cpu())
                # NOTE(review): the printed pairs show '<eos>' in both the
                # reference and the candidate, and no smoothing function is
                # passed to sentence_bleu. Both affect the reported score --
                # confirm this is intended.
                for o, t in zip(sent_out, sent_trg):
                    reference = [t]
                    candidate = o
                    print("reference ")
                    print(reference)
                    print("candidate ")
                    print(candidate)
                    bleu_score += sentence_bleu(reference, candidate)
                    count_pair += 1

            # Flatten to one row per target token, skipping the <sos> position.
            loss = criterion(output[1:].view(-1, output.shape[2]), trg[1:].view(-1))

            epoch_loss += loss.item()
        if testing:
            print("count pair ", count_pair)
            print("bleu score ")

            print(bleu_score / count_pair)
    return epoch_loss / len(iterator)

In [16]:
N_EPOCHS = 10
CLIP = 1
SAVE_DIR = 'models'
MODEL_SAVE_PATH = os.path.join(SAVE_DIR, 'seq_2_seq.pt')

# Lowest validation loss seen so far; only that checkpoint is kept on disk.
best_valid_loss = float('inf')

# Make sure the checkpoint directory exists before the first save.
os.makedirs(SAVE_DIR, exist_ok=True)

for epoch in range(N_EPOCHS):
    print("epoch ", epoch)
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion, False)

    # Overwrite the checkpoint whenever validation loss improves.
    if best_valid_loss > valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)

    print(f'| Epoch: {epoch+1:03} | Train Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f} | Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f} |')
    print(f'| Epoch: {epoch+1:03} | Train Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}  |')

epoch  0
train iterator  0
train iterator  1
train iterator  2
train iterator  3
train iterator  4
train iterator  5
train iterator  6
train iterator  7
train iterator  8
train iterator  9
train iterator  10
train iterator  11
train iterator  12
train iterator  13
train iterator  14
train iterator  15
train iterator  16
train iterator  17
train iterator  18
train iterator  19
train iterator  20
train iterator  21
train iterator  22
train iterator  23
train iterator  24
train iterator  25
train iterator  26
train iterator  27
train iterator  28
train iterator  29
train iterator  30
train iterator  31
train iterator  32
train iterator  33
train iterator  34
train iterator  35
train iterator  36
train iterator  37
train iterator  38
train iterator  39
train iterator  40
train iterator  41
train iterator  42
train iterator  43
train iterator  44
train iterator  45
train iterator  46
train iterator  47
train iterator  48
train iterator  49
train iterator  50
train iterator  51
train iterato

train iterator  186
train iterator  187
train iterator  188
train iterator  189
train iterator  190
train iterator  191
train iterator  192
train iterator  193
train iterator  194
train iterator  195
train iterator  196
train iterator  197
train iterator  198
train iterator  199
train iterator  200
train iterator  201
train iterator  202
train iterator  203
train iterator  204
train iterator  205
train iterator  206
train iterator  207
train iterator  208
train iterator  209
train iterator  210
train iterator  211
train iterator  212
train iterator  213
train iterator  214
train iterator  215
train iterator  216
train iterator  217
train iterator  218
train iterator  219
train iterator  220
train iterator  221
train iterator  222
train iterator  223
train iterator  224
train iterator  225
train iterator  226
| Epoch: 002 | Train Loss: 4.463 | Train PPL:  86.735 | Val. Loss: 4.692 | Val. PPL: 109.118 |
| Epoch: 002 | Train Loss: 4.463 | Train PPL:  86.735  |
epoch  2
train iterator  0
t

train iterator  137
train iterator  138
train iterator  139
train iterator  140
train iterator  141
train iterator  142
train iterator  143
train iterator  144
train iterator  145
train iterator  146
train iterator  147
train iterator  148
train iterator  149
train iterator  150
train iterator  151
train iterator  152
train iterator  153
train iterator  154
train iterator  155
train iterator  156
train iterator  157
train iterator  158
train iterator  159
train iterator  160
train iterator  161
train iterator  162
train iterator  163
train iterator  164
train iterator  165
train iterator  166
train iterator  167
train iterator  168
train iterator  169
train iterator  170
train iterator  171
train iterator  172
train iterator  173
train iterator  174
train iterator  175
train iterator  176
train iterator  177
train iterator  178
train iterator  179
train iterator  180
train iterator  181
train iterator  182
train iterator  183
train iterator  184
train iterator  185
train iterator  186


train iterator  87
train iterator  88
train iterator  89
train iterator  90
train iterator  91
train iterator  92
train iterator  93
train iterator  94
train iterator  95
train iterator  96
train iterator  97
train iterator  98
train iterator  99
train iterator  100
train iterator  101
train iterator  102
train iterator  103
train iterator  104
train iterator  105
train iterator  106
train iterator  107
train iterator  108
train iterator  109
train iterator  110
train iterator  111
train iterator  112
train iterator  113
train iterator  114
train iterator  115
train iterator  116
train iterator  117
train iterator  118
train iterator  119
train iterator  120
train iterator  121
train iterator  122
train iterator  123
train iterator  124
train iterator  125
train iterator  126
train iterator  127
train iterator  128
train iterator  129
train iterator  130
train iterator  131
train iterator  132
train iterator  133
train iterator  134
train iterator  135
train iterator  136
train iterato

train iterator  35
train iterator  36
train iterator  37
train iterator  38
train iterator  39
train iterator  40
train iterator  41
train iterator  42
train iterator  43
train iterator  44
train iterator  45
train iterator  46
train iterator  47
train iterator  48
train iterator  49
train iterator  50
train iterator  51
train iterator  52
train iterator  53
train iterator  54
train iterator  55
train iterator  56
train iterator  57
train iterator  58
train iterator  59
train iterator  60
train iterator  61
train iterator  62
train iterator  63
train iterator  64
train iterator  65
train iterator  66
train iterator  67
train iterator  68
train iterator  69
train iterator  70
train iterator  71
train iterator  72
train iterator  73
train iterator  74
train iterator  75
train iterator  76
train iterator  77
train iterator  78
train iterator  79
train iterator  80
train iterator  81
train iterator  82
train iterator  83
train iterator  84
train iterator  85
train iterator  86
train iterat

train iterator  219
train iterator  220
train iterator  221
train iterator  222
train iterator  223
train iterator  224
train iterator  225
train iterator  226
| Epoch: 009 | Train Loss: 3.366 | Train PPL:  28.965 | Val. Loss: 3.969 | Val. PPL:  52.937 |
| Epoch: 009 | Train Loss: 3.366 | Train PPL:  28.965  |
epoch  9
train iterator  0
train iterator  1
train iterator  2
train iterator  3
train iterator  4
train iterator  5
train iterator  6
train iterator  7
train iterator  8
train iterator  9
train iterator  10
train iterator  11
train iterator  12
train iterator  13
train iterator  14
train iterator  15
train iterator  16
train iterator  17
train iterator  18
train iterator  19
train iterator  20
train iterator  21
train iterator  22
train iterator  23
train iterator  24
train iterator  25
train iterator  26
train iterator  27
train iterator  28
train iterator  29
train iterator  30
train iterator  31
train iterator  32
train iterator  33
train iterator  34
train iterator  35
train

In [17]:
# Restore the best checkpoint written during training. `map_location` remaps
# the stored tensors onto this session's `device`, so a checkpoint saved on a
# GPU machine can still be loaded when only the CPU is available.
model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))

# testing=True also prints every reference/candidate pair and the average BLEU.
test_loss = evaluate(model, test_iterator, criterion, True)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

final tensor 
tensor([[ 16, 110,   4,  ...,   4,  24,  16],
        [112,  19,  34,  ...,  14,  14,  30],
        [112,  17,  10,  ...,  10,  10,  22],
        ...,
        [  5,   3,   3,  ...,   3,   3,   3],
        [  3,   3,   3,  ...,   3,   3,   3],
        [  3,   3,   3,  ...,   3,   3,   3]])
reference 
[['two', 'medium', 'sized', 'dogs', 'run', 'across', 'the', 'snow', '.', '<eos>']]
candidate 
['two', 'dogs', 'dogs', 'running', 'through', 'the', 'snow', '.', '<eos>']
reference 
[['four', 'people', 'are', 'playing', 'soccer', 'on', 'a', 'beach', '.', '<eos>']]
candidate 
['four', 'people', 'are', 'playing', 'on', 'the', 'beach', '.', '<eos>']
reference 
[['a', 'boy', 'riding', 'a', 'skateboard', 'on', 'a', 'skateboarding', 'ramp', '<eos>']]
candidate 
['a', 'boy', 'is', 'riding', 'a', 'a', 'on', 'a', '.', '.', '<eos>']
reference 
[['a', 'dog', 'is', 'jumping', 'through', 'a', '<unk>', 'obstacle', '.', '<eos>']]
candidate 
['a', 'dog', 'jumps', 'through', 'a', 'air', '.', '<e

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


final tensor 
tensor([[ 4,  4,  4,  ...,  4, 63,  4],
        [14, 35,  9,  ..., 14, 17,  6],
        [10, 10, 22,  ..., 13,  6,  6],
        ...,
        [ 5,  3,  5,  ...,  5,  3,  5],
        [ 3,  5,  3,  ...,  3,  3,  3],
        [ 5,  3,  5,  ...,  3,  3,  3]])
reference 
[['woman', 'selling', 'bags', 'of', 'fruit', 'on', 'a', 'sidewalk', '.', '<eos>']]
candidate 
['a', 'woman', 'is', 'on', 'a', 'street', 'with', 'with', 'graffiti', '.', '<eos>']
reference 
[['a', 'dog', 'runs', 'outside', 'with', 'a', 'yellow', 'toy', '.', '<eos>']]
candidate 
['a', 'dog', 'is', 'with', 'a', 'red', 'toy', '.', '<eos>']
reference 
[['a', 'man', 'wearing', 'an', 'orange', 'shirt', 'and', 'helmet', '.', '<eos>']]
candidate 
['a', 'man', 'wearing', 'a', 'red', 'and', 'and', 'a', 'hat', '.', '<eos>']
reference 
[['guitar', 'player', 'performs', 'at', 'a', 'nightclub', 'red', 'guitar', '.', '<eos>']]
candidate 
['a', 'playing', 'on', 'a', 'stage', 'with', 'a', 'red', 'in', '.', '<eos>']
reference 
[['

final tensor 
tensor([[ 4,  4, 63,  ...,  7,  4,  7],
        [ 9, 38, 17,  ..., 34, 24, 25],
        [ 6, 12, 18,  ..., 10, 12, 35],
        ...,
        [ 3,  5,  3,  ...,  3,  5,  3],
        [ 3,  3,  3,  ...,  3,  3,  3],
        [ 3,  3,  3,  ...,  3,  3,  3]])
reference 
[['a', 'man', 'in', 'a', 'suit', 'is', 'sitting', 'at', 'a', 'bus', 'stop', '.', '<eos>']]
candidate 
['a', 'man', 'in', 'a', 'suit', 'is', 'sitting', 'on', 'a', 'bench', '.', '<eos>']
reference 
[['a', 'group', 'of', 'people', 'walk', 'down', 'the', 'street', 'in', 'the', 'sun', '.', '<eos>']]
candidate 
['a', 'group', 'of', 'people', 'walking', 'down', 'the', 'street', '.', '<eos>']
reference 
[['children', 'fight', 'to', 'win', 'a', 'tug', '-', 'of', '-', 'war', 'battle', '.', '<eos>']]
candidate 
['children', 'are', 'to', 'to', '<unk>', '<unk>', '.', '<eos>']
reference 
[['a', 'black', 'and', 'white', 'dog', 'is', 'playing', 'with', 'a', 'white', 'ball', '.', '<eos>']]
candidate 
['a', 'black', 'and', 'white

final tensor 
tensor([[ 24,  16,   4,  ..., 110,  63,  19],
        [127,  30,  24,  ...,  24,  17,  36],
        [ 37,  17,   9,  ...,  11,   6,   6],
        ...,
        [  5,   3,   3,  ...,   3,   3,   3],
        [  3,   3,   3,  ...,   3,   3,   3],
        [  3,   3,   3,  ...,   3,   3,   3]])
reference 
[['small', 'orchestra', 'playing', 'with', 'open', 'violin', 'case', 'in', 'front', '<eos>']]
candidate 
['young', 'boys', 'playing', 'on', 'stage', 'with', 'a', 'on', 'the', 'ground', '.', '<eos>']
reference 
[['two', 'men', 'sit', 'talking', 'near', 'a', 'stone', 'building', '.', '<eos>']]
candidate 
['two', 'men', 'are', 'sitting', 'a', 'a', 'of', 'a', 'a', 'a', '.', '<eos>']
reference 
[['a', 'young', 'man', 'skateboards', 'off', 'a', 'pink', 'railing', '.', '<eos>']]
candidate 
['a', 'young', 'man', 'is', 'a', 'a', 'a', 'a', 'a', 'a', '.', '<eos>']
reference 
[['a', 'girl', 'at', 'the', 'shore', 'of', 'a', 'beach', 'with', 'a', 'mountain', 'in', 'the', 'distance', '.', '<

final tensor 
tensor([[  4,   4,   4,  ...,   4,  21,   4],
        [106, 357,  24,  ...,  35, 233,   0],
        [106,   6,  64,  ...,  13,  14,  12],
        ...,
        [  3,   3,  98,  ...,   3,   3,   3],
        [  3,   3,   5,  ...,   3,   3,   3],
        [  3,   3,   3,  ...,   3,   3,   3]])
reference 
[['lady', 'with', 'a', 'green', 'mask', 'at', 'the', 'dentist', 'and', 'she', 'just', 'look', 'very', 'unhappy', '.', '<eos>']]
candidate 
['a', 'asian', 'asian', 'woman', 'with', 'a', 'red', 'hair', 'is', 'a', 'a', 'a', '.', '<eos>']
reference 
[['a', 'youth', 'wearing', 'a', 'shirt', 'and', 'tie', 'crouching', 'down', 'and', 'giving', 'the', 'peace', 'sign', '.', '<eos>']]
candidate 
['a', 'toddler', 'in', 'a', 'blue', 'shirt', 'and', 'a', 'a', 'a', 'a', 'a', '.', '<eos>']
reference 
[['an', 'african', 'tribe', 'is', 'standing', 'in', 'their', 'garden', 'with', 'the', 'forest', 'in', 'the', 'background', '.', '<eos>']]
candidate 
['a', 'young', 'person', 'is', 'standing', 'i

final tensor 
tensor([[  4,   7,   4,  ...,   4,   4,   4],
        [ 14, 105,   9,  ...,  24,   9,   6],
        [ 13,   6,   6,  ...,  34,   6,   6],
        ...,
        [  3,   3,   5,  ...,   3,   3,   3],
        [  3,   3,   5,  ...,   3,   3,   3],
        [  3,   3,   3,  ...,   3,   3,   3]])
reference 
[['a', 'woman', 'with', 'a', 'push', '-', 'to', '-', 'talk', 'cellphone', 'and', 'headset', 'waiting', 'at', 'a', 'crosswalk', '.', '<eos>']]
candidate 
['a', 'woman', 'with', 'a', '<unk>', 'and', 'a', 'a', 'a', 'a', 'a', 'a', 'a', '.', '<eos>']
reference 
[['the', 'soccer', 'player', 'wearing', 'the', 'gold', 'jersey', 'is', 'blocking', 'the', 'ball', 'from', 'the', 'opposing', 'team', 'player', '.', '<eos>']]
candidate 
['the', 'player', 'in', 'the', 'white', 'jersey', 'is', 'the', 'ball', 'in', 'the', 'air', '.', '<eos>']
reference 
[['a', 'man', 'dressed', 'in', 'black', 'leather', 'and', 'a', 'cowboy', 'hat', 'is', 'walking', 'around', 'a', 'renaissance', 'festival', '.',

final tensor 
tensor([[  4,   4,   4,  ...,   4,   4,  16],
        [ 14,   9,  24,  ...,  64,  64,  24],
        [ 10,   6,   9,  ...,   6,  10, 104],
        ...,
        [  3,   3,   3,  ...,   3,   3,   3],
        [  3,   3,   3,  ...,   3,   3,   3],
        [  3,   3,   3,  ...,   3,   3,   3]])
reference 
[['a', 'woman', 'runs', 'after', 'making', 'a', 'hit', 'in', 'women', "'s", 'softball', ',', 'the', 'catcher', 'rises', 'to', 'her', 'feet', '.', '<eos>']]
candidate 
['a', 'woman', 'is', 'her', '<unk>', 'to', 'the', 'the', 'while', 'the', '<unk>', 'while', 'a', 'a', 'watches', '.', '<eos>']
reference 
[['a', 'man', 'in', 'uniform', 'and', 'a', 'man', 'in', 'a', 'blue', 'shirt', 'are', 'standing', 'in', 'front', 'of', 'a', 'truck', '.', '<eos>']]
candidate 
['a', 'man', 'in', 'a', 'black', 'shirt', 'and', 'a', 'man', 'in', 'a', 'black', 'shirt', 'standing', 'in', 'front', 'of', 'a', 'building', '.', '<eos>']
reference 
[['a', 'young', 'man', 'in', 'a', 'blue', 'shirt', 'grinds

final tensor 
tensor([[  4,   4,   4,  ...,   7,   4,   4],
        [  9,   9,  34,  ...,  16,   9, 120],
        [  6,   6,   6,  ...,  26,  91,  73],
        ...,
        [  3,   3,   3,  ...,   3,   3,   3],
        [ 98,  98,   3,  ...,   3,   3,   3],
        [  5,   5,   3,  ...,   3,   3,   3]])
reference 
[['a', 'man', 'in', 'a', 'black', 'jacket', 'and', 'checkered', 'hat', 'wearing', 'black', 'and', 'white', 'striped', 'pants', 'plays', 'an', 'electric', 'guitar', 'on', 'a', 'stage', 'with', 'a', 'singer', 'and', 'another', 'guitar', 'player', 'in', 'the', 'background', '.', '<eos>']]
candidate 
['a', 'man', 'in', 'a', 'black', 'and', 'and', 'a', 'black', 'hat', ',', 'and', 'a', 'hat', 'is', 'playing', 'a', 'a', 'stage', 'with', 'a', 'a', 'on', 'a', 'stage', 'with', 'a', 'on', 'the', 'background', '.', '<eos>']
reference 
[['a', 'man', 'wearing', 'a', 'gray', 'shirt', ',', 'blue', 'jeans', 'and', 'a', 'neon', 'green', 'safety', 'vest', 'is', 'standing', 'on', 'a', 'railroad',