# Model Evaluation
## Load Model

In [3]:
import math
import torchtext
import torch
import torch.nn as nn
from torchtext.data.utils import get_tokenizer
from collections import Counter
from torchtext.vocab import Vocab
from torchtext.utils import download_from_url, extract_archive
from torch import Tensor
import time
import io
from utils import *

%matplotlib inline

# Fix the RNG seed so any stochastic step in this notebook is repeatable.
torch.manual_seed(0)

# Pick the hardware available on *this* machine and load the checkpoint onto it.
# Without `map_location`, torch.load restores tensors to the device they were
# saved from, which fails on a CPU-only machine when the checkpoint was saved
# from CUDA. It also keeps the model on the same device that is later handed
# to `translate`.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints that come from a trusted source.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.load("./models/transformer-5-18.pth.tar", map_location=device)

# Switch to evaluation mode (dropout layers become no-ops); as the last
# expression of the cell this also displays the model architecture.
model.eval()

Seq2SeqTransformer(
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=512, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=512, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=512, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
   

## Build Vocabulary

In [2]:
# Directory holding the raw Multi30k (task 1) archives, relative to the notebook.
pth_base = "./.data/multi30k/task1/raw/"

# (German, English) archive names for the training and test splits.
train_pths = ('train.de.gz', 'train.en.gz')
test_pths = ('test_2016_flickr.de.gz', 'test_2016_flickr.en.gz')


def _extract_first(archive_names):
    """Extract each archive under ``pth_base``; keep the first path extract_archive returns for each."""
    extracted = []
    for archive_name in archive_names:
        extracted.append(extract_archive(pth_base + archive_name)[0])
    return extracted


train_filepaths = _extract_first(train_pths)
test_filepaths = _extract_first(test_pths)

# Device used for inference: GPU when one is available, CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# spaCy tokenizers, one per language.
de_tokenizer = get_tokenizer('spacy', language='de_core_news_sm')
en_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')

# Vocabularies are built from the training split only (index 0 = German, 1 = English).
de_vocab = build_vocab(train_filepaths[0], de_tokenizer)
en_vocab = build_vocab(train_filepaths[1], en_tokenizer)

# Special-token ids are looked up in the German vocabulary.
# NOTE(review): presumably build_vocab assigns the same special-token ids in
# both vocabularies, so these are also valid on the English side -- confirm.
BOS_IDX = de_vocab['<bos>']
EOS_IDX = de_vocab['<eos>']

In [3]:
# Smoke test: translate one German sentence and display the result.
translate(
    model,
    "Eine Gruppe von Menschen steht vor einem Iglu .",
    de_vocab,
    en_vocab,
    de_tokenizer,
    BOS_IDX,
    EOS_IDX,
    device,
)

' A group of people stand in front of an igloo . '

## Prepare Reference and Predictions

In [4]:
# Read the German test sentences (index 0 of test_filepaths); these are the
# model inputs, one sentence per line, trailing newline kept.
with open(test_filepaths[0], 'r', encoding='utf8') as f:
    test_data = f.readlines()

# Read the English test sentences (index 1); these are the BLEU references.
with open(test_filepaths[1], 'r', encoding='utf8') as f:
    reference = f.readlines()

# Detach a line-final period from the preceding word so the references use the
# same " ." ending that the prediction lines are written with.
reference = [line.replace(".\n", " .\n") for line in reference]

# Write with an explicit encoding: the inputs were read as utf8, and relying on
# the platform default here can fail or garble non-ASCII characters.
with open("reference.txt", 'w', encoding='utf8') as f:
    f.writelines(reference)

In [5]:
def _format_prediction(raw_translation):
    """Turn one `translate` output into a line for predictions.txt.

    Surrounding whitespace is removed and, when the text ends with a period,
    that period is written as a separate " ." token. For the usual output
    shape (' ... . ') this yields exactly what the former ``[1:-3] + " ."``
    slice produced, but it no longer cuts real characters off translations
    that do not end in ' . '.

    Args:
        raw_translation: string returned by `translate`.

    Returns:
        The cleaned sentence terminated by a newline.
    """
    text = raw_translation.strip()
    if text.endswith("."):
        text = text[:-1].rstrip() + " ."
    return text + "\n"


# Translate every German test sentence.
# NOTE(review): each `data` line still carries the trailing newline kept by
# readlines(); confirm that `translate` / the tokenizer ignore it.
predictions = [
    _format_prediction(
        translate(model, data, de_vocab, en_vocab, de_tokenizer, BOS_IDX, EOS_IDX, device)
    )
    for data in test_data
]

# Write the hypotheses with an explicit encoding instead of the platform default.
with open("predictions.txt", 'w', encoding='utf8') as f:
    f.writelines(predictions)

## Calculate BLEU

In [7]:
# Score the hypotheses in predictions.txt (fed on stdin) against reference.txt
# with the multi-bleu.perl script expected in the notebook's directory.
# NOTE(review): `-lc` is presumably the script's "lowercase before scoring"
# switch -- confirm against the script's usage text.
! perl ./multi-bleu.perl -lc reference.txt < predictions.txt

BLEU = 31.73, 62.5/38.5/25.1/16.8 (BP=1.000, ratio=1.104, hyp_len=14161, ref_len=12825)
