# Model Evaluation
## Load Model

In [2]:
import torch
from torchtext.data.utils import get_tokenizer
from src.utils import *

%matplotlib inline

SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
model_pth = "../models/"
model_name = "transformer-5-22-2-best"
model = torch.load(model_pth + model_name + ".pth.tar")
model.eval()

AttributeError: Can't get attribute 'Seq2SeqTransformer' on <module 'my_transformer' from 'c:\\Users\\10799\\OneDrive - 南方科技大学\\Code\\ME338\\nmt-multi30k-pytorch\\my_transformer.py'>

In [2]:
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 7,667,147 trainable parameters


## Build Vocabulary

In [3]:
pth_base = "./.data/multi30k/task1/raw/"
train_pths = ('train.de', 'train.en')
val_pths = ('val.de', 'val.en')
test_pths = ('test_2016_flickr.de', 'test_2016_flickr.en')
train_filepaths = [(pth_base + pth) for pth in train_pths]
test_filepaths = [(pth_base + pth) for pth in test_pths]

de_tokenizer = get_tokenizer('spacy', language='de_core_news_sm')
en_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')

de_vocab = build_vocab(train_filepaths[0], de_tokenizer, min_freq=3)
en_vocab = build_vocab(train_filepaths[1], en_tokenizer, min_freq=3)

BOS_IDX = de_vocab['<bos>']
EOS_IDX = de_vocab['<eos>']

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
print(len(de_vocab))
print(len(en_vocab))

5374
4555


In [5]:
translate(model, "eine gruppe von menschen steht vor einem iglu .", de_vocab, en_vocab, de_tokenizer, BOS_IDX, EOS_IDX, device)

' a group of people standing in front of a <unk> sign . '

## Prepare Reference and Predictions

In [6]:
'''load reference'''
with open(test_filepaths[0], 'r', encoding='utf8') as f:
    test_data = f.readlines()
    
'''update reference.txt'''
with open(test_filepaths[1], 'r', encoding='utf8') as f:
    reference = f.readlines()

for i in range(len(reference)):
    reference[i] = " ".join(en_tokenizer(reference[i])).lower()

with open("reference.txt",'w+') as f:
    f.writelines(reference)

In [7]:
'''make predictions'''
predictions = []
for data in test_data:
    temp_trans = translate(model, data.lower(), de_vocab, en_vocab, de_tokenizer, BOS_IDX, EOS_IDX, device)
    predictions.append((temp_trans[1:-3]+" . \n"))

'''update predictions.txt'''
with open("predictions.txt",'w+') as f:
    f.writelines(predictions)

In [11]:
for i,pre in enumerate(predictions):
    predictions[i] = pre.replace("<unk>"," ")
'''update predictions.txt'''
with open("predictions.txt",'w+') as f:
    f.writelines(predictions)

## Calculate BLEU

In [12]:
! perl ./multi-bleu.perl -lc reference.txt < predictions.txt

with open(model_pth + model_name + ".txt",'w+') as f:    
    f.writelines(predictions)

BLEU = 37.14, 70.6/45.7/30.7/21.1 (BP=0.976, ratio=0.977, hyp_len=12752, ref_len=13058)


In [13]:
from torchtext.data.metrics import bleu_score
references_corpus  = []
candidate_corpus = []
for pred,ref in zip(predictions, reference):
    temp = pred.rstrip(" \n").split(" ")
    candidate_corpus.append(temp)
    temp = ref.rstrip(" \n").split(" ")
    references_corpus.append([temp])
bleu_torchtext = bleu_score(candidate_corpus, references_corpus)
print(f'BLEU score = {bleu_torchtext*100:.2f}')

BLEU score = 33.84


In [14]:
print(references_corpus[3])
print(candidate_corpus[3])

[['five', 'people', 'wearing', 'winter', 'jackets', 'and', 'helmets', 'stand', 'in', 'the', 'snow', ',', 'with', 'snowmobiles', 'in', 'the', 'background', '.']]
['five', 'people', 'in', 'winter', 'jackets', 'and', 'helmets', 'are', 'standing', 'in', 'the', 'snow', 'with', '', '', 'in', 'the', 'background', '.']


### Check BLEU from txt

In [1]:
# with open(model_pth + "transformer-5-21-7-best" + ".txt",'r') as f:    
#     predictions = f.readlines()

# with open("predictions.txt",'w+') as f:
#     f.writelines(predictions)

!perl ./multi-bleu.perl -lc reference.txt < predictions.txt

'perl' �����ڲ����ⲿ���Ҳ���ǿ����еĳ���
���������ļ���
