In [34]:
%load_ext autoreload
%autoreload 2

from mint.model.transformer import Transformer
from mint.translator import Translator, BeamSearch, GreedySearch
from mint.tokenizer import Tokenizer
from mint.dataset import Dataset
from mint.metrics import bleu, chrf2
from tqdm import tqdm
import torch

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# --- Evaluation settings -------------------------------------------------
# MAX_STEPS caps how many test samples evaluate() scores.
# USE_CUDA opts in to GPU execution; it only takes effect if CUDA is available.
MAX_STEPS = 10
USE_CUDA = False

# --- Artifacts: pretrained model, both tokenizers, and the test split ----
model = Transformer.load("../pretrained/model")
source_tokenizer = Tokenizer.load("../datasets/en_sk/source_tokenizer")
target_tokenizer = Tokenizer.load("../datasets/en_sk/target_tokenizer")
dataset = Dataset.load("../datasets/en_sk/")["test"]

# --- Device selection ----------------------------------------------------
# Fall back to the CPU unless CUDA is both available and requested.
if torch.cuda.is_available() and USE_CUDA:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The model is moved only when the GPU was requested.
if USE_CUDA:
    model.to(device)

def evaluate(translator, max_length=None):
    """Score a translator on up to ``MAX_STEPS`` samples of ``dataset``.

    For each sample the ``"source"`` text is translated and the prediction is
    compared with the ``"target"`` text using ``bleu`` and ``chrf2``. The
    per-sample scores are averaged over the samples that were actually
    evaluated, printed, and returned.

    Args:
        translator: Object exposing ``translate(source, max_length)``.
        max_length: If not ``None``, both source and target are first clipped
            with their tokenizer's ``clip`` to this length, and the same value
            is forwarded to ``translator.translate``.

    Returns:
        A ``(bleu_score, chrf2_score)`` tuple of averaged scores. The values
        keep whatever numeric type ``bleu`` / ``chrf2`` produce.

    Raises:
        ValueError: If no sample was evaluated (empty ``dataset`` or
            ``MAX_STEPS <= 0``), since an average would be meaningless.
    """
    bleu_total, chrf2_total = 0, 0
    n_evaluated = 0

    for i, sample in tqdm(enumerate(dataset), "Evaluating", total=MAX_STEPS):
        if i >= MAX_STEPS:
            break
        source = sample["source"]
        target = sample["target"]
        if max_length is not None:
            # clip() is called with a one-element list and the single result
            # is unpacked again.
            source = source_tokenizer.clip([source], max_length)[0]
            target = target_tokenizer.clip([target], max_length)[0]

        prediction = translator.translate(source, max_length)
        bleu_total += bleu(prediction, target)
        chrf2_total += chrf2(prediction, target)
        n_evaluated += 1

    if n_evaluated == 0:
        raise ValueError(
            "No samples were evaluated; check that `dataset` is non-empty "
            "and MAX_STEPS is positive."
        )

    # Divide by the number of samples actually scored, not by MAX_STEPS:
    # a dataset shorter than MAX_STEPS would otherwise deflate the averages.
    bleu_score = bleu_total / n_evaluated
    chrf2_score = chrf2_total / n_evaluated
    print(f"BLEU: {bleu_score:.4f}, CHRF2: {chrf2_score:.4f}")

    return bleu_score, chrf2_score

{'n_blocks': 10, 'vocab_size': 10001, 'transformer_block_config': {'d_model': 512, 'd_feedforward': 2048, 'p_dropout': 0.1, 'attention_config': {'n_heads': 8, 'd_model': 512, 'max_seq_len': 129, 'context_window': None}}, 'embedding_config': {'vocab_size': 10001, 'd_model': 512, 'max_seq_len': 129, 'learnable_positional_embeddings': True}}


In [36]:
# Baseline run: no search_strategy is passed, so Translator uses its default
# (presumably greedy decoding, given the GreedySearch import — TODO confirm).
translator = Translator(model, source_tokenizer, target_tokenizer)
# Clip inputs/references to 128 and forward the same limit to translate().
# NOTE(review): the loaded model config reports max_seq_len=129; confirm that
# 128 is the intended limit relative to it.
evaluate(translator, max_length=128)

Evaluating: 100%|██████████| 10/10 [00:52<00:00,  5.27s/it]

BLEU: 0.0000, CHRF2: 1.0366





(tensor(1.0491e-08), 1.0366107796006776)

In [None]:
# Same evaluation, but decoding with BeamSearch(5)
# (presumably a beam width of 5 — TODO confirm against BeamSearch's signature).
# Note that this rebinds the global `translator` from the previous cell.
translator = Translator(model, source_tokenizer, target_tokenizer, search_strategy=BeamSearch(5))
# Same 128 length limit as the baseline run so the scores are comparable.
evaluate(translator, max_length=128)

Evaluating:   0%|          | 0/10 [00:00<?, ?it/s]