In [1]:
%%capture
!pip install transformers sentencepiece sacremoses datasets sacrebleu

In [2]:
import pandas as pd
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset
from sacrebleu.metrics import BLEU, CHRF, TER
import time

In [3]:
models = [
    "facebook/wmt19-en-ru", 
    "facebook/nllb-200-distilled-600M", 
    "Helsinki-NLP/opus-mt-en-ru"
    ]

In [4]:
dataset = load_dataset("wmt16", "ru-en", split="test[:3%]")
translations = dataset["translation"]
source_texts = [translation['ru'] for translation in translations]
target_texts = [translation['en'] for translation in translations]



In [5]:
def translate_text(model, model_name, text):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    inputs = tokenizer.encode(text, return_tensors="pt", padding=True)
    start_time = time.time()
    outputs = model.generate(inputs.to(model.device))
    end_time = time.time()
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded, end_time - start_time

In [6]:
results = []

In [7]:
for model_name in models:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    model.to("cuda")
    translations = []
    total_time = 0
    
    for source_text in source_texts:
        translation, time_taken = translate_text(model, model_name, source_text)
        translations.append(translation)
        total_time += time_taken
    
    
    chrf = CHRF()
    chrf_score = chrf.corpus_score(translations, target_texts).score
    
    bleu = BLEU()
    bleu_score = bleu.corpus_score(translations, target_texts).score

    ter = TER()
    ter_score = ter.corpus_score(translations, [target_texts]).score
    avg_time = total_time / len(source_texts)
    
    result = {
        "Model name": model_name,
        "Number of params": model.num_parameters(),
        "Speed(seconds)": avg_time,
        "CHRF Score": chrf_score,
        "BLEU Score": bleu_score,
        "TER Score": ter_score,
    }
    results.append(result)

df = pd.DataFrame(results)



In [8]:
df

Unnamed: 0,Model name,Number of params,Speed(seconds),CHRF Score,BLEU Score,TER Score
0,facebook/wmt19-en-ru,293195776,0.388051,2.788104,0.190268,104.297994
1,facebook/nllb-200-distilled-600M,615073792,0.600816,3.472222,0.169931,109.283668
2,Helsinki-NLP/opus-mt-en-ru,76672000,1.139953,3.220612,0.113596,146.762178
