In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from utils.model_utils import load_baseline
from config import OPUS_MT_MODELS
from evaluation.evaluator import Evaluator

from evaluation_tools import get_language_metrics

# Single Evaluator instance, handed to get_language_metrics for every language.
evaluator = Evaluator()
# load_baseline() returns a pair that is unpacked into the baseline model and
# its tokenizer; both are forwarded unchanged to get_language_metrics.
baseline_model, baseline_tokenizer = load_baseline()
# Passed as `source_lang` for every evaluated language pair.
lang1 = "en"

def datagetter_opensub(lang1, lang2, split="test", comp="random"):
    """Load one OpenSubtitles CSV for a language pair as a DataFrame.

    The file is looked up under ``data/opensubtitles/compressed/`` (relative
    to the current working directory) and is named
    ``{lang1}-{lang2}-{comp}.{split}.csv``.

    Args:
        lang1: First language code used in the file name.
        lang2: Second language code used in the file name.
        split: Split component of the file name (default ``"test"``).
        comp: Variant component of the file name. The default ``"random"``
            is a randomly sampled distribution, not a compressed one.

    Returns:
        The ``pandas.DataFrame`` parsed from the CSV file.
    """
    csv_path = f"data/opensubtitles/compressed/{lang1}-{lang2}-{comp}.{split}.csv"
    frame = pd.read_csv(csv_path)
    return frame

# Per-language results, keyed by the target-language key from OPUS_MT_MODELS.
metrics_by_lang, all_translations = {}, {}

# Arguments that are identical for every language are gathered once.
_shared_kwargs = dict(
    source_lang=lang1,
    datagetter=datagetter_opensub,
    baseline_model=baseline_model,
    baseline_tokenizer=baseline_tokenizer,
    evaluator=evaluator,
    n_samples=1000,  # for quick prototyping
)

for lang in OPUS_MT_MODELS.keys():
    # get_language_metrics returns a (metrics, translations) pair per language.
    _metrics, _translations = get_language_metrics(target_lang=lang, **_shared_kwargs)
    metrics_by_lang[lang] = _metrics
    all_translations[lang] = _translations

In [6]:
from evaluation_tools import print_metrics
# Print the collected metrics one language at a time, in insertion order.
for lang_name in metrics_by_lang:
    lang_metric = metrics_by_lang[lang_name]
    print_metrics(lang_name, lang_metric)

Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,2.07,0.06,0.01,0.06,15.12,13.91,0.15,0.75,1.22,0.0
0.5,17.55,0.46,0.25,0.45,35.2,34.55,0.4,0.81,0.75,0.62
0.6,24.8,0.51,0.3,0.5,42.95,42.1,0.47,0.84,0.88,0.81
0.7,27.49,0.54,0.33,0.53,46.54,45.61,0.52,0.85,0.99,0.92
0.8,29.03,0.56,0.35,0.55,49.85,48.8,0.54,0.85,1.07,0.99
0.9,28.47,0.55,0.35,0.54,50.37,49.27,0.54,0.85,1.13,0.99
1.0,27.27,0.55,0.35,0.54,51.0,49.82,0.55,0.85,1.18,1.0


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,1.7,0.05,0.01,0.05,12.39,11.4,0.12,0.61,1.22,0.0
0.5,23.4,0.61,0.33,0.6,46.93,46.07,0.53,1.08,0.75,0.96
0.6,28.18,0.58,0.34,0.57,48.81,47.84,0.53,0.95,0.88,1.0
0.7,27.77,0.55,0.33,0.54,47.01,46.07,0.53,0.86,0.99,0.97
0.8,27.13,0.52,0.33,0.51,46.59,45.61,0.5,0.79,1.07,0.94
0.9,25.19,0.49,0.31,0.48,44.58,43.6,0.48,0.75,1.13,0.89
1.0,23.11,0.47,0.3,0.46,43.22,42.22,0.47,0.72,1.18,0.85


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,1.73,0.05,0.01,0.05,11.89,11.63,0.15,0.73,1.38,0.0
0.5,17.71,0.48,0.25,0.47,37.54,36.76,0.43,0.83,0.88,0.8
0.6,19.6,0.49,0.25,0.48,40.1,39.22,0.45,0.83,0.96,0.86
0.7,21.31,0.5,0.27,0.49,42.38,41.4,0.47,0.84,1.02,0.92
0.8,22.31,0.5,0.27,0.49,43.37,42.42,0.48,0.84,1.09,0.95
0.9,23.17,0.51,0.28,0.5,44.92,43.95,0.5,0.84,1.13,0.99
1.0,22.81,0.52,0.28,0.51,45.27,44.24,0.5,0.84,1.15,1.0


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,1.25,0.04,0.01,0.04,8.62,8.43,0.11,0.53,1.38,0.0
0.5,20.12,0.55,0.28,0.53,42.66,41.77,0.49,0.94,0.88,1.0
0.6,20.42,0.51,0.26,0.5,41.77,40.85,0.47,0.86,0.96,0.96
0.7,20.89,0.49,0.26,0.48,41.55,40.59,0.46,0.82,1.02,0.95
0.8,20.47,0.46,0.25,0.45,39.79,38.92,0.44,0.77,1.09,0.9
0.9,20.5,0.45,0.25,0.44,39.75,38.89,0.44,0.74,1.13,0.9
1.0,19.83,0.45,0.24,0.44,39.37,38.47,0.43,0.73,1.15,0.88


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,15.6,0.44,0.22,0.42,42.31,41.07,0.45,0.82,1.41,0.0
0.5,23.48,0.51,0.28,0.5,40.91,40.44,0.47,0.83,1.0,0.42
0.6,24.51,0.51,0.29,0.5,43.37,42.79,0.49,0.84,1.09,0.66
0.7,23.7,0.53,0.3,0.52,45.23,44.5,0.51,0.84,1.16,0.86
0.8,23.8,0.54,0.31,0.52,46.49,45.74,0.53,0.84,1.2,1.0
0.9,22.73,0.53,0.29,0.51,46.17,45.36,0.52,0.84,1.25,0.88
1.0,22.71,0.53,0.29,0.52,46.96,46.13,0.53,0.84,1.27,0.95


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,11.06,0.31,0.16,0.3,30.01,29.13,0.32,0.58,1.41,0.0
0.5,23.48,0.51,0.28,0.5,40.91,40.44,0.47,0.83,1.0,1.0
0.6,22.49,0.47,0.27,0.46,39.79,39.26,0.45,0.77,1.09,0.96
0.7,20.43,0.46,0.26,0.45,38.99,38.36,0.44,0.72,1.16,0.94
0.8,19.83,0.45,0.26,0.43,38.74,38.12,0.44,0.7,1.2,0.94
0.9,18.18,0.42,0.23,0.41,36.94,36.29,0.42,0.67,1.25,0.76
1.0,17.88,0.42,0.23,0.41,36.98,36.32,0.42,0.66,1.27,0.78


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,1.39,0.07,0.01,0.07,12.99,12.61,0.16,0.73,1.27,0.0
0.5,13.67,0.44,0.22,0.43,32.52,31.73,0.37,0.8,0.71,0.59
0.6,17.42,0.48,0.26,0.47,38.47,37.42,0.41,0.82,0.81,0.74
0.7,21.9,0.52,0.29,0.5,43.38,42.11,0.45,0.83,0.94,0.88
0.8,24.05,0.52,0.31,0.51,46.37,44.98,0.47,0.84,1.01,0.95
0.9,24.78,0.53,0.31,0.52,47.69,46.3,0.49,0.84,1.06,0.98
1.0,23.47,0.54,0.32,0.53,48.14,46.75,0.51,0.84,1.1,1.0


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,1.09,0.06,0.01,0.06,10.23,9.93,0.13,0.57,1.27,0.0
0.5,19.25,0.62,0.31,0.61,45.8,44.69,0.52,1.13,0.71,0.98
0.6,21.51,0.59,0.32,0.58,47.49,46.2,0.51,1.01,0.81,1.0
0.7,23.3,0.55,0.31,0.53,46.15,44.8,0.48,0.88,0.94,0.97
0.8,23.81,0.51,0.31,0.5,45.91,44.53,0.47,0.83,1.01,0.95
0.9,23.38,0.5,0.29,0.49,44.99,43.68,0.46,0.79,1.06,0.93
1.0,21.34,0.49,0.29,0.48,43.76,42.5,0.46,0.76,1.1,0.9


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,0.89,0.02,0.0,0.02,11.85,11.4,0.13,0.71,1.39,0.0
0.5,12.47,0.39,0.17,0.39,32.85,31.96,0.37,0.8,0.82,0.65
0.6,14.73,0.42,0.19,0.41,36.77,35.74,0.4,0.81,0.89,0.75
0.7,18.39,0.45,0.23,0.45,40.07,39.05,0.44,0.82,0.95,0.88
0.8,20.5,0.47,0.24,0.46,43.02,41.84,0.47,0.83,1.04,0.96
0.9,21.9,0.47,0.25,0.46,44.43,43.25,0.47,0.83,1.08,0.99
1.0,20.86,0.47,0.25,0.46,45.67,44.26,0.48,0.83,1.14,1.0


Unnamed: 0,bleu,r1,r2,rl,chrF,chrf++,meteor,bert_f1,len_ratio,normalized_score
baseline,0.64,0.01,0.0,0.01,8.53,8.2,0.09,0.51,1.39,0.0
0.5,15.21,0.48,0.21,0.48,40.06,38.98,0.45,0.98,0.82,0.93
0.6,16.55,0.47,0.21,0.46,41.31,40.16,0.45,0.91,0.89,0.94
0.7,19.36,0.47,0.24,0.47,42.18,41.11,0.46,0.86,0.95,1.0
0.8,19.71,0.45,0.23,0.44,41.37,40.23,0.45,0.8,1.04,0.97
0.9,20.28,0.44,0.23,0.43,41.14,40.05,0.44,0.77,1.08,0.97
1.0,18.3,0.41,0.22,0.4,40.06,38.82,0.42,0.73,1.14,0.91
