## Evaluation
* We first evaluate every translation as-is, without checking sentence alignment.
* Make sure to run `Preparation.ipynb` before running this notebook.

In [1]:
from os.path import join
def load_sents(dataset: str, translator: str, src_lang: str, tgt_lang: str) -> list[str]:
    """Read the stored machine translations for one dataset/translator/language pair.

    The file is looked up at
    ``translations/{dataset}-{translator}-{src_lang}-{tgt_lang}.txt``,
    relative to the current working directory.

    Args:
        dataset: Dataset tag used in the file name (this notebook passes
            ``'ep'`` or ``'flores'``).
        translator: Translator tag used in the file name (this notebook
            passes ``'gpt'`` or ``'deepl'``).
        src_lang: Source language code used in the file name.
        tgt_lang: Target language code used in the file name.

    Returns:
        One string per line of the file, in file order, with leading and
        trailing whitespace removed. Blank lines are kept as empty strings.

    Raises:
        FileNotFoundError: If the translation file does not exist.
    """
    filename = f'{dataset}-{translator}-{src_lang}-{tgt_lang}.txt'
    file_path = join('translations', filename)
    # Decode explicitly as UTF-8: without it the platform's default encoding
    # is used, which can mis-decode non-ASCII text such as the Greek ('el')
    # translations. Assumes the files were written as UTF-8 -- TODO confirm
    # against Preparation.ipynb.
    with open(file_path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]

In [2]:
import shutil
import os
# Create the scratch directory that later cells pass as the parent of the
# per-(dataset, translator) folders and of the result CSVs.
# exist_ok=True keeps this cell safe to re-run.
os.makedirs('tmp_results', exist_ok=True)

In [3]:
from scripts.data_management import EuroParlManager, FloresPlusManager
# Data managers, keyed by the dataset tag that also appears in the
# translation file names.
ep_manager = EuroParlManager()
flores_manager = FloresPlusManager()
dms = {'ep': ep_manager, 'flores': flores_manager}

# Translator tags, as used in the translation file names.
tls = [
    'gpt',
    'deepl',
]

In [4]:
# Language pairs to evaluate. The alignment loop unpacks each pair into a
# (source, target) language code and uses the same list for every dataset.
all_pairs = EuroParlManager.get_pairs()


In [5]:
from scripts.post_process import direct_triplet_align
# For every language pair, dataset and translator: load the machine
# translations and the first 400 source/reference sentences, then hand the
# triplet to direct_triplet_align with 'tmp_results/<dataset>-<translator>'
# as its folder_path.
for pair in all_pairs:
    src_lang, tgt_lang = pair
    for dataset, manager in dms.items():
        # The Europarl it -> el direction is excluded from the evaluation.
        if not (dataset == 'ep' and pair == ('it', 'el')):
            for translator in tls:
                out_dir = join('tmp_results', f'{dataset}-{translator}')
                mt_sents = load_sents(dataset, translator, src_lang, tgt_lang)
                src_sents, tgt_sents = manager.get_sentence_pairs(
                    src_lang, tgt_lang, num_of_sents=400
                )
                direct_triplet_align(
                    mt_sents=mt_sents,
                    ref_sents=tgt_sents,
                    src_sents=src_sents,
                    src_lang=src_lang,
                    ref_lang=tgt_lang,
                    folder_path=out_dir,
                )

In [7]:
from scripts.scoring import ResultProducer
# Score every per-(dataset, translator) folder in 'tmp_results' and store the
# results as '<folder>.csv' next to it.
# Only directories are visited: this cell itself writes CSV files into
# 'tmp_results', so on a re-run a plain os.listdir() would also return those
# files and os.listdir(<csv path>) would raise NotADirectoryError.
aligned = [
    entry for entry in os.listdir('tmp_results')
    if os.path.isdir(join('tmp_results', entry))
]
for res in aligned:
    fp = join('tmp_results', res)
    files = os.listdir(fp)
    # Label = file name without the '.jsonl' suffix; value = path to the file.
    l2f = {f.replace('.jsonl', ''): join(fp, f) for f in files}
    rp = ResultProducer(label2files=l2f)
    rp.compute_results()
    rp.store_results(join('tmp_results', f'{res}.csv'))



In [10]:
from scripts.scoring import create_matrix_from_csv
# Load one score matrix per (dataset, translator) result CSV.
ep_gpt, ep_deepl, flores_gpt, flores_deepl = (
    create_matrix_from_csv(join('tmp_results', csv_name))
    for csv_name in (
        'ep-gpt.csv',
        'ep-deepl.csv',
        'flores-gpt.csv',
        'flores-deepl.csv',
    )
)

### Matrices
* Each matrix reads from source to target: the row is the source language and the column is the target language.

In [16]:
# Europarl / GPT score matrix (row = source language, column = target language).
# NOTE(review): a few cells (e.g. de->en 7.48, es->en 2.41) are far below
# their neighbours -- presumably misaligned sentences; confirm before
# interpreting them.
ep_gpt

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,34.052207,27.403131,34.265758,33.450159,21.559752,31.853186,19.909345,26.493696,26.156095,29.329501
de,34.496495,,23.401336,7.483574,34.40574,4.767343,31.288819,24.302457,25.314173,27.390053,27.251319
el,32.528914,27.909468,,34.108317,37.581394,19.816198,36.058423,11.543177,12.862619,29.987619,27.174453
en,34.598924,27.088722,28.540416,,36.721217,5.877664,33.207951,27.096256,27.868022,23.898661,27.720709
es,36.416038,32.385556,15.403091,2.409827,,19.211941,35.952666,28.261688,15.050712,32.150601,28.047307
fi,28.984256,26.40335,22.939204,31.805305,29.777864,,30.513101,7.239586,6.729594,24.917295,24.41138
fr,33.007701,28.890542,28.057027,33.892693,37.496366,18.709931,,27.078451,27.220359,27.383797,27.204276
it,26.915048,22.830478,,29.144336,31.736969,13.175652,28.628564,,23.233386,25.929821,21.734194
nl,28.484463,22.562986,21.464066,29.693938,27.44581,16.068469,26.936332,22.018028,,22.855778,22.578141
pt,32.535884,28.399012,28.751309,33.078499,35.746317,5.846544,33.797273,25.771903,25.961316,,24.433565


In [12]:
# Europarl / DeepL score matrix (row = source language, column = target language).
ep_deepl

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,34.060691,28.320274,40.89269,37.117352,25.503815,35.695082,27.5673,30.864241,31.047434,31.234542
de,34.974381,,26.427654,37.683698,36.148667,24.58625,37.804653,27.186651,28.339887,30.970712,29.996975
el,34.401319,29.99263,,39.020138,38.186257,23.645183,37.261932,28.691005,28.528897,34.262699,29.280422
en,37.49733,32.547469,30.79929,,41.137497,24.369398,38.496851,29.162393,32.202903,34.195964,32.251568
es,37.065578,32.860527,30.20693,43.274294,,24.895837,39.912205,30.341695,30.271553,35.809157,30.512634
fi,32.623281,28.696167,25.109554,35.4883,32.0302,,32.506162,24.320832,26.955631,27.897197,26.504494
fr,32.871805,30.348471,27.817182,38.308544,37.703949,22.997572,,28.78898,29.003367,33.227476,28.527832
it,29.072912,26.528497,,33.204503,34.749799,19.833691,33.320046,,26.338769,29.982963,24.20684
nl,28.717135,25.253817,22.129751,31.239803,29.104525,19.464109,29.80866,23.729883,,26.387243,23.574642
pt,32.717717,30.738889,28.661167,37.206175,39.076059,23.158269,39.056324,28.814821,28.687616,,27.93815


In [13]:
# Flores / GPT score matrix (row = source language, column = target language).
# NOTE(review): several cells are close to zero (e.g. fi->da 0.29,
# es->el 0.23) -- presumably misaligned sentences; confirm before
# interpreting them.
flores_gpt

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,38.863723,25.144143,51.678229,26.76896,25.403951,43.613682,29.552197,28.913497,42.015165,38.96864
de,38.691221,,24.628724,48.811945,26.089909,25.572269,40.467246,29.130906,28.881781,38.349227,36.044331
el,34.773403,32.386091,,43.565306,25.183134,22.467562,39.093314,25.621634,26.434385,35.889013,32.381023
en,49.252663,43.614308,28.990484,,29.179662,29.36792,51.947871,32.661039,30.930165,51.370512,46.351554
es,28.640257,28.275379,0.22881,35.985613,,19.249954,32.255087,21.654174,23.112735,24.278693,27.263133
fi,0.293491,29.987002,0.217186,39.660204,0.650015,,36.203299,25.802701,0.599517,0.583201,27.622318
fr,36.655553,34.863355,24.748256,49.696842,26.63817,24.417397,,26.510702,26.660309,39.963644,35.290419
it,29.439101,31.180642,21.067091,39.206555,22.630972,20.852346,0.622156,,23.426438,29.244247,28.651399
nl,28.519843,28.786491,18.973567,36.991055,23.369889,18.381937,33.125362,24.152701,,30.540325,25.760948
pt,39.132374,36.787979,24.158032,55.545055,25.336856,24.432898,44.780088,27.240783,27.576622,,36.77419


In [14]:
# Flores / DeepL score matrix (row = source language, column = target language).
flores_deepl

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,37.903019,27.065014,54.79953,26.358653,26.373987,44.505666,30.947992,29.349099,34.960044,39.809583
de,41.110841,,25.357988,49.230138,24.867585,25.864145,41.499288,30.679027,28.574291,33.325661,37.334801
el,34.583404,31.615394,,41.849369,24.529222,21.6342,38.631227,27.965881,24.619949,29.797411,31.002461
en,50.610947,44.393833,30.589381,,28.737538,29.775069,52.424001,34.797073,32.372457,42.252638,47.076802
es,30.376231,27.618031,20.542459,35.793684,,19.905015,35.569537,27.183703,24.40872,27.168136,27.571089
fi,33.174106,30.691418,21.968231,38.213989,21.449547,,36.97758,26.401138,24.790704,27.879875,30.11665
fr,38.416856,34.55113,25.433119,49.22722,26.064141,24.930215,,30.600824,27.320816,33.634741,35.322091
it,31.286225,29.542371,22.12738,37.348334,24.232551,20.625066,36.913005,,23.889913,27.824569,29.05266
nl,31.393276,29.895214,20.475395,36.490951,23.124973,21.546739,35.004903,26.1512,,27.540381,28.197053
pt,39.723654,36.616311,26.257181,53.701485,25.878094,24.627075,44.189374,31.001435,27.102937,,37.075581


## Evaluation After Alignment
* This section concerns the GPT-4.1 translations only.

In [22]:
from scripts.scoring import ResultProducer
# Every entry of 'tmp_results' that is not a result CSV.
aligned = [entry for entry in os.listdir('tmp_results') if not entry.endswith('.csv')]

# For each GPT entry: label (file name without '.jsonl') -> path to the file.
dm2l2f = {
    folder: {
        fname.replace('.jsonl', ''): join('tmp_results', folder, fname)
        for fname in os.listdir(join('tmp_results', folder))
    }
    for folder in aligned
    if 'gpt' in folder
}


In [26]:
# Folder holding the re-aligned triplet files of each dataset.
re_aligned = {
    dataset_tag: join('alignments', dataset_tag, 'triplets')
    for dataset_tag in ('flores', 'ep')
}

# Add the re-aligned files to each mapping; a label that already exists is
# overwritten with the re-aligned file's path.
for key, label2file in dm2l2f.items():
    dm, tl = key.split('-')
    triplet_dir = re_aligned[dm]
    label2file.update(
        (fname.replace('.jsonl', ''), join(triplet_dir, fname))
        for fname in os.listdir(triplet_dir)
    )

In [28]:
# Score each merged label -> file mapping and store the results as
# 'tmp_results/<key>-new.csv'.
for key, label2files in dm2l2f.items():
    out_csv = join('tmp_results', f'{key}-new.csv')
    producer = ResultProducer(label2files=label2files)
    producer.compute_results()
    producer.store_results(out_csv)

In [29]:
# Rebind the GPT matrices to the results computed from the '-new' CSVs.
ep_gpt, flores_gpt = (
    create_matrix_from_csv(join('tmp_results', csv_name))
    for csv_name in ('ep-gpt-new.csv', 'flores-gpt-new.csv')
)

In [30]:
# Europarl / GPT score matrix loaded from 'ep-gpt-new.csv'
# (row = source language, column = target language).
# NOTE(review): de->en (7.48), es->en (2.41) and en->fi (5.88) are still far
# below their neighbours -- confirm whether these pairs were re-aligned.
ep_gpt

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,34.052207,27.403131,34.265758,33.450159,21.559752,31.853186,24.941622,26.493696,26.156095,29.329501
de,34.496495,,23.401336,7.483574,34.40574,20.592041,31.288819,24.302457,25.314173,27.390053,27.251319
el,32.528914,27.909468,,34.108317,37.581394,19.816198,36.058423,26.241354,25.302648,29.987619,27.174453
en,34.598924,27.088722,28.540416,,36.721217,5.877664,33.207951,27.096256,27.868022,23.898661,27.720709
es,36.416038,32.385556,30.070853,2.409827,,19.211941,35.952666,28.261688,26.941189,32.150601,28.047307
fi,28.984256,26.40335,22.939204,31.805305,29.777864,,30.513101,22.30372,22.328122,24.917295,24.41138
fr,33.007701,28.890542,28.057027,33.892693,37.496366,18.709931,,27.078451,27.220359,27.383797,27.204276
it,26.915048,22.830478,,29.144336,31.736969,15.66986,28.628564,,23.233386,25.929821,21.734194
nl,28.484463,22.562986,21.464066,29.693938,27.44581,16.068469,26.936332,22.018028,,22.855778,22.578141
pt,32.535884,28.399012,28.751309,33.078499,35.746317,18.89753,33.797273,25.771903,25.961316,,24.433565


In [31]:
# Flores / GPT score matrix loaded from 'flores-gpt-new.csv'
# (row = source language, column = target language).
flores_gpt

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,38.863723,25.144143,51.678229,26.76896,25.403951,43.613682,29.552197,28.913497,42.015165,38.96864
de,38.691221,,24.628724,48.811945,26.089909,25.572269,40.467246,29.130906,28.881781,38.349227,36.044331
el,34.773403,32.386091,,43.565306,25.183134,22.467562,39.093314,25.621634,26.434385,35.889013,32.381023
en,49.252663,43.614308,28.990484,,29.179662,29.36792,51.947871,32.661039,30.930165,51.370512,46.351554
es,28.640257,28.275379,19.797762,35.985613,,19.249954,32.255087,21.654174,23.112735,24.278693,27.263133
fi,30.098796,29.987002,21.327814,39.660204,23.51701,,36.203299,25.802701,25.402128,33.296778,27.622318
fr,36.655553,34.863355,24.748256,49.696842,26.63817,24.417397,,26.510702,26.660309,39.963644,35.290419
it,29.439101,31.180642,21.067091,39.206555,22.630972,20.852346,33.154314,,23.426438,29.244247,28.651399
nl,28.519843,28.786491,18.973567,36.991055,23.369889,18.381937,33.125362,24.152701,,30.540325,25.760948
pt,39.132374,36.787979,24.158032,55.545055,25.336856,24.432898,44.780088,27.240783,27.576622,,36.77419
