## Evaluation
* We just evaluate everything without worrying about alignment
* Make sure to run `Preparation.ipynb` before.

In [6]:
import shutil
import os
os.makedirs('tmp_results', exist_ok=True)

In [7]:
from scripts.data_management import EuroParlManager, FloresPlusManager
dms = {
    'ep' : EuroParlManager(),
    'flores' : FloresPlusManager()
}

tls = ['gpt', 'deepl']

In [8]:
all_pairs = EuroParlManager.get_pairs()


In [9]:
from scripts.post_process import direct_triplet_align, load_mt_sents
from os.path import join
for pair in all_pairs:
    s, t = pair
    for dataset in dms:
        if dataset == 'ep' and pair == ('it', 'el'):
            continue
        for translator in tls:
            mt_sents = load_mt_sents(dataset, translator, s, t)
            src_sents, tgt_sents = dms[dataset].get_sentence_pairs(s, t, num_of_sents=400)
            direct_triplet_align(
                mt_sents=mt_sents,
                ref_sents=tgt_sents,
                src_sents=src_sents,
                src_lang=s,
                ref_lang=t,
                folder_path=join('tmp_results', f'{dataset}-{translator}')
            )

In [10]:
from scripts.scoring import ResultProducer
aligned = [o for o in os.listdir('tmp_results') if not o.endswith('.csv')]
for res in aligned:
    fp = join('tmp_results', res)
    files = os.listdir(fp)
    l2f = {f.replace('.jsonl', ''): join(fp, f) for f in files}
    rp = ResultProducer(label2files=l2f)
    rp.compute_results()
    rp.store_results(join('tmp_results', f'{res}.csv'))



In [18]:
from scripts.scoring import create_matrix_from_csv
from os.path import join
ep_gpt = create_matrix_from_csv(join('tmp_results', 'ep-gpt.csv'))
ep_deepl = create_matrix_from_csv(join('tmp_results', 'ep-deepl.csv'))
flores_gpt = create_matrix_from_csv(join('tmp_results', 'flores-gpt.csv'))
flores_deepl = create_matrix_from_csv(join('tmp_results', 'flores-deepl.csv'))

### Matrices
* From source to target (left to right), the row is the source and the column is the target

In [19]:
ep_gpt.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,34.1,27.4,34.3,33.5,21.6,31.9,19.9,26.5,26.2,29.3
de,34.5,,23.4,7.5,34.4,4.8,31.3,24.3,25.3,27.4,27.3
el,32.5,27.9,,34.1,37.6,19.8,36.1,11.5,12.9,30.0,27.2
en,34.6,27.1,28.5,,36.7,5.9,33.2,27.1,27.9,23.9,27.7
es,36.4,32.4,15.4,2.4,,19.2,36.0,28.3,15.1,32.2,28.0
fi,29.0,26.4,22.9,31.8,29.8,,30.5,7.2,6.7,24.9,24.4
fr,33.0,28.9,28.1,33.9,37.5,18.7,,27.1,27.2,27.4,27.2
it,26.9,22.8,,29.1,31.7,13.2,28.6,,23.2,25.9,21.7
nl,28.5,22.6,21.5,29.7,27.4,16.1,26.9,22.0,,22.9,22.6
pt,32.5,28.4,28.8,33.1,35.7,5.8,33.8,25.8,26.0,,24.4


In [20]:
ep_deepl.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,34.1,28.3,40.9,37.1,25.5,35.7,27.6,30.9,31.0,31.2
de,35.0,,26.4,37.7,36.1,24.6,37.8,27.2,28.3,31.0,30.0
el,34.4,30.0,,39.0,38.2,23.6,37.3,28.7,28.5,34.3,29.3
en,37.5,32.5,30.8,,41.1,24.4,38.5,29.2,32.2,34.2,32.3
es,37.1,32.9,30.2,43.3,,24.9,39.9,30.3,30.3,35.8,30.5
fi,32.6,28.7,25.1,35.5,32.0,,32.5,24.3,27.0,27.9,26.5
fr,32.9,30.3,27.8,38.3,37.7,23.0,,28.8,29.0,33.2,28.5
it,29.1,26.5,,33.2,34.7,19.8,33.3,,26.3,30.0,24.2
nl,28.7,25.3,22.1,31.2,29.1,19.5,29.8,23.7,,26.4,23.6
pt,32.7,30.7,28.7,37.2,39.1,23.2,39.1,28.8,28.7,,27.9


In [21]:
flores_gpt.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,38.9,25.1,51.7,26.8,25.4,43.6,29.6,28.9,42.0,39.0
de,38.7,,24.6,48.8,26.1,25.6,40.5,29.1,28.9,38.3,36.0
el,34.8,32.4,,43.6,25.2,22.5,39.1,25.6,26.4,35.9,32.4
en,49.3,43.6,29.0,,29.2,29.4,51.9,32.7,30.9,51.4,46.4
es,28.6,28.3,0.2,36.0,,19.2,32.3,21.7,23.1,24.3,27.3
fi,0.3,30.0,0.2,39.7,0.7,,36.2,25.8,0.6,0.6,27.6
fr,36.7,34.9,24.7,49.7,26.6,24.4,,26.5,26.7,40.0,35.3
it,29.4,31.2,21.1,39.2,22.6,20.9,0.6,,23.4,29.2,28.7
nl,28.5,28.8,19.0,37.0,23.4,18.4,33.1,24.2,,30.5,25.8
pt,39.1,36.8,24.2,55.5,25.3,24.4,44.8,27.2,27.6,,36.8


In [22]:
flores_deepl.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,37.9,27.1,54.8,26.4,26.4,44.5,30.9,29.3,35.0,39.8
de,41.1,,25.4,49.2,24.9,25.9,41.5,30.7,28.6,33.3,37.3
el,34.6,31.6,,41.8,24.5,21.6,38.6,28.0,24.6,29.8,31.0
en,50.6,44.4,30.6,,28.7,29.8,52.4,34.8,32.4,42.3,47.1
es,30.4,27.6,20.5,35.8,,19.9,35.6,27.2,24.4,27.2,27.6
fi,33.2,30.7,22.0,38.2,21.4,,37.0,26.4,24.8,27.9,30.1
fr,38.4,34.6,25.4,49.2,26.1,24.9,,30.6,27.3,33.6,35.3
it,31.3,29.5,22.1,37.3,24.2,20.6,36.9,,23.9,27.8,29.1
nl,31.4,29.9,20.5,36.5,23.1,21.5,35.0,26.2,,27.5,28.2
pt,39.7,36.6,26.3,53.7,25.9,24.6,44.2,31.0,27.1,,37.1


## Evaluation After Alignment

In [16]:
from scripts.scoring import ResultProducer
from os.path import join
import os
dm2l2f = {}
aligned = [f for f in os.listdir('tmp_results') if not f.endswith('.csv')]
for res in aligned:
    fp = join('tmp_results', res)
    files = os.listdir(fp)
    l2f = {f.replace('.jsonl', ''): join(fp, f) for f in files}
    dm2l2f[res] = l2f


In [17]:
for key in dm2l2f:
    dm, tl = key.split('-')
    folder_path = join('triplets', f'{dm}-{tl}')
    files = os.listdir(folder_path)
    print(len(files))
    for f in files:
        l2fkey = f.replace('.jsonl', '')
        dm2l2f[key][l2fkey] = join(folder_path, f)

110
110
110
110


In [18]:
for key in dm2l2f:
    rp = ResultProducer(label2files=dm2l2f[key])
    rp.compute_results()
    rp.store_results(join('tmp_results', f'{key}-n.csv'))

In [24]:
from scripts.scoring import create_matrix_from_csv
ep_gpt_a = create_matrix_from_csv(join('tmp_results', 'ep-gpt-n.csv'))
flores_gpt_a = create_matrix_from_csv(join('tmp_results', 'flores-gpt-n.csv'))

ep_deepl_a = create_matrix_from_csv(join('tmp_results', 'ep-deepl-n.csv'))
flores_deepl_a = create_matrix_from_csv(
    join('tmp_results', 'flores-deepl-n.csv'))

In [25]:
ep_gpt_a.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,34.1,27.1,34.6,33.2,21.9,31.9,25.1,26.9,26.4,29.7
de,34.8,,23.4,32.6,35.0,20.4,31.1,25.0,25.5,27.2,27.7
el,32.5,28.1,,34.3,37.8,19.8,36.4,26.1,25.3,30.1,27.2
en,34.7,27.4,28.8,,36.9,19.2,33.4,26.9,28.2,24.3,28.4
es,36.4,32.6,30.6,36.6,,19.3,35.9,28.6,27.4,32.4,28.4
fi,28.9,26.5,22.7,31.4,29.8,,30.3,22.3,22.4,24.7,24.6
fr,33.0,28.7,27.8,34.0,37.5,18.3,,27.0,27.0,27.4,27.3
it,27.0,23.3,24.3,29.4,32.4,15.5,29.2,,23.4,26.6,22.2
nl,30.8,23.8,22.9,31.8,29.8,17.6,28.8,23.6,,24.3,25.0
pt,32.7,28.4,28.6,33.1,35.6,18.8,34.0,26.3,26.3,,24.6


In [26]:
flores_gpt_a.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,38.7,25.4,51.6,26.9,25.5,43.8,29.6,28.9,42.1,39.0
de,38.9,,25.0,49.0,26.4,26.0,41.0,29.3,29.2,38.2,36.4
el,34.7,32.3,,43.5,25.4,22.5,39.1,25.6,26.5,35.8,32.5
en,49.2,43.6,28.9,,29.1,29.4,51.8,32.6,30.9,51.2,46.3
es,28.8,28.5,19.9,36.2,,19.3,32.4,21.7,23.3,24.4,27.3
fi,30.4,30.1,21.4,40.2,23.1,,36.6,25.8,25.4,32.7,27.8
fr,36.8,35.1,25.0,49.8,26.7,24.5,,26.5,26.7,40.1,35.4
it,29.4,31.0,21.1,39.6,22.7,20.9,33.4,,23.5,29.2,28.5
nl,28.3,28.5,19.2,37.3,23.5,18.8,33.4,23.9,,30.9,25.5
pt,39.3,37.0,24.3,55.8,25.5,24.6,45.1,27.4,27.8,,36.9


In [27]:
ep_deepl_a.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,34.1,28.3,41.0,36.2,25.8,36.0,27.9,31.1,31.2,31.5
de,35.3,,26.9,37.8,36.4,24.7,38.1,27.5,28.5,31.1,30.4
el,34.3,30.2,,39.3,38.4,23.6,37.7,28.8,28.8,34.5,29.5
en,37.5,33.0,31.3,,41.0,24.7,38.8,29.0,32.4,34.6,32.9
es,36.8,33.0,30.4,43.3,,25.0,39.7,30.4,30.2,35.8,30.8
fi,32.6,28.9,25.4,35.8,31.7,,32.6,24.8,27.3,28.0,27.0
fr,32.7,30.1,27.6,38.4,37.5,22.4,,28.5,28.8,33.3,28.4
it,29.4,27.3,26.2,33.9,35.4,20.0,34.1,,26.4,30.8,24.7
nl,31.1,27.2,23.4,33.4,31.1,21.9,31.7,25.2,,28.4,25.6
pt,32.7,30.6,28.7,37.2,38.9,23.4,39.2,29.1,29.1,,28.0


In [28]:
flores_deepl_a.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,37.7,27.2,54.7,26.5,26.3,44.8,31.0,29.5,35.1,39.7
de,41.2,,25.5,49.7,25.0,26.3,42.1,31.0,28.8,33.3,37.8
el,34.5,31.6,,41.8,24.7,21.8,38.7,28.0,24.7,29.8,31.2
en,50.5,44.3,30.5,,28.8,29.6,52.4,34.8,32.4,42.2,47.0
es,30.5,27.7,20.6,35.9,,20.1,35.7,27.2,24.7,27.3,27.6
fi,33.2,30.8,22.2,39.0,21.5,,37.2,26.6,25.1,28.4,30.2
fr,38.6,34.7,25.4,49.3,26.2,25.0,,30.6,27.4,33.8,35.4
it,31.3,29.3,22.2,37.6,24.2,20.7,36.8,,24.0,28.0,28.9
nl,31.3,29.7,20.4,36.5,23.0,22.0,35.1,26.2,,27.9,28.2
pt,39.9,36.7,26.4,54.0,26.0,24.7,44.3,31.2,27.3,,37.3


## Alignment Differences

In [30]:
diff_flores_deepl = (flores_deepl - flores_deepl_a).round(1)
diff_flores_deepl

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,0.2,-0.2,0.1,-0.2,0.0,-0.3,-0.1,-0.1,-0.1,0.1
de,-0.1,,-0.1,-0.5,-0.1,-0.4,-0.6,-0.3,-0.2,0.0,-0.4
el,0.1,0.0,,0.0,-0.1,-0.2,-0.1,0.0,-0.1,-0.0,-0.2
en,0.1,0.1,0.0,,-0.1,0.2,0.0,0.0,-0.0,0.1,0.1
es,-0.1,-0.1,-0.1,-0.1,,-0.2,-0.1,-0.1,-0.3,-0.1,-0.0
fi,-0.0,-0.1,-0.2,-0.7,-0.1,,-0.2,-0.2,-0.3,-0.5,-0.1
fr,-0.2,-0.1,0.0,-0.1,-0.2,-0.1,,0.0,-0.1,-0.1,-0.1
it,-0.0,0.2,-0.1,-0.2,0.1,-0.1,0.1,,-0.1,-0.2,0.1
nl,0.1,0.2,0.1,-0.1,0.1,-0.4,-0.1,-0.1,,-0.4,0.0
pt,-0.2,-0.1,-0.2,-0.3,-0.1,-0.1,-0.1,-0.2,-0.2,,-0.3


In [31]:
diff_ep_deepl = (ep_deepl - ep_deepl_a).round(1)
diff_ep_deepl

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,-0.1,0.1,-0.2,0.9,-0.3,-0.3,-0.4,-0.3,-0.1,-0.3
de,-0.3,,-0.4,-0.2,-0.2,-0.1,-0.3,-0.3,-0.2,-0.2,-0.4
el,0.1,-0.2,,-0.3,-0.2,0.0,-0.4,-0.1,-0.3,-0.3,-0.2
en,0.0,-0.5,-0.5,,0.1,-0.3,-0.3,0.1,-0.2,-0.4,-0.6
es,0.3,-0.1,-0.2,0.0,,-0.1,0.2,-0.0,0.0,0.0,-0.3
fi,0.1,-0.2,-0.3,-0.3,0.4,,-0.0,-0.4,-0.3,-0.1,-0.5
fr,0.2,0.2,0.2,-0.1,0.2,0.6,,0.3,0.2,-0.1,0.1
it,-0.3,-0.8,,-0.7,-0.7,-0.2,-0.8,,-0.1,-0.8,-0.5
nl,-2.4,-1.9,-1.3,-2.1,-2.0,-2.4,-1.9,-1.5,,-2.0,-2.1
pt,0.1,0.2,0.0,-0.0,0.2,-0.3,-0.1,-0.3,-0.4,,-0.1


In [32]:
diff_ep_gpt = (ep_gpt - ep_gpt_a).round(1)
diff_ep_gpt

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,-0.1,0.3,-0.3,0.2,-0.3,-0.1,-5.2,-0.4,-0.2,-0.4
de,-0.3,,-0.0,-25.1,-0.6,-15.7,0.1,-0.7,-0.2,0.2,-0.5
el,0.0,-0.2,,-0.2,-0.2,0.0,-0.4,-14.5,-12.4,-0.1,-0.1
en,-0.1,-0.4,-0.3,,-0.2,-13.3,-0.2,0.2,-0.3,-0.4,-0.7
es,0.0,-0.2,-15.2,-34.2,,-0.1,0.1,-0.4,-12.3,-0.2,-0.3
fi,0.0,-0.1,0.2,0.4,0.0,,0.2,-15.1,-15.7,0.3,-0.2
fr,0.0,0.2,0.3,-0.1,-0.0,0.4,,0.1,0.2,-0.0,-0.1
it,-0.1,-0.5,,-0.3,-0.6,-2.3,-0.6,,-0.2,-0.6,-0.5
nl,-2.3,-1.3,-1.4,-2.1,-2.3,-1.5,-1.9,-1.6,,-1.4,-2.4
pt,-0.2,0.0,0.1,-0.0,0.1,-12.9,-0.2,-0.5,-0.4,,-0.2


In [33]:
diff_flores_gpt = (flores_gpt - flores_gpt_a).round(1)
diff_flores_gpt

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,0.1,-0.2,0.1,-0.2,-0.1,-0.2,-0.1,0.0,-0.1,-0.0
de,-0.2,,-0.3,-0.2,-0.3,-0.4,-0.6,-0.2,-0.3,0.1,-0.3
el,0.1,0.0,,0.0,-0.2,0.0,0.0,-0.0,-0.1,0.1,-0.1
en,0.1,0.0,0.0,,0.1,-0.1,0.1,0.1,-0.0,0.2,0.1
es,-0.2,-0.2,-19.6,-0.2,,-0.1,-0.2,-0.0,-0.2,-0.1,-0.1
fi,-30.2,-0.1,-21.1,-0.6,-22.4,,-0.4,-0.0,-24.8,-32.1,-0.2
fr,-0.2,-0.2,-0.2,-0.1,-0.1,-0.0,,0.0,-0.0,-0.1,-0.1
it,0.0,0.2,0.0,-0.3,-0.0,-0.0,-32.7,,-0.1,0.0,0.2
nl,0.2,0.3,-0.2,-0.3,-0.2,-0.5,-0.2,0.2,,-0.3,0.2
pt,-0.2,-0.2,-0.1,-0.3,-0.1,-0.1,-0.3,-0.1,-0.2,,-0.1


In [35]:
import numpy as np
labels = ['deepl flores', 'deepl ep', 'gpt flores', 'gpt ep']
diffs = [diff_flores_deepl, diff_ep_deepl, diff_flores_gpt, diff_ep_gpt]
for label, diff in zip(labels, diffs):
    print(label)
    print('mean', f'{np.nanmean(diff.values):.2f}')
    print('max', f'{np.nanmax(diff.values):.2f}')
    print('min', f'{np.nanmin(diff.values):.2f}')
    print()

deepl flores
mean -0.09
max 0.20
min -0.70

deepl ep
mean -0.32
max 0.90
min -2.40

gpt flores
mean -1.74
max 0.30
min -32.70

gpt ep
mean -2.16
max 0.60
min -34.20



* Alignment, as expected, as strong impact on GPT4.1 scores, as it was more likely to mis-align
* Less impact on DeepL scores but still notable.

## Opus100

In [1]:
from scripts.data_management import Opus100Manager
dm = Opus100Manager()
tls = ['gpt', 'deepl']
pairs = Opus100Manager.get_pairs()

In [2]:
from scripts.post_process import direct_triplet_align, load_mt_sents
from os.path import join
for pair in pairs:
    s, t = pair
    for translator in tls:
        mt_sents = load_mt_sents('opus', translator, s, t)
        src_sents, tgt_sents = dm.get_sentence_pairs(
            s, t, num_of_sents=400)
        direct_triplet_align(
            mt_sents=mt_sents,
            ref_sents=tgt_sents,
            src_sents=src_sents,
            src_lang=s,
            ref_lang=t,
            folder_path=join('opus_results', f'{'opus'}-{translator}')
        )

In [4]:
from scripts.scoring import ResultProducer
import os
aligned = [o for o in os.listdir('opus_results') if not o.endswith('.csv')]
for res in aligned:
    fp = join('opus_results', res)
    files = os.listdir(fp)
    l2f = {f.replace('.jsonl', ''): join(fp, f) for f in files}
    rp = ResultProducer(label2files=l2f)
    rp.compute_results()
    rp.store_results(join('opus_results', f'{res}.csv'))

In [5]:
from scripts.scoring import create_matrix_from_csv
opus_gpt = create_matrix_from_csv(join('opus_results', 'opus-gpt.csv'))
opus_deepl = create_matrix_from_csv(join('opus_results', 'opus-deepl.csv'))

In [10]:
opus_gpt.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,,,37.5,,,,,,,
de,,,,10.7,,,,,,,
el,,,,33.2,,,,,,,
en,35.5,31.0,28.3,,39.3,20.2,36.0,32.3,30.3,29.6,30.5
es,,,,43.1,,,,,,,
fi,,,,31.0,,,,,,,
fr,,,,39.5,,,,,,,
it,,,,36.8,,,,,,,
nl,,,,31.0,,,,,,,
pt,,,,0.1,,,,,,,


In [11]:
opus_deepl.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,,,40.5,,,,,,,
de,,,,36.3,,,,,,,
el,,,,34.7,,,,,,,
en,38.9,32.4,29.2,,40.0,25.1,40.3,34.2,32.1,34.2,34.0
es,,,,44.8,,,,,,,
fi,,,,32.6,,,,,,,
fr,,,,41.6,,,,,,,
it,,,,37.8,,,,,,,
nl,,,,33.9,,,,,,,
pt,,,,40.2,,,,,,,


### Opus After Alignment

In [3]:
from scripts.scoring import ResultProducer
import os
from os.path import join

aligned = [o for o in os.listdir('triplets') if o.startswith('opus')]
for res in aligned:
    fp = join('triplets', res)
    files = os.listdir(fp)
    l2f = {f.replace('.jsonl', ''): join(fp, f) for f in files}
    rp = ResultProducer(label2files=l2f)
    rp.compute_results()
    rp.store_results(join('opus_results', f'{res}-n.csv'))

In [9]:
from scripts.scoring import create_matrix_from_csv
opus_gpt = create_matrix_from_csv(join('opus_results', 'opus-gpt.csv'))
opus_deepl = create_matrix_from_csv(join('opus_results', 'opus-deepl.csv'))


opus_gpt_a = create_matrix_from_csv(join('opus_results', 'opus-gpt-n.csv'))
opus_deepl_a = create_matrix_from_csv(join('opus_results', 'opus-deepl-n.csv'))

In [10]:
opus_gpt_a.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,,,39.2,,,,,,,
de,,,,36.6,,,,,,,
el,,,,34.6,,,,,,,
en,37.0,31.6,29.8,,40.5,21.1,36.9,33.5,32.2,31.0,33.0
es,,,,44.5,,,,,,,
fi,,,,33.5,,,,,,,
fr,,,,40.3,,,,,,,
it,,,,37.1,,,,,,,
nl,,,,31.8,,,,,,,
pt,,,,38.9,,,,,,,


In [11]:
opus_deepl_a.round(1)

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,,,41.7,,,,,,,
de,,,,37.3,,,,,,,
el,,,,36.2,,,,,,,
en,40.6,33.3,30.8,,41.6,26.6,41.3,35.7,33.9,35.8,37.4
es,,,,46.4,,,,,,,
fi,,,,35.2,,,,,,,
fr,,,,42.4,,,,,,,
it,,,,38.1,,,,,,,
nl,,,,35.1,,,,,,,
pt,,,,41.5,,,,,,,


### Impact of Alignment
* Positive Number: Improvement

In [15]:
diff_opus_gpt = (opus_gpt_a - opus_gpt).round(1)
diff_opus_gpt

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,,,1.7,,,,,,,
de,,,,25.9,,,,,,,
el,,,,1.5,,,,,,,
en,1.5,0.6,1.5,,1.2,0.8,0.9,1.2,1.8,1.5,2.4
es,,,,1.4,,,,,,,
fi,,,,2.4,,,,,,,
fr,,,,0.8,,,,,,,
it,,,,0.2,,,,,,,
nl,,,,0.8,,,,,,,
pt,,,,38.8,,,,,,,


In [16]:
diff_opus_deepl = (opus_deepl_a-opus_deepl).round(1)
diff_opus_gpt

Unnamed: 0,da,de,el,en,es,fi,fr,it,nl,pt,sv
da,,,,1.7,,,,,,,
de,,,,25.9,,,,,,,
el,,,,1.5,,,,,,,
en,1.5,0.6,1.5,,1.2,0.8,0.9,1.2,1.8,1.5,2.4
es,,,,1.4,,,,,,,
fi,,,,2.4,,,,,,,
fr,,,,0.8,,,,,,,
it,,,,0.2,,,,,,,
nl,,,,0.8,,,,,,,
pt,,,,38.8,,,,,,,


In [36]:
import numpy as np
labels = ['opus_gpt', 'opus_deepl']
diffs = [diff_opus_gpt, diff_opus_deepl]
for label, diff in zip(labels, diffs):
    print(label)
    print('mean', f'{np.nanmean(diff.values):.2f}')
    print('max', f'{np.nanmax(diff.values):.2f}')
    print('min', f'{np.nanmin(diff.values):.2f}')
    print()

opus_gpt
mean 4.44
max 38.80
min 0.20

opus_deepl
mean 1.52
max 3.40
min 0.20

