In [4]:
import json
from pathlib import Path

from sacrebleu import corpus_bleu

### Evaluation

In [5]:
def evaluate(generate_dir: str = '../../../generate/translate',
             log_dir: str = '../../../log/translate') -> None:
    """Compute corpus-level BLEU for every generated translation log.

    Expected input layout (one JSON file per decoding configuration)::

        <generate_dir>/<trained_version>/<mode>/<version>/**/<name>.json

    Each JSON file must contain a list of objects with an ``'original'``
    (reference text) and a ``'predict'`` (generated text) entry. For every
    file the score is printed, and for every ``(mode, version)`` pair a
    summary is written to ``<log_dir>/<trained_version>/<version>-<mode>.txt``
    with one ``<name>: <score>`` line per evaluated file. The summary file is
    created (empty) even when no JSON logs exist for that pair.

    Args:
        generate_dir: Root directory holding one sub-directory per trained
            version. Non-directory entries are ignored. Defaults to the
            location used by the notebook.
        log_dir: Root directory under which the summary files are written;
            missing directories are created.

    Raises:
        KeyError: If an example lacks the ``'original'`` or ``'predict'`` key.
        json.JSONDecodeError: If a log file is not valid JSON.
    """
    modes = ['ro-en', 'en-ro']
    versions = ['base', 'medium', 'large']

    # sorted() makes the traversal (and therefore the printed / written
    # results) independent of the filesystem's directory ordering.
    trained_dirs = sorted(p for p in Path(generate_dir).glob('*') if p.is_dir())

    for trained_dir in trained_dirs:
        trained_version = trained_dir.name
        print(f'Trained version {trained_version}\n')

        # The output directory only depends on the trained version, so it is
        # created once here instead of once per (mode, version) pair.
        output_dir = Path(log_dir) / trained_version
        output_dir.mkdir(parents=True, exist_ok=True)

        for mode in modes:
            for version in versions:
                result = {}

                for path_log in sorted((trained_dir / mode / version).glob('**/*.json')):
                    name_eval = path_log.stem

                    # Explicit UTF-8: the logs contain Romanian diacritics and
                    # must not depend on the platform's default encoding.
                    with open(path_log, 'r', encoding='utf-8') as input_file:
                        data = json.load(input_file)

                    refs = [example['original'] for example in data]
                    # Remove the language-label artefacts left in the
                    # generated text.
                    hyp = [
                        example['predict'].replace('Romanian:', '').replace('English:', '')
                        for example in data
                    ]

                    # corpus_bleu takes a list of reference streams; there is
                    # a single reference per hypothesis here.
                    bleu_score = corpus_bleu(hyp, [refs])
                    result[name_eval] = bleu_score
                    print(f'Eval: {name_eval} Bleu: {bleu_score.score}')

                summary_path = output_dir / f'{version}-{mode}.txt'
                with open(summary_path, 'w', encoding='utf-8') as output_file:
                    for k, v in result.items():
                        output_file.write(f'{k}: {v}\n')

        print('\n\n')

### Run evaluation

In [6]:
# Score every generated translation log; prints one BLEU line per log file
# and writes a summary text file per (version, mode) pair.
evaluate()

Trained version v1

Eval: base-beam-search-4-ro-en Bleu: 31.260847205131878
Eval: base-top-7-ro-en Bleu: 30.36567880295048
Eval: base-beam-search-8-ro-en Bleu: 31.386911978835382
Eval: base-greedy-ro-en Bleu: 30.36567880295048
Eval: medium-top-7-ro-en Bleu: 32.483383293661205
Eval: medium-beam-search-3-ro-en Bleu: 34.07760950302437
Eval: medium-beam-search-8-ro-en Bleu: 34.15754674349059
Eval: medium-greedy-ro-en Bleu: 32.483383293661205
Eval: large-top-7-ro-en Bleu: 33.68933498954746
Eval: large-beam-search-8-ro-en Bleu: 34.51390382816909
Eval: large-greedy-ro-en Bleu: 33.68933498954746
Eval: large-beam-search-3-ro-en Bleu: 34.40206883488614
Eval: base-top-7-en-ro Bleu: 20.271535578517526
Eval: base-greedy-en-ro Bleu: 20.271535578517526
Eval: base-beam-search-8-en-ro Bleu: 22.95088084617368
Eval: base-beam-search-4-en-ro Bleu: 22.308151691418963
Eval: medium-greedy-en-ro Bleu: 22.18266493979053
Eval: medium-beam-search-4-en-ro Bleu: 24.025631506972175
Eval: medium-top-7-en-ro Bleu: 22