# GPT Experiment

* Set up the task

In [1]:
from scripts.task import TranslationTask
from scripts.data_management import EuroParlManager
from scripts.translators import GPTClient
from scripts.logger import MyLogger
from os.path import join
from random import sample, seed
# Seed the RNG so the sampled language pairs are the same on every run.
seed(64)

# Split each Europarl pair string on '-' into a tuple, then add the reversed
# direction of every pair so both translation directions can be drawn.
directed = [tuple(code.split('-')) for code in EuroParlManager.EP_PAIRS]
extended = [(langs[1], langs[0]) for langs in directed]
possible = directed + extended

# Sample from a sorted population so the draw depends only on the seed.
population = sorted(possible)
some_pairs = sample(population, 4)

# All artefacts of this experiment (log file, translations) live in one folder.
example_folder = 'exmpl'
logfile = join(example_folder, 'log.jsonl')

dm = EuroParlManager()
logger = MyLogger(logfile=logfile)
client_gpt = GPTClient(logger=logger)

# Translation folder is named after the model the client reports.
mt_folder_gpt = join(example_folder, client_gpt.model)

In [2]:
# Show the four sampled (source, target) language pairs.
some_pairs

[('fr', 'da'), ('de', 'fr'), ('nl', 'da'), ('it', 'pt')]

In [3]:
# Translation task over the sampled pairs: 400 sentences per pair, using the
# GPT client; `mt_folder_gpt` is the folder the translations are loaded from later.
task_gpt = TranslationTask(
    dm=dm,
    client=client_gpt,
    logger=logger,
    target_pairs=some_pairs,
    num_of_sents=400,
    mt_folder=mt_folder_gpt,
)

In [4]:
# Execute the task; the output below reports how many sentences were
# translated for each language pair.
task_gpt.run()

400 translated from fr to da
400 translated from de to fr
125 translated from nl to da
400 translated from it to pt


* Post-Processing

In [6]:
from scripts.post_process import direct_triplet_align
from scripts.util import load_sents

# For every sampled pair, fetch the source/reference sentences, load the stored
# machine translations and pass all three to the aligner (presumably it writes
# its result into 'tmp_gpt', which the evaluation step reads from).
for src_lang, tgt_lang in some_pairs:
    sources, references = dm.get_sentence_pairs(
        src_lang, tgt_lang, num_of_sents=400
    )
    hypotheses = load_sents(mt_folder_gpt, src_lang, tgt_lang)
    direct_triplet_align(
        src_lang=src_lang,
        src_sents=sources,
        ref_lang=tgt_lang,
        ref_sents=references,
        mt_sents=hypotheses,
        folder_path='tmp_gpt',
    )

* Eval

In [8]:
from scripts.scoring import ResultProducer
import os
# Map each label (file name without its trailing '.jsonl') to the file's path.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the mapping order stable across runs and platforms.
# Only the trailing suffix is stripped: str.replace would also remove a
# '.jsonl' that occurs in the middle of a file name.
results_dir = 'tmp_gpt'
jsonl_suffix = '.jsonl'
l2f_gpt = {
    fname[:-len(jsonl_suffix)]: join(results_dir, fname)
    for fname in sorted(os.listdir(results_dir))
    if fname.endswith(jsonl_suffix)
}

# Score every labelled file.
rp_gpt = ResultProducer(label2files=l2f_gpt)
rp_gpt.compute_results()

In [9]:
# Show the computed scores; the output below tabulates BLEU and chrF per label.
rp_gpt.display_results()

   Label       BLEU       chrF
0  de-fr  31.128177  59.199907
1  fr-da  32.507761  59.628156
2  it-pt  26.396587  55.420682
3  nl-da  24.475386  51.354379


* `nl-da` was translated correctly, but not all sentences were translated (only 125 of the 400 requested)!

## Retry

In [12]:
from string import Template


# Stricter user prompt for the retry: it explicitly forbids omissions and
# extra newlines, because the first nl -> da run returned only 125 of the
# 400 requested sentences.
USR_TEMPL = Template(
    "Translate the following $src_lang sentences into $tgt_lang.\n"
    "Please make sure to keep the same formatting, do not add more newlines.\n"
    "You are not allowed to omit anything.\n"
    "Here is the text:")

# Client configured with the stricter prompt.
client = GPTClient(usr_templ=USR_TEMPL, logger=logger)

# Keep the retry output in its own sub-folder so the translations of the
# first run are not overwritten. NOTE: this rebinds `mt_folder_gpt`.
mt_folder_gpt = join(example_folder, client.model, 'retry')

# The task must receive `client` (the one built with USR_TEMPL). Passing the
# original `client_gpt` would silently rerun with the default prompt, so the
# retry would not test the new instructions at all.
task_gpt = TranslationTask(
    target_pairs=[('nl', 'da')],
    dm=dm,
    client=client,
    logger=logger,
    mt_folder=mt_folder_gpt,
    num_of_sents=400
)

In [13]:
# Execute the retry task, which covers only the nl -> da pair.
task_gpt.run()

400 translated from nl to da
