# GPT Testing
* Showcases how different prompts affect the output
* We chose the prompt used for `gpt1`, together with the model `gpt-4.1`, as this combination seems to be the most stable
* Note: some issues can be dealt with using `bertalign`; others can only be solved by re-running the translation, as GPT is never fully deterministic.
* `temperature=0` makes it as close to deterministic as possible, but random sampling is still present behind the scenes

In [1]:
from scripts.data_management import FloresPlusManager
from scripts.translators import GPT4Client
from scripts.util import MyLogger, LANG_ISO, split_sents
# One logger instance, writing to log.jsonl, shared by the cells below.
logger = MyLogger(logfile='log.jsonl')

# Fetch 100 German/English sentence pairs from the FLORES+ manager.
dm = FloresPlusManager()
de_sents, en_sents = dm.get_sentence_pairs('de', 'en', num_of_sents=100)

# Join each side with newlines and re-split it with split_sents, so the number
# of list entries can be compared with the number of sentences the splitter finds.
real_de_sents, real_en_sents = (
    split_sents('\n'.join(sents), lang)
    for sents, lang in ((de_sents, 'de'), (en_sents, 'en'))
)

# Displayed as: (entries de, entries en, split sentences de, split sentences en)
tuple(map(len, (de_sents, en_sents, real_de_sents, real_en_sents)))

(100, 100, 106, 105)

In [2]:
# Baseline: client constructed with only a logger, i.e. its default prompts and model.
gpt1 = GPT4Client(logger=logger)
out = gpt1.translate_document(text=de_sents, src_lang='de', tgt_lang='en')

# Re-split the newline-joined output to compare the number of returned entries
# with the number of sentences split_sents finds in it.
real_sents = split_sents('\n'.join(out), 'en')
tuple(map(len, (out, real_sents)))

(100, 106)

In [3]:
# Variant: default model, but user_prompt is replaced by a pass-through that
# returns the text unchanged (the language arguments are ignored).
gpt2 = GPT4Client(logger=logger)
gpt2.user_prompt = lambda src_lang, tgt_lang, text: text
out = gpt2.translate_document(text=de_sents, src_lang='de', tgt_lang='en')

# Entries returned vs. sentences found when the joined output is re-split.
real_sents = split_sents('\n'.join(out), 'en')
tuple(map(len, (out, real_sents)))

(100, 106)

In [4]:
def sys_prompt(src_lang, tgt_lang):
    """Build the two-line system prompt for a src_lang -> tgt_lang translation.

    Both arguments are keys into LANG_ISO; the looked-up values are inserted
    into the role line. The second line asks the model to keep the formatting
    and not to add newlines. The lookup fails if a key is missing from LANG_ISO.
    """
    source_name = LANG_ISO[src_lang]
    target_name = LANG_ISO[tgt_lang]
    role_line = f"You are a {source_name}-to-{target_name} translator."
    format_line = "Please make sure to keep the same formatting, do not add more newlines."
    return f"{role_line}\n{format_line}"

# Variant: default model, pass-through user prompt, and the custom `sys_prompt`
# defined above (which asks to keep the formatting and not add newlines).
gpt3 = GPT4Client(logger=logger)
gpt3.sys_prompt = sys_prompt
gpt3.user_prompt = lambda src_lang, tgt_lang, text: text

out = gpt3.translate_document(text=de_sents, src_lang='de', tgt_lang='en')

# Entries returned vs. sentences found when the joined output is re-split.
real_sents = split_sents('\n'.join(out), 'en')
tuple(map(len, (out, real_sents)))

(100, 106)

In [5]:
# Same as the baseline run, but with the model switched to gpt-4o
# (prompts are left at the client's defaults).
gpt4 = GPT4Client(logger=logger, model='gpt-4o')
out = gpt4.translate_document(text=de_sents, src_lang='de', tgt_lang='en')

# Entries returned vs. sentences found when the joined output is re-split.
real_sents = split_sents('\n'.join(out), 'en')
tuple(map(len, (out, real_sents)))

(1, 104)

In [6]:
# Variant: gpt-4o with a pass-through user prompt (returns the text unchanged).
# Reuse the notebook's shared `logger` (same 'log.jsonl' logfile), as every
# other cell does, instead of constructing a second MyLogger for the same file.
gpt5 = GPT4Client(logger=logger, model='gpt-4o')
gpt5.user_prompt = lambda src_lang, tgt_lang, text: text
out = gpt5.translate_document(
    text=de_sents,
    src_lang='de',
    tgt_lang='en'
)

# Entries returned vs. sentences found when the joined output is re-split.
real_sents = split_sents('\n'.join(out), 'en')
len(out), len(real_sents)

(63, 136)

In [7]:
def sys_prompt(src_lang, tgt_lang):
    """Return the system prompt: a translator role line plus a formatting rule.

    src_lang and tgt_lang are looked up in LANG_ISO and the resulting values
    are placed in the first line; the second line tells the model to keep the
    formatting and not to add newlines. The two lines are joined with '\\n'.
    """
    prompt_lines = (
        f"You are a {LANG_ISO[src_lang]}-to-{LANG_ISO[tgt_lang]} translator.",
        "Please make sure to keep the same formatting, do not add more newlines.",
    )
    return '\n'.join(prompt_lines)

# Variant: gpt-4o with a pass-through user prompt and the custom `sys_prompt`
# defined above (which asks to keep the formatting and not add newlines).
gpt6 = GPT4Client(logger=logger, model='gpt-4o')
gpt6.sys_prompt = sys_prompt
gpt6.user_prompt = lambda src_lang, tgt_lang, text: text

out = gpt6.translate_document(text=de_sents, src_lang='de', tgt_lang='en')

# Entries returned vs. sentences found when the joined output is re-split.
real_sents = split_sents('\n'.join(out), 'en')
tuple(map(len, (out, real_sents)))

(1, 104)