# LLM Skyline
GPT has demonstrated excellent performance on the task using in-context learning. Here, we run a systematic evaluation, in order to provide a *skyline*, i.e. near-optimal automated system. Of course, making OpenAI API calls might not be ideal in real usage for a number of reasons.

In [29]:
import pandas as pd
import os

# Read all of the data into a single combined DF
folder_path = '../americasnlp2024/ST2_EducationalMaterials/data/'
all_data = []
for filename in os.listdir(folder_path):
    if filename.endswith('.tsv'):
        split_df = pd.read_csv(os.path.join(folder_path, filename), delimiter='\t')
        [split_df['language'], split_df['split']] = filename[:-4].split("-")
        all_data.append(split_df)

df = pd.concat(all_data, ignore_index=True)
df['Formatted'] = df.apply(lambda row: f"Id: {row['ID']}\nSource: {row['Source']}\nChange: {row['Change']}\nTarget: {row['Target']}", axis=1)
df['Formatted_Covered'] = df.apply(lambda row: f"Id: {row['ID']}\nSource: {row['Source']}\nChange: {row['Change']}\nTarget: ", axis=1)
df['Predicted Target'] = ''
df

Unnamed: 0,ID,Source,Change,Target,language,split,Formatted,Formatted_Covered,Predicted Target
0,Maya0119,Tene' áak'ab kin bin merkaado,TYPE:NEG,Tene' ma' áak'ab kin bin merkaadoi',maya,train,Id: Maya0119\nSource: Tene' áak'ab kin bin mer...,Id: Maya0119\nSource: Tene' áak'ab kin bin mer...,
1,Maya0120,Tene' áak'ab kin bin merkaado,SUBTYPE:INT,Tene' wáaj áak'ab kin bin merkaado,maya,train,Id: Maya0120\nSource: Tene' áak'ab kin bin mer...,Id: Maya0120\nSource: Tene' áak'ab kin bin mer...,
2,Maya0317,Ko'one'ex ich kool,PERSON:1_PL,Ko'ox ich kool,maya,train,Id: Maya0317\nSource: Ko'one'ex ich kool\nChan...,Id: Maya0317\nSource: Ko'one'ex ich kool\nChan...,
3,Maya0620,Táan a bine'ex ich kool,TYPE:NEG,Ma' táan a bine'ex ich kooli',maya,train,Id: Maya0620\nSource: Táan a bine'ex ich kool\...,Id: Maya0620\nSource: Táan a bine'ex ich kool\...,
4,Maya0621,Táan a bine'ex ich kool,SUBTYPE:INT,Táan wáaj a bine'ex ich kool,maya,train,Id: Maya0621\nSource: Táan a bine'ex ich kool\...,Id: Maya0621\nSource: Táan a bine'ex ich kool\...,
...,...,...,...,...,...,...,...,...,...
1516,Bribri0676,Ye' tö dawà su',"TYPE:NEG, TENSE:FUT_CER, ASPECT:IPFV, VOICE:MI...",Kë̀ dawà sùrpa,bribri,dev,Id: Bribri0676\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0676\nSource: Ye' tö dawà su'\nChang...,
1517,Bribri0677,Ye' tö dawà su',TENSE:PAS_PLU,Ye' wa̠ dawà súrule,bribri,dev,Id: Bribri0677\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0677\nSource: Ye' tö dawà su'\nChang...,
1518,Bribri0678,Ye' tö dawà su',"TYPE:NEG, TENSE:PAS_PLU",Ye' kë̀ wa̠ dawà súrule,bribri,dev,Id: Bribri0678\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0678\nSource: Ye' tö dawà su'\nChang...,
1519,Bribri0679,Ye' tö dawà su',"MODE:POT, TENSE:IPFV_REC, ASPECT:IPFV",Ye' a̠ dawà súr,bribri,dev,Id: Bribri0679\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0679\nSource: Ye' tö dawà su'\nChang...,


In [37]:
from openai import OpenAI
import re
from tqdm.notebook import tqdm

if 'api_key' not in vars():
    api_key = input("OpenAI API Key:")

client = OpenAI(api_key=api_key)

def run_prompt_full_context(lang, test_IDs=None):
    """Runs a GPT prompt for a specified row in the dev/test set. Uses the entire `train` split as context.

    Args:
        lang: 'bribri' | 'guarani' | 'maya'
        test_ID The ID of a row in the dev/test set to run inference on.
    """
    train_split = df[(df['language'] == lang) & (df['split'] == 'train')]

    if test_IDs is not None:
        test_sentences = df[df['ID'].isin(test_IDs)]
    else:
        test_sentences = df[(df['language'] == lang) & (df['split'] == 'dev')]

    system_prompt = f"You are an expert in the {lang.capitalize()} language. You are creating education materials by taking a given sentence in {lang.capitalize()} and a label indicating a change in one or more linguistic features, and outputting the sentence transformed by changing that feature."
    context = '\n\n'.join(train_split['Formatted'])
    test_examples = '\n\n'.join(test_sentences['Formatted_Covered'])


    prompt = f"""Below are examples of a sentence in {lang.capitalize()}, the linguistic change, and the target sentence after applying the change.
    
{context}

Below is a list of similar examples, where the source sentence and linguistic change are given, and the output sentence is not known. For each example, please output only the id and target sentence values, as in

ID: some id
Target: sentence after applying the change


Do not output any additional text, and do not output the Source or Change fields. This is very important, take your time and do not mess up or I will lose my job.

{test_examples}
    """

    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        temperature=1,
        top_p=1,
        seed=430
    )
    print(completion.usage)
    print(completion.model)

    pattern = r"Id: (\S+)\nTarget: (.*)\n"
    resp = completion.choices[0].message.content
    print(resp)
    matches = re.findall(pattern, resp, re.M)
    matches_dict = dict()
    for match in matches:
        matches_dict[match[0]] = match[1]
    return matches_dict

for language in tqdm(['guarani', 'maya']):
    for chunk in tqdm(range(3)):
        test_chunk = df[(df['language'] == language) & (df['split'] == 'dev')]['ID'].values[chunk*80: (chunk+1)*80]
        pred_dict = run_prompt_full_context(lang=language, test_IDs=test_chunk)
        for pred_id, pred_string in pred_dict.items():
            df.loc[df['ID'] == pred_id, 'Predicted Target'] = pred_string

df

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

CompletionUsage(completion_tokens=1702, prompt_tokens=10973, total_tokens=12675)
gpt-4-0125-preview
Id: Guarani0232
Target: Ore rombyai kuri

Id: Guarani0233
Target: Ore ndorombyaita kuri

Id: Guarani0234
Target: Ñande ndorombyai kuri

Id: Guarani0235
Target: Che ndorombyai kuri

Id: Guarani0236
Target: Peẽ ndopeñombyai kuri

Id: Guarani0237
Target: Nde nderombyai kuri

Id: Guarani0073
Target: Peẽ peñanga’uta

Id: Guarani0074
Target: Ha’ekuéra oñanga’uta

Id: Guarani0075
Target: Che añanga’uta

Id: Guarani0076
Target: Peẽ napeñanga’ukuri

Id: Guarani0077
Target: Peẽ peñanga’u

Id: Guarani0238
Target: Peẽ peñemongetaikuri

Id: Guarani0239
Target: Ñande peñemongetami

Id: Guarani0240
Target: Ore noroñemongetami

Id: Guarani0241
Target: Peẽ peñemongetata

Id: Guarani0242
Target: Peẽ ndapeñemongetami

Id: Guarani0243
Target: Peẽ peñemongeta

Id: Guarani0048
Target: Che ahechá kuri peteĩ óga

Id: Guarani0049
Target: Che ndahechámi hína kuri peteĩ óga

Id: Guarani0050
Target: Che ahecha kuri

  0%|          | 0/3 [00:00<?, ?it/s]

CompletionUsage(completion_tokens=4096, prompt_tokens=31171, total_tokens=35267)
gpt-4-0125-preview
Id: Maya0066
Target: Janalo'on tu k'íiwikil koonol

Id: Maya0067
Target: Janale'ex tu k'íiwikil koonol

Id: Maya0068
Target: Janalech tu k'íiwikil koonol

Id: Maya0622
Target: Ma' táan ek bin ich kooli'

Id: Maya0623
Target: Táan wáaj ek bin ich kool

Id: Maya0605
Target: Teche' ma' táan a bin xíimbal tu najili'

Id: Maya0606
Target: Teche' ka bin wáaj xíimbal tu najil

Id: Maya0127
Target: Te'exe' ma' táan a bine'ex koonol tu k'íiwikil koonoli'

Id: Maya0128
Target: Te'exe' táan wáaj a bine'ex koonol tu k'íiwikil koonol

Id: Maya0259
Target: Táan a míistik a wotochi'

Id: Maya0160
Target: Ma' jach k'a'abéet u bin merkaadoi'

Id: Maya0161
Target: Ma' jach k'a'abéet u bino'ob merkaadoi'

Id: Maya0162
Target: Ma' jach k'a'abéet a bine'ex merkaadoi'

Id: Maya0188
Target: Ba'ax k'iine'ex a bin merkaado

Id: Maya0189
Target: Ba'ax k'iino'ob a bin merkaado

Id: Maya0245
Target: Ta míistik in w

Unnamed: 0,ID,Source,Change,Target,language,split,Formatted,Formatted_Covered,Predicted Target
0,Maya0119,Tene' áak'ab kin bin merkaado,TYPE:NEG,Tene' ma' áak'ab kin bin merkaadoi',maya,train,Id: Maya0119\nSource: Tene' áak'ab kin bin mer...,Id: Maya0119\nSource: Tene' áak'ab kin bin mer...,
1,Maya0120,Tene' áak'ab kin bin merkaado,SUBTYPE:INT,Tene' wáaj áak'ab kin bin merkaado,maya,train,Id: Maya0120\nSource: Tene' áak'ab kin bin mer...,Id: Maya0120\nSource: Tene' áak'ab kin bin mer...,
2,Maya0317,Ko'one'ex ich kool,PERSON:1_PL,Ko'ox ich kool,maya,train,Id: Maya0317\nSource: Ko'one'ex ich kool\nChan...,Id: Maya0317\nSource: Ko'one'ex ich kool\nChan...,
3,Maya0620,Táan a bine'ex ich kool,TYPE:NEG,Ma' táan a bine'ex ich kooli',maya,train,Id: Maya0620\nSource: Táan a bine'ex ich kool\...,Id: Maya0620\nSource: Táan a bine'ex ich kool\...,
4,Maya0621,Táan a bine'ex ich kool,SUBTYPE:INT,Táan wáaj a bine'ex ich kool,maya,train,Id: Maya0621\nSource: Táan a bine'ex ich kool\...,Id: Maya0621\nSource: Táan a bine'ex ich kool\...,
...,...,...,...,...,...,...,...,...,...
1516,Bribri0676,Ye' tö dawà su',"TYPE:NEG, TENSE:FUT_CER, ASPECT:IPFV, VOICE:MI...",Kë̀ dawà sùrpa,bribri,dev,Id: Bribri0676\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0676\nSource: Ye' tö dawà su'\nChang...,Kë̀ dawàrpa
1517,Bribri0677,Ye' tö dawà su',TENSE:PAS_PLU,Ye' wa̠ dawà súrule,bribri,dev,Id: Bribri0677\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0677\nSource: Ye' tö dawà su'\nChang...,Ye' wa̠ dawà surule
1518,Bribri0678,Ye' tö dawà su',"TYPE:NEG, TENSE:PAS_PLU",Ye' kë̀ wa̠ dawà súrule,bribri,dev,Id: Bribri0678\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0678\nSource: Ye' tö dawà su'\nChang...,Ye' kë̀ wa̠ dawà surule
1519,Bribri0679,Ye' tö dawà su',"MODE:POT, TENSE:IPFV_REC, ASPECT:IPFV",Ye' a̠ dawà súr,bribri,dev,Id: Bribri0679\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0679\nSource: Ye' tö dawà su'\nChang...,Ye' a̠ dawà sör


In [43]:
df[(df['language'] == 'bribri') & (df['split'] == 'dev')].to_csv("bribri-dev-preds.tsv", sep="\t")
df[(df['language'] == 'guarani') & (df['split'] == 'dev')].to_csv("guarani-dev-preds.tsv", sep="\t")
df[(df['language'] == 'maya') & (df['split'] == 'dev')].to_csv("maya-dev-preds.tsv", sep="\t")