# LLM Skyline
GPT has demonstrated excellent performance on the task using in-context learning. Here, we run a systematic evaluation, in order to provide a *skyline*, i.e. near-optimal automated system. Of course, making OpenAI API calls might not be ideal in real usage for a number of reasons.

In [1]:
import pandas as pd
import os

# Read all of the data into a single combined DF
folder_path = '../americasnlp2024/ST2_EducationalMaterials/data/'
all_data = []
for filename in os.listdir(folder_path):
    if filename.endswith('.tsv'):
        split_df = pd.read_csv(os.path.join(folder_path, filename), delimiter='\t')
        [split_df['language'], split_df['split']] = filename[:-4].split("-")
        all_data.append(split_df)

df = pd.concat(all_data, ignore_index=True)
df['Formatted'] = df.apply(lambda row: f"Source: {row['Source']}\nChange: {row['Change']}\nTarget: {row['Target']}", axis=1)
df

Unnamed: 0,ID,Source,Change,Target,language,split,Formatted
0,Maya0119,Tene' áak'ab kin bin merkaado,TYPE:NEG,Tene' ma' áak'ab kin bin merkaadoi',maya,train,Source: Tene' áak'ab kin bin merkaado\nChange:...
1,Maya0120,Tene' áak'ab kin bin merkaado,SUBTYPE:INT,Tene' wáaj áak'ab kin bin merkaado,maya,train,Source: Tene' áak'ab kin bin merkaado\nChange:...
2,Maya0317,Ko'one'ex ich kool,PERSON:1_PL,Ko'ox ich kool,maya,train,Source: Ko'one'ex ich kool\nChange: PERSON:1_P...
3,Maya0620,Táan a bine'ex ich kool,TYPE:NEG,Ma' táan a bine'ex ich kooli',maya,train,Source: Táan a bine'ex ich kool\nChange: TYPE:...
4,Maya0621,Táan a bine'ex ich kool,SUBTYPE:INT,Táan wáaj a bine'ex ich kool,maya,train,Source: Táan a bine'ex ich kool\nChange: SUBTY...
...,...,...,...,...,...,...,...
1516,Bribri0676,Ye' tö dawà su',"TYPE:NEG, TENSE:FUT_CER, ASPECT:IPFV, VOICE:MI...",Kë̀ dawà sùrpa,bribri,dev,"Source: Ye' tö dawà su'\nChange: TYPE:NEG, TEN..."
1517,Bribri0677,Ye' tö dawà su',TENSE:PAS_PLU,Ye' wa̠ dawà súrule,bribri,dev,Source: Ye' tö dawà su'\nChange: TENSE:PAS_PLU...
1518,Bribri0678,Ye' tö dawà su',"TYPE:NEG, TENSE:PAS_PLU",Ye' kë̀ wa̠ dawà súrule,bribri,dev,"Source: Ye' tö dawà su'\nChange: TYPE:NEG, TEN..."
1519,Bribri0679,Ye' tö dawà su',"MODE:POT, TENSE:IPFV_REC, ASPECT:IPFV",Ye' a̠ dawà súr,bribri,dev,"Source: Ye' tö dawà su'\nChange: MODE:POT, TEN..."


In [4]:
from openai import OpenAI
client = OpenAI(api_key='')

def run_prompt_full_context(lang, test_ID):
    """Runs a GPT prompt for a specified row in the dev/test set. Uses the entire `train` split as context.

    Args:
        lang: 'bribri' | 'guarani' | 'maya'
        test_ID The ID of a row in the dev/test set to run inference on.
    """
    train_split = df[(df['language'] == lang) & (df['split'] == 'train')]
    test_sentence = df[df['ID'] == test_ID]

    system_prompt = f"You are an expert in the {lang.capitalize()} language. You are creating education materials by taking a given sentence in {lang.capitalize()} and a label indicating a change in one or more linguistic features, and outputting the sentence transformed by changing that feature."
    context = '\n\n'.join(train_split['Formatted'])
    prompt = f"""Below are examples of a sentence in {lang.capitalize()}, the linguistic change, and the target sentence after applying the change.
    
    {context}

    Given the following sentence and linguistic change, please output only the target sentence after applying the change. Do not output any additional text.

    Source: {test_sentence['Source'].values[0]}
    Change: {test_sentence['Change'].values[0]}
    Target:
    """

    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]
    )
    return completion.choices[0].message, test_sentence['Target'].values[0]

run_prompt_full_context('bribri', 'Bribri0361')

(ChatCompletionMessage(content='Pûs kapéwa̠', role='assistant', function_call=None, tool_calls=None),
 'Pûs kapówa̠')