# LLM Skyline
GPT has demonstrated excellent performance on this task using in-context learning. Here, we run a systematic evaluation in order to provide a *skyline*, i.e. a near-optimal automated system. Of course, making OpenAI API calls might not be ideal in real usage for a number of reasons.

In [73]:
import pandas as pd
import os

# Collect every TSV file in the data folder into one DataFrame, tagging each
# row with the language and split taken from its file name.
folder_path = '../americasnlp2024/ST2_EducationalMaterials/data/'
all_data = []
for filename in os.listdir(folder_path):
    if not filename.endswith('.tsv'):
        continue
    split_df = pd.read_csv(os.path.join(folder_path, filename), sep='\t')
    # The stem must have the shape "<language>-<split>"; anything else fails the unpacking.
    language_name, split_name = filename[:-4].split("-")
    split_df['language'] = language_name
    split_df['split'] = split_name
    all_data.append(split_df)

df = pd.concat(all_data, ignore_index=True)


def _spell_out(text):
    """Put a single space between consecutive characters of *text*."""
    return ' '.join(text)


def _covered_example(row):
    """Render one row as a prompt example whose Target field is left blank."""
    return f"Id: {row['ID']}\nSource: {_spell_out(row['Source'])}\nChange: {row['Change']}\nTarget: "


def _full_example(row):
    """Render one row as a prompt example including its letter-spaced target."""
    return _covered_example(row) + _spell_out(row['Target'])


# Letters are spaced out to avoid tokenization issues
df['Formatted'] = df.apply(_full_example, axis=1)
df['Formatted_Covered'] = df.apply(_covered_example, axis=1)
df['Predicted Target'] = ''
df

Unnamed: 0,ID,Source,Change,Target,language,split,Formatted,Formatted_Covered,Predicted Target
0,Maya0119,Tene' áak'ab kin bin merkaado,TYPE:NEG,Tene' ma' áak'ab kin bin merkaadoi',maya,train,Id: Maya0119\nSource: T e n e ' á a k ' a b ...,Id: Maya0119\nSource: T e n e ' á a k ' a b ...,
1,Maya0120,Tene' áak'ab kin bin merkaado,SUBTYPE:INT,Tene' wáaj áak'ab kin bin merkaado,maya,train,Id: Maya0120\nSource: T e n e ' á a k ' a b ...,Id: Maya0120\nSource: T e n e ' á a k ' a b ...,
2,Maya0317,Ko'one'ex ich kool,PERSON:1_PL,Ko'ox ich kool,maya,train,Id: Maya0317\nSource: K o ' o n e ' e x i c ...,Id: Maya0317\nSource: K o ' o n e ' e x i c ...,
3,Maya0620,Táan a bine'ex ich kool,TYPE:NEG,Ma' táan a bine'ex ich kooli',maya,train,Id: Maya0620\nSource: T á a n a b i n e ' ...,Id: Maya0620\nSource: T á a n a b i n e ' ...,
4,Maya0621,Táan a bine'ex ich kool,SUBTYPE:INT,Táan wáaj a bine'ex ich kool,maya,train,Id: Maya0621\nSource: T á a n a b i n e ' ...,Id: Maya0621\nSource: T á a n a b i n e ' ...,
...,...,...,...,...,...,...,...,...,...
1516,Bribri0676,Ye' tö dawà su',"TYPE:NEG, TENSE:FUT_CER, ASPECT:IPFV, VOICE:MI...",Kë̀ dawà sùrpa,bribri,dev,Id: Bribri0676\nSource: Y e ' t ö d a w à ...,Id: Bribri0676\nSource: Y e ' t ö d a w à ...,
1517,Bribri0677,Ye' tö dawà su',TENSE:PAS_PLU,Ye' wa̠ dawà súrule,bribri,dev,Id: Bribri0677\nSource: Y e ' t ö d a w à ...,Id: Bribri0677\nSource: Y e ' t ö d a w à ...,
1518,Bribri0678,Ye' tö dawà su',"TYPE:NEG, TENSE:PAS_PLU",Ye' kë̀ wa̠ dawà súrule,bribri,dev,Id: Bribri0678\nSource: Y e ' t ö d a w à ...,Id: Bribri0678\nSource: Y e ' t ö d a w à ...,
1519,Bribri0679,Ye' tö dawà su',"MODE:POT, TENSE:IPFV_REC, ASPECT:IPFV",Ye' a̠ dawà súr,bribri,dev,Id: Bribri0679\nSource: Y e ' t ö d a w à ...,Id: Bribri0679\nSource: Y e ' t ö d a w à ...,


In [84]:
import re
def remove_single_spaces(text: str) -> str:
    """Undo letter-spacing: delete single spaces between characters, keep word gaps.

    A lone space whose neighbours are both word characters or apostrophes is
    removed. Every remaining run of whitespace (e.g. the wider gap between two
    letter-spaced words) is then collapsed to exactly one space.

    A detached combining mark is not matched by ``\\w``, so the spaces around
    it survive; re-attach such marks before calling this function.

    Args:
        text: The letter-spaced string.

    Returns:
        The string with letter-spacing removed and whitespace runs normalized.
    """
    # Step 1: drop the single spaces that only separate letters of one word.
    despaced = re.sub(r"(?<=[\w']) (?=[\w'])", "", text)
    # Step 2: whatever whitespace is left marks a word boundary.
    return re.sub(r"\s+", " ", despaced)

# Fixes unattached diacritics
def attach_diacritics(text: str) -> str:
    """Re-attach combining marks that a single space separates from their base.

    In letter-spaced text a combining diacritic ends up after a space instead
    of directly after its base letter. This removes that space whenever the
    character after it is a Unicode combining mark (general category ``M*``)
    and the character before it is a word character or another combining mark
    (so stacked marks are handled too).

    Plain spaces are never treated as marks, so runs of spaces that encode
    word boundaries are left exactly as they are.

    Args:
        text: Letter-spaced text that may contain detached combining marks.

    Returns:
        The text with each detached mark joined to the preceding character.
    """
    import unicodedata  # function-scope import: only this helper needs it

    def is_mark(char):
        return unicodedata.category(char).startswith("M")

    def reattach(match):
        mark = match.group(1)
        # The lookbehind guarantees there is a character before the space.
        previous = match.string[match.start() - 1]
        if is_mark(mark) and (re.fullmatch(r"\w", previous) or is_mark(previous)):
            return mark  # drop the separating space
        return match.group(0)  # not a detached mark: leave untouched

    # The lookbehind does not consume the base character, so consecutive
    # candidates ("e", mark, mark) are each examined in turn.
    return re.sub(r"(?<=\S) (\S)", reattach, text)

# Sanity check on a letter-spaced sample: re-attach the detached combining marks
# first, then strip the letter-spacing.
remove_single_spaces(attach_diacritics("P û s   k ë ̀   k u ̠   k a p ë ' w a ̠"))

"Pûs kë̀ ku̠ kapë'wa̠"

## Naive ICL

In [87]:
from openai import OpenAI
import re
from tqdm.notebook import tqdm

# Prompt for the key only if an earlier run of this cell has not already set it.
if 'api_key' not in vars():
    from getpass import getpass

    # getpass does not echo the secret, so it stays out of the saved cell output.
    api_key = getpass("OpenAI API Key:")

client = OpenAI(api_key=api_key)


def run_prompt_full_context(lang, log_file, test_IDs=None):
    """Predict target sentences with GPT, using the entire `train` split of `lang` as context.

    Args:
        lang: 'bribri' | 'guarani' | 'maya'
        log_file: Path of a text file; the prompt and the raw response are appended to it.
        test_IDs: Optional collection of row IDs to run inference on. When omitted,
            every `dev` row of `lang` is used.

    Returns:
        A dict mapping each ID parsed from the response to its predicted target,
        with detached diacritics re-attached and letter-spacing removed. IDs the
        model did not return in the expected "ID/Target" form are absent.
    """
    train_split = df[(df['language'] == lang) & (df['split'] == 'train')]

    if test_IDs is not None:
        test_sentences = df[df['ID'].isin(test_IDs)]
    else:
        test_sentences = df[(df['language'] == lang) & (df['split'] == 'dev')]

    system_prompt = f"You are an expert in the {lang.capitalize()} language. You are creating education materials by taking a given sentence in {lang.capitalize()} and a label indicating a change in one or more linguistic features, and outputting the sentence transformed by changing that feature."
    context = '\n\n'.join(train_split['Formatted'])
    test_examples = '\n\n'.join(test_sentences['Formatted_Covered'])


    prompt = f"""Below are examples of a sentence in {lang.capitalize()}, the linguistic change, and the target sentence after applying the change.
    
{context}

Below is a list of similar examples, where the source sentence and linguistic change are given, and the output sentence is not known. For each example, please output only the id and target sentence values, as in

ID: some id
Target: sentence after applying the change


Do not output any additional text, and do not output the Source or Change fields. This is very important, take your time and do not mess up or I will lose my job.

{test_examples}
    """

    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        temperature=1,
        top_p=1,
        seed=430
    )
    print(completion.usage)
    print(completion.model)

    choice = completion.choices[0]
    # `content` can be None (e.g. a filtered response); treat that as an empty answer.
    resp = choice.message.content or ""
    if choice.finish_reason == "length":
        # The answer hit the output-token limit, so trailing examples are missing.
        print("WARNING: response was truncated at the token limit; "
              "some predictions are missing. Consider a smaller chunk.")

    # The prompt and response contain non-ASCII letters, so do not rely on the
    # platform's default encoding for the log file.
    with open(log_file, 'a', encoding='utf-8') as log:
        log.write("\n\nPROMPT:\n" + prompt)
        log.write("\nRESPONSE:\n" + resp)

    # Each answer is an "ID: ..." line immediately followed by a "Target: ..." line.
    pattern = r"I[dD]: (\S+)\nTarget: (.*)(\n|$)"
    matches = re.findall(pattern, resp, re.M)
    return {
        match[0]: remove_single_spaces(attach_diacritics(match[1]))
        for match in matches
    }


In [88]:
import math

def test_full_context(chunk_size, df):
    """Run full-context inference over each language's dev set, chunk by chunk.

    Works on a deep copy of `df`. For 'bribri', 'guarani' and 'maya' in turn,
    the dev IDs are sent to `run_prompt_full_context` in slices of
    `chunk_size`, and every returned prediction is written to the
    'Predicted Target' column of the row with the matching ID. Afterwards the
    dev rows of each language are saved to "<language>-dev-preds.tsv".

    Args:
        chunk_size: Number of dev sentences per API request.
        df: Frame with 'ID', 'language', 'split' and 'Predicted Target' columns.

    Returns:
        The modified copy of `df`.
    """
    results = df.copy(deep=True)
    languages = ['bribri', 'guarani', 'maya']

    def dev_rows(language):
        # Dev-split rows of one language, taken from the working copy.
        is_lang = results['language'] == language
        is_dev = results['split'] == 'dev'
        return results[is_lang & is_dev]

    for language in tqdm(languages):
        lang_test_size = len(dev_rows(language))
        n_chunks = math.ceil(lang_test_size / chunk_size)

        for chunk in tqdm(range(n_chunks)):
            start = chunk * chunk_size
            stop = (chunk + 1) * chunk_size
            print(f"Testing indices {start} through {stop}")
            test_chunk = dev_rows(language)['ID'].values[start:stop]
            pred_dict = run_prompt_full_context(
                lang=language,
                test_IDs=test_chunk,
                log_file=f"./{language}.log",
            )
            for pred_id in pred_dict:
                results.loc[results['ID'] == pred_id, 'Predicted Target'] = pred_dict[pred_id]

    for language in languages:
        dev_rows(language).to_csv(f"{language}-dev-preds.tsv", sep="\t")

    return results

# Run the full-context evaluation, sending 80 dev sentences per API request.
preds = test_full_context(chunk_size=80, df=df)

# Show the returned frame (last expression of the cell).
preds

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Testing indices 0 through 80
CompletionUsage(completion_tokens=2479, prompt_tokens=25838, total_tokens=28317)
gpt-4-0125-preview
Testing indices 80 through 160
CompletionUsage(completion_tokens=2414, prompt_tokens=25967, total_tokens=28381)
gpt-4-0125-preview
Testing indices 160 through 240
CompletionUsage(completion_tokens=1646, prompt_tokens=24689, total_tokens=26335)
gpt-4-0125-preview


  0%|          | 0/1 [00:00<?, ?it/s]

Testing indices 0 through 80
CompletionUsage(completion_tokens=1627, prompt_tokens=17403, total_tokens=19030)
gpt-4-0125-preview


  0%|          | 0/2 [00:00<?, ?it/s]

Testing indices 0 through 80
CompletionUsage(completion_tokens=4096, prompt_tokens=52534, total_tokens=56630)
gpt-4-0125-preview
Testing indices 80 through 160
CompletionUsage(completion_tokens=2778, prompt_tokens=52154, total_tokens=54932)
gpt-4-0125-preview


Unnamed: 0,ID,Source,Change,Target,language,split,Formatted,Formatted_Covered,Predicted Target
0,Maya0119,Tene' áak'ab kin bin merkaado,TYPE:NEG,Tene' ma' áak'ab kin bin merkaadoi',maya,train,Id: Maya0119\nSource: T e n e ' á a k ' a b ...,Id: Maya0119\nSource: T e n e ' á a k ' a b ...,
1,Maya0120,Tene' áak'ab kin bin merkaado,SUBTYPE:INT,Tene' wáaj áak'ab kin bin merkaado,maya,train,Id: Maya0120\nSource: T e n e ' á a k ' a b ...,Id: Maya0120\nSource: T e n e ' á a k ' a b ...,
2,Maya0317,Ko'one'ex ich kool,PERSON:1_PL,Ko'ox ich kool,maya,train,Id: Maya0317\nSource: K o ' o n e ' e x i c ...,Id: Maya0317\nSource: K o ' o n e ' e x i c ...,
3,Maya0620,Táan a bine'ex ich kool,TYPE:NEG,Ma' táan a bine'ex ich kooli',maya,train,Id: Maya0620\nSource: T á a n a b i n e ' ...,Id: Maya0620\nSource: T á a n a b i n e ' ...,
4,Maya0621,Táan a bine'ex ich kool,SUBTYPE:INT,Táan wáaj a bine'ex ich kool,maya,train,Id: Maya0621\nSource: T á a n a b i n e ' ...,Id: Maya0621\nSource: T á a n a b i n e ' ...,
...,...,...,...,...,...,...,...,...,...
1516,Bribri0676,Ye' tö dawà su',"TYPE:NEG, TENSE:FUT_CER, ASPECT:IPFV, VOICE:MI...",Kë̀ dawà sùrpa,bribri,dev,Id: Bribri0676\nSource: Y e ' t ö d a w à ...,Id: Bribri0676\nSource: Y e ' t ö d a w à ...,Kë̀ dáwàsùrpa
1517,Bribri0677,Ye' tö dawà su',TENSE:PAS_PLU,Ye' wa̠ dawà súrule,bribri,dev,Id: Bribri0677\nSource: Y e ' t ö d a w à ...,Id: Bribri0677\nSource: Y e ' t ö d a w à ...,Ye' wa̠ dawà surule
1518,Bribri0678,Ye' tö dawà su',"TYPE:NEG, TENSE:PAS_PLU",Ye' kë̀ wa̠ dawà súrule,bribri,dev,Id: Bribri0678\nSource: Y e ' t ö d a w à ...,Id: Bribri0678\nSource: Y e ' t ö d a w à ...,Ye' kë̀ wa̠ dawà surule
1519,Bribri0679,Ye' tö dawà su',"MODE:POT, TENSE:IPFV_REC, ASPECT:IPFV",Ye' a̠ dawà súr,bribri,dev,Id: Bribri0679\nSource: Y e ' t ö d a w à ...,Id: Bribri0679\nSource: Y e ' t ö d a w à ...,Ye' a̠ dawà sur


In [48]:
# Same evaluation with a smaller chunk: 20 dev sentences per API request.
preds = test_full_context(chunk_size=20, df=df)
# Show the returned frame (last expression of the cell).
preds

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Testing indices 0 through 20
CompletionUsage(completion_tokens=433, prompt_tokens=17559, total_tokens=17992)
gpt-4-0125-preview
ID: Bribri0359
Target: Pûs kapë'ulurwa̠

ID: Bribri0360
Target: Pûs kë̀ wa̠ kapë'wa̠

ID: Bribri0361
Target: Pûs kapë'wé

ID: Bribri0362
Target: Pûs kapë'wé ulur

ID: Bribri0363
Target: Pûs kapë'wö̀

ID: Bribri0364
Target: Pûs tso kapë'wö́k

ID: Bribri0365
Target: Pûs kë̀ ku̠ kapë'wö́k

ID: Bribri0366
Target: Pûs bák kapë'wö́k

ID: Bribri0367
Target: Pûs kë̀ ku'bak kapë'wö́k

ID: Bribri0368
Target: Pûs é̠n a̠ kapë'wa'ku̠

ID: Bribri0369
Target: Pûs kapë'wö̀ke̠

ID: Bribri0370
Target: Kapë'wö́

ID: Bribri0371
Target: Kapë'wáne

ID: Bribri0372
Target: Kë̀ kapë'wáne

ID: Bribri0373
Target: Kapë'wár

ID: Bribri0234
Target: Ye' kë̀ wa̠ i kítne̠

ID: Bribri0235
Target: Ye' tö i kité

ID: Bribri0236
Target: Ye' kë̀ wa̠ i kítne̠

ID: Bribri0237
Target: Ye' tö i kitè

ID: Bribri0238
Target: Kë̀ ye' tö i kitè
Testing indices 20 through 40
CompletionUsage(completion_toke

  0%|          | 0/4 [00:00<?, ?it/s]

Testing indices 0 through 20
CompletionUsage(completion_tokens=388, prompt_tokens=9066, total_tokens=9454)
gpt-4-0125-preview
Id: Guarani0232
Target: Ore rombyai kuri

Id: Guarani0233
Target: Ore ndorombyaita kuri

Id: Guarani0234
Target: Ñande ndorombyai kuri

Id: Guarani0235
Target: Che ndaiky'ai kuri

Id: Guarani0236
Target: Peẽ napeñorombyai kuri

Id: Guarani0237
Target: Nde ndererombyai kuri

Id: Guarani0073
Target: Peẽ peñanga’u

Id: Guarani0074
Target: Ha’ekuéra oñanga’u

Id: Guarani0075
Target: Che añanga’u

Id: Guarani0076
Target: Peẽ peñanga'úkuri

Id: Guarani0077
Target: Peẽ peñanga'u

Id: Guarani0238
Target: Peẽ peñemongetakuri

Id: Guarani0239
Target: Ñande ñañemongetami

Id: Guarani0240
Target: Ore jaroñemongetami

Id: Guarani0241
Target: Peẽ peñemongetáta

Id: Guarani0242
Target: Peẽ ndapeñemongetái

Id: Guarani0243
Target: Peẽ peñemongeta

Id: Guarani0048
Target: Che ahecha kuri peteĩ óga

Id: Guarani0049
Target: Che ndahechái kuri hína peteĩ óga

Id: Guarani0050
Target

  0%|          | 0/8 [00:00<?, ?it/s]

Testing indices 0 through 20
CompletionUsage(completion_tokens=473, prompt_tokens=29256, total_tokens=29729)
gpt-4-0125-preview
Id: Maya0066
Target: Janalo'ob tu k'íiwikil koonol

Id: Maya0067
Target: Janale'ex tu k'íiwikil koonol

Id: Maya0068
Target: Janalech tu k'íiwikil koonol

Id: Maya0622
Target: Ma' táan ek bin ich kooli'

Id: Maya0623
Target: Táan wáaj ek bin ich kool

Id: Maya0605
Target: Teche' ma' táan ka bin xíimbal tu najili'

Id: Maya0606
Target: Te'exe' áak'ab ka bin xíimbal tu najil wáaj

Id: Maya0127
Target: Te'exe' ma' táan a bine'ex koonol tu k'íiwikil koonoli'

Id: Maya0128
Target: Te'exe' táan wáaj a bine'ex koonol tu k'íiwikil koonol

Id: Maya0259
Target: Táan a míistik a wotoch

Id: Maya0160
Target: Ma' jach k'a'abéet u bin merkaadoi'

Id: Maya0161
Target: Ma' jach k'a'abéet u bino'ob merkaadoi'

Id: Maya0162
Target: Ma' jach k'a'abéet a bine'ex merkaadoi'

Id: Maya0188
Target: Ba'ax k'iine'ex a bin merkaado

Id: Maya0189
Target: Ba'ax k'iino'ob a bin merkaado

I

Unnamed: 0,ID,Source,Change,Target,language,split,Formatted,Formatted_Covered,Predicted Target
0,Maya0119,Tene' áak'ab kin bin merkaado,TYPE:NEG,Tene' ma' áak'ab kin bin merkaadoi',maya,train,Id: Maya0119\nSource: Tene' áak'ab kin bin mer...,Id: Maya0119\nSource: Tene' áak'ab kin bin mer...,
1,Maya0120,Tene' áak'ab kin bin merkaado,SUBTYPE:INT,Tene' wáaj áak'ab kin bin merkaado,maya,train,Id: Maya0120\nSource: Tene' áak'ab kin bin mer...,Id: Maya0120\nSource: Tene' áak'ab kin bin mer...,
2,Maya0317,Ko'one'ex ich kool,PERSON:1_PL,Ko'ox ich kool,maya,train,Id: Maya0317\nSource: Ko'one'ex ich kool\nChan...,Id: Maya0317\nSource: Ko'one'ex ich kool\nChan...,
3,Maya0620,Táan a bine'ex ich kool,TYPE:NEG,Ma' táan a bine'ex ich kooli',maya,train,Id: Maya0620\nSource: Táan a bine'ex ich kool\...,Id: Maya0620\nSource: Táan a bine'ex ich kool\...,
4,Maya0621,Táan a bine'ex ich kool,SUBTYPE:INT,Táan wáaj a bine'ex ich kool,maya,train,Id: Maya0621\nSource: Táan a bine'ex ich kool\...,Id: Maya0621\nSource: Táan a bine'ex ich kool\...,
...,...,...,...,...,...,...,...,...,...
1516,Bribri0676,Ye' tö dawà su',"TYPE:NEG, TENSE:FUT_CER, ASPECT:IPFV, VOICE:MI...",Kë̀ dawà sùrpa,bribri,dev,Id: Bribri0676\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0676\nSource: Ye' tö dawà su'\nChang...,
1517,Bribri0677,Ye' tö dawà su',TENSE:PAS_PLU,Ye' wa̠ dawà súrule,bribri,dev,Id: Bribri0677\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0677\nSource: Ye' tö dawà su'\nChang...,
1518,Bribri0678,Ye' tö dawà su',"TYPE:NEG, TENSE:PAS_PLU",Ye' kë̀ wa̠ dawà súrule,bribri,dev,Id: Bribri0678\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0678\nSource: Ye' tö dawà su'\nChang...,
1519,Bribri0679,Ye' tö dawà su',"MODE:POT, TENSE:IPFV_REC, ASPECT:IPFV",Ye' a̠ dawà súr,bribri,dev,Id: Bribri0679\nSource: Ye' tö dawà su'\nChang...,Id: Bribri0679\nSource: Ye' tö dawà su'\nChang...,


## Retrieval

Rather than passing the full training set as context, let's try selecting informative examples for each sentence. First, we'll split up the sentences based on their linguistic change tags. Then, for each group of sentences, we'll retrieve train sentences with similar tags.

In [63]:
def run_prompt_naive_retrieval(lang, test_IDs):
    """Predict target sentences with GPT, using only `train` rows that share a change label.

    The in-context examples are the `train` rows of `lang` whose 'Change'
    value equals the 'Change' value of at least one requested test row.

    Args:
        lang: 'bribri' | 'guarani' | 'maya'
        test_IDs: Collection of row IDs to run inference on. Required.

    Returns:
        A dict mapping each ID parsed from the response to the target text
        exactly as the model wrote it. Unlike `run_prompt_full_context`, no
        despacing or diacritic re-attachment is applied here.

    Raises:
        ValueError: If `test_IDs` is None.
    """
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if test_IDs is None:
        raise ValueError("test_IDs must be provided")

    train_split = df[(df['language'] == lang) & (df['split'] == 'train')]
    test_sentences = df[df['ID'].isin(test_IDs)]

    # Keep only the training rows whose change label occurs among the test rows
    test_change_tags = test_sentences['Change'].unique()
    filtered_train = train_split[train_split['Change'].isin(test_change_tags)]

    # The closing sentence names the requested language instead of always saying "Bribri".
    system_prompt = f"You are an expert in the {lang.capitalize()} language. You are creating education materials by taking a given sentence in {lang.capitalize()} and a label indicating a change in one or more linguistic features, and outputting the sentence transformed by changing that feature. All {lang.capitalize()} text is seperated by spaces."
    context = '\n\n'.join(filtered_train['Formatted'])
    test_examples = '\n\n'.join(test_sentences['Formatted_Covered'])


    prompt = f"""Below are examples of a sentence in {lang.capitalize()}, the linguistic change, and the target sentence after applying the change.
    
{context}

Below is a list of similar examples, where the source sentence and linguistic change are given, and the output sentence is not known. For each example, please output only the id and target sentence values, as in

ID: some id
Target: sentence after applying the change


Do not output any additional text, and do not output the Source or Change fields. This is very important, take your time and do not mess up or I will lose my job.

{test_examples}
    """

    print(prompt)

    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        temperature=1,
        top_p=1,
        seed=430
    )
    print(completion.usage)
    print(completion.model)

    choice = completion.choices[0]
    # `content` can be None (e.g. a filtered response); treat that as an empty answer.
    resp = choice.message.content or ""
    print(resp)
    if choice.finish_reason == "length":
        # The answer hit the output-token limit, so trailing examples are missing.
        print("WARNING: response was truncated at the token limit; "
              "some predictions are missing.")

    # Each answer is an "ID: ..." line immediately followed by a "Target: ..." line.
    pattern = r"I[dD]: (\S+)\nTarget: (.*)(\n|$)"
    matches = re.findall(pattern, resp, re.M)
    return {match[0]: match[1] for match in matches}

# Try the retrieval prompt on a single Bribri ID and show the parsed result.
run_prompt_naive_retrieval("bribri", test_IDs=["Bribri0362"])

Below are examples of a sentence in Bribri, the linguistic change, and the target sentence after applying the change.
    
Id: Bribri0899
Source: I e '   t ö   b ö '   y ë ' s t s a ̠
Change: TENSE:PRF_REC, ABSNUM:PL
Target: I e '   t ö   b ö '   y é u l u r

Below is a list of similar examples, where the source sentence and linguistic change are given, and the output sentence is not known. For each example, please output only the id and target sentence values, as in

ID: some id
Target: sentence after applying the change


Do not output any additional text, and do not output the Source or Change fields. This is very important, take your time and do not mess up or I will lose my job.

Id: Bribri0362
Source: P û s   k a p ë ' w a ̠
Change: TENSE:PRF_REC, ABSNUM:PL
Target: 
    
CompletionUsage(completion_tokens=22, prompt_tokens=301, total_tokens=323)
gpt-4-0125-preview
ID: Bribri0362
Target: P û s k a p é u l u r


{'Bribri0362': 'P û s k a p é u l u r'}