# LLM Skyline
GPT has demonstrated excellent performance on the task using in-context learning. Here, we run a systematic evaluation, in order to provide a *skyline*, i.e. near-optimal automated system. Of course, making OpenAI API calls might not be ideal in real usage for a number of reasons.

In [1]:
import pandas as pd
import os

# Read all of the data into a single combined DF
folder_path = 'americasnlp2024/ST2_EducationalMaterials/data/'
all_data = []
for filename in os.listdir(folder_path):
    if filename.endswith('.tsv'):
        split_df = pd.read_csv(os.path.join(folder_path, filename), delimiter='\t')
        [split_df['language'], split_df['split']] = filename[:-4].split("-")
        all_data.append(split_df)

df = pd.concat(all_data, ignore_index=True)

# We'll add spaces between letters to avoid tokenization issues
df['Formatted'] = df.apply(lambda row: f"Id: {row['ID']}\nSource: {row['Source']}\nChange: {row['Change']}\nTarget: {row['Target']}", axis=1)
df['Formatted_Covered'] = df.apply(lambda row: f"Id: {row['ID']}\nSource: {row['Source']}\nChange: {row['Change']}\nTarget: ", axis=1)
df['Predicted Target'] = ''


Unnamed: 0,ID,Source,Change,Target,language,split,Formatted,Formatted_Covered,Predicted Target
0,Bribri0359,Pûs kapë'wa̠,ABSNUM:PL,Pûs kapë'ulur,bribri,dev,Id: Bribri0359\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0359\nSource: Pûs kapë'wa̠\nChange: ...,
1,Bribri0360,Pûs kapë'wa̠,TYPE:NEG,Pûs kë̀ kapë̀ne̠wa̠,bribri,dev,Id: Bribri0360\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0360\nSource: Pûs kapë'wa̠\nChange: ...,
2,Bribri0361,Pûs kapë'wa̠,TENSE:PRF_REC,Pûs kapówa̠,bribri,dev,Id: Bribri0361\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0361\nSource: Pûs kapë'wa̠\nChange: ...,
3,Bribri0362,Pûs kapë'wa̠,"TENSE:PRF_REC, ABSNUM:PL",Pûs kapóulur,bribri,dev,Id: Bribri0362\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0362\nSource: Pûs kapë'wa̠\nChange: ...,
4,Bribri0363,Pûs kapë'wa̠,"TENSE:IPFV_REC, ASPECT:IPFV",Pûs kapö̀wa̠,bribri,dev,Id: Bribri0363\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0363\nSource: Pûs kapë'wa̠\nChange: ...,
...,...,...,...,...,...,...,...,...,...
2670,Maya0469,Tene' ma' jbúuleni',PERSON:3_SI,Leti'e' ma' jbúuli',maya,train,Id: Maya0469\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0469\nSource: Tene' ma' jbúuleni'\nCha...,
2671,Maya0470,Tene' ma' jbúuleni',PERSON:1_PL,To'one' ma' jbúulo'oni',maya,train,Id: Maya0470\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0470\nSource: Tene' ma' jbúuleni'\nCha...,
2672,Maya0471,Tene' ma' jbúuleni',PERSON:2_PL,Te'exe' ma' jbúule'exi',maya,train,Id: Maya0471\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0471\nSource: Tene' ma' jbúuleni'\nCha...,
2673,Maya0121,Táan a bin koonol tu k'íiwikil koonol,TYPE:NEG,Ma' táan a bin koonol tu k'íiwikil koonoli',maya,train,Id: Maya0121\nSource: Táan a bin koonol tu k'í...,Id: Maya0121\nSource: Táan a bin koonol tu k'í...,


In [3]:
import re
def remove_single_spaces(text: str) -> str:
    # Replace single spaces between letters with no space
    return re.sub("\s+", " ", re.sub(r'(?<=\w|\') (?=\w|\')', '', text))

# Fixes unattached diacritics
def attach_diacritics(text: str) -> str:
    # Function to reorder each match
    def reorder(match):
        char, diacritic = match.groups()
        # Return the reordered string with the diacritic attached to the character
        return char + diacritic
    
    # Regular expression to find a character followed by a space and then the diacritic
    pattern = r'(\w) ([ ̀ ̠])'
    # Replace occurrences found by the pattern with the reordered version
    adjusted_text = re.sub(pattern, reorder, text)
    
    return adjusted_text

remove_single_spaces(attach_diacritics("P û s   k ë ̀   k u ̠   k a p ë ' w a ̠"))

"Pûs kë̀ ku̠ kapë'wa̠"

## Naive ICL

In [9]:
from openai import OpenAI
import re
from tqdm.notebook import tqdm

if 'api_key' not in vars():
    api_key = input("OpenAI API Key:")

client = OpenAI(api_key=api_key)


def run_prompt_full_context(lang, log_file, test_IDs=None):
    """Runs a GPT prompt for a specified row in the dev/test set. Uses the entire `train` split as context.

    Args:
        lang: 'bribri' | 'guarani' | 'maya'
        test_ID The ID of a row in the dev/test set to run inference on.
    """
    train_split = df[(df['language'] == lang) & (df['split'] == 'train')]

    if test_IDs is not None:
        test_sentences = df[df['ID'].isin(test_IDs)]
    else:
        test_sentences = df[(df['language'] == lang) & (df['split'] == 'dev')]

    system_prompt = f"You are an expert in the {lang.capitalize()} language. You are creating education materials by taking a given sentence in {lang.capitalize()} and a label indicating a change in one or more linguistic features, and outputting the sentence transformed by changing that feature."
    context = '\n\n'.join(train_split['Formatted'])
    test_examples = '\n\n'.join(test_sentences['Formatted_Covered'])


    prompt = f"""Below are examples of a sentence in {lang.capitalize()}, the linguistic change, and the target sentence after applying the change.
    
{context}

Below is a list of similar examples, where the source sentence and linguistic change are given, and the output sentence is not known. For each example, please output only the id and target sentence values, as in

ID: some id
Target: sentence after applying the change


Do not output any additional text, and do not output the Source or Change fields. This is very important, take your time and do not mess up or I will lose my job.

{test_examples}
    """

    completion = client.chat.completions.create(
        #model="gpt-4-turbo-preview",
        model="gpt-4-turbo-2024-04-09",

        #model = "gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        temperature=0,
        top_p=1,
        seed=430
    )
    print(completion.usage)
    print(completion.model)

    pattern = r"I[dD]: (\S+)\nTarget: (.*)(\n|$)"
    resp = completion.choices[0].message.content
    
    with open(log_file, 'a') as log:
        log.write("\n\nPROMPT:\n" + prompt)
        log.write("\nRESPONSE:\n" + resp)

    matches = re.findall(pattern, resp, re.M)
    matches_dict = dict()
    for match in matches:
        matches_dict[match[0]] = match[1] # remove_single_spaces(attach_diacritics(match[1]))
    return matches_dict


In [26]:
import math

def test_full_context(chunk_size, df):
    df = df.copy(deep=True)
    for language in tqdm(['bribri', 'guarani', 'maya']):
        lang_test_size = len(df[(df['language'] == language) & (df['split'] == 'dev')])

        for chunk in tqdm(range(math.ceil(lang_test_size / chunk_size))):
            print(f"Testing indices {chunk*chunk_size} through {(chunk+1)*chunk_size}")
            test_chunk = df[(df['language'] == language) & (df['split'] == 'dev')]['ID'].values[chunk*chunk_size: (chunk+1)*chunk_size]
            pred_dict = run_prompt_full_context(lang=language, test_IDs=test_chunk, log_file=f"./{language}.log")
            for pred_id, pred_string in pred_dict.items():
                df.loc[df['ID'] == pred_id, 'Predicted Target'] = pred_string

    df[(df['language'] == 'bribri') & (df['split'] == 'dev')].to_csv(f"../preds/chatgpt/fc_chunksize_{chunk_size}/bribri-dev-preds.tsv", sep="\t")
    df[(df['language'] == 'guarani') & (df['split'] == 'dev')].to_csv(f"../preds/chatgpt/fc_chunksize_{chunk_size}/guarani-dev-preds.tsv", sep="\t")
    df[(df['language'] == 'maya') & (df['split'] == 'dev')].to_csv(f"../preds/chatgpt/fc_chunksize_{chunk_size}/maya-dev-preds.tsv", sep="\t")

    return df

preds = test_full_context(chunk_size=80, df=df)

preds

ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

In [27]:
preds = test_full_context(chunk_size=20, df=df)
preds

ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

## Retrieval

Rather than passing full context, let's try selecting informative examples for each sentence. First, we'll split up sentences based on the linguistic change tags. Then, for each group of sentences, we'll retrieve train sentences with similar tags.

In [28]:
def run_prompt_naive_retrieval(lang, test_IDs):
    """Runs a GPT prompt for a specified row in the dev/test set. Retrieves items from the `train` split that have the same change tags.

    Args:
        lang: 'bribri' | 'guarani' | 'maya'
        test_ID The ID of a row in the dev/test set to run inference on.
    """
    train_split = df[(df['language'] == lang) & (df['split'] == 'train')]

    assert(test_IDs is not None)

    test_sentences = df[df['ID'].isin(test_IDs)]

    # Determine the tags that appear in the test sentences
    test_change_tags = test_sentences['Change'].unique()
    print(test_change_tags)
    filtered_train = train_split[train_split['Change'].isin(test_change_tags)]

    system_prompt = f"You are an expert in the {lang.capitalize()} language. You are creating education materials by taking a given sentence in {lang.capitalize()} and a label indicating a change in one or more linguistic features, and outputting the sentence transformed by changing that feature. All Bribri text is seperated by spaces."
    context = '\n\n'.join(filtered_train['Formatted'])
    test_examples = '\n\n'.join(test_sentences['Formatted_Covered'])


    prompt = f"""Below are examples of a sentence in {lang.capitalize()}, the linguistic change, and the target sentence after applying the change.
    
{context}

Below is a list of similar examples, where the source sentence and linguistic change are given, and the output sentence is not known. For each example, please output only the id and target sentence values, as in

ID: some id
Target: sentence after applying the change


Do not output any additional text, and do not output the Source or Change fields. This is very important, take your time and do not mess up or I will lose my job.

{test_examples}
    """

    print(prompt)

    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        temperature=1,
        top_p=1,
        seed=430
    )
    print(completion.usage)
    print(completion.model)

    pattern = r"I[dD]: (\S+)\nTarget: (.*)(\n|$)"
    resp = completion.choices[0].message.content
    print(resp)
    matches = re.findall(pattern, resp, re.M)
    matches_dict = dict()
    for match in matches:
        matches_dict[match[0]] = match[1]
    return matches_dict

run_prompt_naive_retrieval("bribri", test_IDs=["Bribri0362", "Bribri0367"])


Exception ignored in: <function tqdm.__del__ at 0x13877e480>
Traceback (most recent call last):
  File "/Users/clairepost/opt/anaconda3/envs/pyfomaEnv/lib/python3.12/site-packages/tqdm/std.py", line 1149, in __del__
    self.close()
  File "/Users/clairepost/opt/anaconda3/envs/pyfomaEnv/lib/python3.12/site-packages/tqdm/notebook.py", line 278, in close
    self.disp(bar_style='danger', check_delay=False)
    ^^^^^^^^^
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


['TENSE:PRF_REC, ABSNUM:PL' 'TYPE:NEG, TENSE:PRF_PROG']
Below are examples of a sentence in Bribri, the linguistic change, and the target sentence after applying the change.
    
Id: Bribri0303
Source: Ye' shka'
Change: TYPE:NEG, TENSE:PRF_PROG
Target: Ye' kë̀ ku̠'bak shkö́k

Id: Bribri0962
Source: Ye' tö i sík
Change: TYPE:NEG, TENSE:PRF_PROG
Target: Ye' kë̀ ku̠'bak i skö́k

Id: Bribri0771
Source: Ye' dör àrros tó̠ ñè̠
Change: TYPE:NEG, TENSE:PRF_PROG
Target: Ye' kë̀ ku̠'bak àrros ta̠ú̠k ñè̠

Id: Bribri0493
Source: Ye' tö kàsir të'
Change: TYPE:NEG, TENSE:PRF_PROG
Target: Ye' kë̀ ku̠'bak kàsir tö́k

Id: Bribri0899
Source: Ie' tö bö' yë'stsa̠
Change: TENSE:PRF_REC, ABSNUM:PL
Target: Ie' tö bö' yéulur

Id: Bribri0380
Source: Ie' dúwa̠
Change: TYPE:NEG, TENSE:PRF_PROG
Target: Ie' kë̀ ku'bak dawö́kwa̠

Id: Bribri0611
Source: Be' dör ye' tsí bi'
Change: TYPE:NEG, TENSE:PRF_PROG
Target: Be' kë̀ ku̠'bak ye' tsí biö́k

Id: Bribri0691
Source: Chìchi tö Po'tak kö'
Change: TYPE:NEG, TENSE:PRF_PR

NotFoundError: Error code: 404 - {'error': {'message': 'The model `gpt-4-turbo-preview` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}

In [21]:
def run_prompt_smart_retrieval(lang, log_file, test_IDs):
    """Runs a GPT prompt for a specified row in the dev/test set. Retrieves items from the `train` split that have the same change tags and sub-change tags

    Args:
        lang: 'bribri' | 'guarani' | 'maya'
        test_ID The ID of a row in the dev/test set to run inference on.
    """
    train_split = df[(df['language'] == lang) & (df['split'] == 'train')]

    assert(test_IDs is not None)

    test_sentences = df[df['ID'].isin(test_IDs)]

    results = {}

    for test_sent in test_sentences.iterrows():
        test_sent=test_sent[1]

        # Determine the tags that appear in the test sentences
        test_change_tags = test_sent['Change'].split(", ")
        test_change_tags.append(test_sent['Change'])
        print(test_change_tags)
        filtered_train = train_split[train_split['Change'].isin(test_change_tags)]

        system_prompt = f"You are an expert in the {lang.capitalize()} language. You are creating education materials by taking a given sentence in {lang.capitalize()} and a label indicating a change in one or more linguistic features, and outputting the sentence transformed by changing that feature. All Bribri text is seperated by spaces."
        context = '\n\n'.join(filtered_train['Formatted'])
        test_examples = '\n\n' + test_sent['Formatted_Covered']


        prompt = f"""Below are examples of a sentence in {lang.capitalize()}, the linguistic change, and the target sentence after applying the change.
        
        {context}

        Below is a similar example, where the source sentence and linguistic change are given, and the output sentence is not known. For this example, please output only the id and target sentence values, as in

        ID: some id
        Target: sentence after applying the change


        Do not output any additional text, and do not output the Source or Change fields. This is very important, take your time and do not mess up or I will lose my job.

        {test_examples}
        """

        completion = client.chat.completions.create(
            #model="gpt-4-0613",
            model="gpt-3.5-turbo-0125",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ],
            temperature=0,
            top_p=1,
            seed=430
        )
        print(completion.usage)
        print(completion.model)

        pattern = r"I[dD]: (\S+)\nTarget: (.*)(\n|$)"
        resp = completion.choices[0].message.content
    
        with open(log_file, 'a') as log:
            log.write("\n\nPROMPT:\n" + prompt)
            log.write("\nRESPONSE:\n" + resp)

        matches = re.findall(pattern, resp, re.M)
        matches_dict = dict()
        for match in matches:
            matches_dict[match[0]] = remove_single_spaces(attach_diacritics(match[1]))
           # results[match[0]] = remove_single_spaces(attach_diacritics(match[1]))- for when we add in the smart split
            results[match[0]] = match[1]


    return results
# return matches_dict

run_prompt_smart_retrieval("bribri", "test.log", test_IDs=["Bribri0362"])

['TENSE:PRF_REC', 'ABSNUM:PL', 'TENSE:PRF_REC, ABSNUM:PL']
CompletionUsage(completion_tokens=20, prompt_tokens=834, total_tokens=854)
gpt-3.5-turbo-0125


{'Bribri0362': "Pûs kapë'wé̠"}

In [22]:
import math

out_folder = "preds/chatgpt/chatgpt-smart-context3/"

def test_smart_context(chunk_size, df):
    df = df.copy(deep=True)
    for language in  ['guarani', 'maya']: #['bribri']
        lang_test_size = len(df[(df['language'] == language) & (df['split'] == 'dev')])

        for chunk in range(math.ceil(lang_test_size / chunk_size)):
            print(f"Testing indices {chunk*chunk_size} through {(chunk+1)*chunk_size}")
            test_chunk = df[(df['language'] == language) & (df['split'] == 'dev')]['ID'].values[chunk*chunk_size: (chunk+1)*chunk_size]
            pred_dict = run_prompt_smart_retrieval(lang=language, test_IDs=test_chunk, log_file=f"{out_folder}/{language}-smart.log")
            for pred_id, pred_string in pred_dict.items():
                df.loc[df['ID'] == pred_id, 'Predicted Target'] = pred_string

    df[(df['language'] == 'bribri') & (df['split'] == 'dev')].to_csv(out_folder + "bribri-dev-preds-smart.tsv", sep="\t")
    df[(df['language'] == 'guarani') & (df['split'] == 'dev')].to_csv(out_folder + "guarani-dev-preds-smart.tsv", sep="\t")
    df[(df['language'] == 'maya') & (df['split'] == 'dev')].to_csv(out_folder + "maya-dev-preds-smart.tsv", sep="\t")

    return df

preds = test_smart_context(chunk_size=80, df=df)

preds

Testing indices 0 through 80
['TYPE:AFF', 'TYPE:AFF']
CompletionUsage(completion_tokens=15, prompt_tokens=740, total_tokens=755)
gpt-3.5-turbo-0125
['TENSE:FUT_SIM', 'TENSE:FUT_SIM']
CompletionUsage(completion_tokens=17, prompt_tokens=1123, total_tokens=1140)
gpt-3.5-turbo-0125
['PERSON:1_PL_INC', 'PERSON:1_PL_INC']
CompletionUsage(completion_tokens=19, prompt_tokens=737, total_tokens=756)
gpt-3.5-turbo-0125
['PERSON:1_SI', 'PERSON:1_SI']
CompletionUsage(completion_tokens=17, prompt_tokens=696, total_tokens=713)
gpt-3.5-turbo-0125
['PERSON:2_PL', 'PERSON:2_PL']
CompletionUsage(completion_tokens=18, prompt_tokens=545, total_tokens=563)
gpt-3.5-turbo-0125
['PERSON:2_SI', 'PERSON:2_SI']
CompletionUsage(completion_tokens=17, prompt_tokens=1038, total_tokens=1055)
gpt-3.5-turbo-0125
['TYPE:AFF', 'TYPE:AFF']
CompletionUsage(completion_tokens=17, prompt_tokens=741, total_tokens=758)
gpt-3.5-turbo-0125
['PERSON:3_PL', 'PERSON:3_PL']
CompletionUsage(completion_tokens=19, prompt_tokens=649, tota

Unnamed: 0,ID,Source,Change,Target,language,split,Formatted,Formatted_Covered,Predicted Target
0,Bribri0359,Pûs kapë'wa̠,ABSNUM:PL,Pûs kapë'ulur,bribri,dev,Id: Bribri0359\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0359\nSource: Pûs kapë'wa̠\nChange: ...,
1,Bribri0360,Pûs kapë'wa̠,TYPE:NEG,Pûs kë̀ kapë̀ne̠wa̠,bribri,dev,Id: Bribri0360\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0360\nSource: Pûs kapë'wa̠\nChange: ...,
2,Bribri0361,Pûs kapë'wa̠,TENSE:PRF_REC,Pûs kapówa̠,bribri,dev,Id: Bribri0361\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0361\nSource: Pûs kapë'wa̠\nChange: ...,
3,Bribri0362,Pûs kapë'wa̠,"TENSE:PRF_REC, ABSNUM:PL",Pûs kapóulur,bribri,dev,Id: Bribri0362\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0362\nSource: Pûs kapë'wa̠\nChange: ...,
4,Bribri0363,Pûs kapë'wa̠,"TENSE:IPFV_REC, ASPECT:IPFV",Pûs kapö̀wa̠,bribri,dev,Id: Bribri0363\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0363\nSource: Pûs kapë'wa̠\nChange: ...,
...,...,...,...,...,...,...,...,...,...
2670,Maya0469,Tene' ma' jbúuleni',PERSON:3_SI,Leti'e' ma' jbúuli',maya,train,Id: Maya0469\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0469\nSource: Tene' ma' jbúuleni'\nCha...,
2671,Maya0470,Tene' ma' jbúuleni',PERSON:1_PL,To'one' ma' jbúulo'oni',maya,train,Id: Maya0470\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0470\nSource: Tene' ma' jbúuleni'\nCha...,
2672,Maya0471,Tene' ma' jbúuleni',PERSON:2_PL,Te'exe' ma' jbúule'exi',maya,train,Id: Maya0471\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0471\nSource: Tene' ma' jbúuleni'\nCha...,
2673,Maya0121,Táan a bin koonol tu k'íiwikil koonol,TYPE:NEG,Ma' táan a bin koonol tu k'íiwikil koonoli',maya,train,Id: Maya0121\nSource: Táan a bin koonol tu k'í...,Id: Maya0121\nSource: Táan a bin koonol tu k'í...,


In [14]:
print(preds)

              ID                                 Source  \
0     Bribri0359                           Pûs kapë'wa̠   
1     Bribri0360                           Pûs kapë'wa̠   
2     Bribri0361                           Pûs kapë'wa̠   
3     Bribri0362                           Pûs kapë'wa̠   
4     Bribri0363                           Pûs kapë'wa̠   
...          ...                                    ...   
2670    Maya0469                    Tene' ma' jbúuleni'   
2671    Maya0470                    Tene' ma' jbúuleni'   
2672    Maya0471                    Tene' ma' jbúuleni'   
2673    Maya0121  Táan a bin koonol tu k'íiwikil koonol   
2674    Maya0122  Táan a bin koonol tu k'íiwikil koonol   

                           Change  \
0                       ABSNUM:PL   
1                        TYPE:NEG   
2                   TENSE:PRF_REC   
3        TENSE:PRF_REC, ABSNUM:PL   
4     TENSE:IPFV_REC, ASPECT:IPFV   
...                           ...   
2670                  PERSON:3_S

## predict test data for full-context

In [36]:
import math
def test_full_context_test_data(chunk_size, df):

    out_folder = f"preds/chatgpt/test-results/fc_{chunk_size}"

    df = df.copy(deep=True)
    for language in ['maya']:#, 'guarani', 'maya']):
        lang_test_size = len(df[(df['language'] == language) & (df['split'] == 'test')])

        for chunk in range(math.ceil(lang_test_size / chunk_size)):
            print(f"Testing indices {chunk*chunk_size} through {(chunk+1)*chunk_size}")
            test_chunk = df[(df['language'] == language) & (df['split'] == 'test')]['ID'].values[chunk*chunk_size: (chunk+1)*chunk_size]
            pred_dict = run_prompt_full_context(lang=language, test_IDs=test_chunk, log_file=f"./{language}-test-data.log")
            for pred_id, pred_string in pred_dict.items():
                df.loc[df['ID'] == pred_id, 'Predicted Target'] = pred_string

    # df[(df['language'] == 'bribri') & (df['split'] == 'test')].to_csv(f"{out_folder}/bribri-test-preds.tsv", sep="\t")
    # df[(df['language'] == 'guarani') & (df['split'] == 'test')].to_csv(f"{out_folder}/guarani-test-preds.tsv", sep="\t")
            df[(df['language'] == 'maya') & (df['split'] == 'test')].to_csv(f"{out_folder}/maya-test-preds.tsv", sep="\t")

    return df


preds = test_full_context_test_data(chunk_size=20, df=df)

preds

Testing indices 0 through 20
CompletionUsage(completion_tokens=431, prompt_tokens=29251, total_tokens=29682)
gpt-4-turbo-2024-04-09
Testing indices 20 through 40
CompletionUsage(completion_tokens=490, prompt_tokens=29313, total_tokens=29803)
gpt-4-turbo-2024-04-09
Testing indices 40 through 60
CompletionUsage(completion_tokens=492, prompt_tokens=29280, total_tokens=29772)
gpt-4-turbo-2024-04-09
Testing indices 60 through 80
CompletionUsage(completion_tokens=532, prompt_tokens=29318, total_tokens=29850)
gpt-4-turbo-2024-04-09
Testing indices 80 through 100
CompletionUsage(completion_tokens=514, prompt_tokens=29345, total_tokens=29859)
gpt-4-turbo-2024-04-09
Testing indices 100 through 120
CompletionUsage(completion_tokens=571, prompt_tokens=29375, total_tokens=29946)
gpt-4-turbo-2024-04-09
Testing indices 120 through 140
CompletionUsage(completion_tokens=557, prompt_tokens=29372, total_tokens=29929)
gpt-4-turbo-2024-04-09
Testing indices 140 through 160
CompletionUsage(completion_tokens

Unnamed: 0,ID,Source,Change,Target,language,split,Formatted,Formatted_Covered,Predicted Target
0,Bribri0359,Pûs kapë'wa̠,ABSNUM:PL,Pûs kapë'ulur,bribri,dev,Id: Bribri0359\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0359\nSource: Pûs kapë'wa̠\nChange: ...,
1,Bribri0360,Pûs kapë'wa̠,TYPE:NEG,Pûs kë̀ kapë̀ne̠wa̠,bribri,dev,Id: Bribri0360\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0360\nSource: Pûs kapë'wa̠\nChange: ...,
2,Bribri0361,Pûs kapë'wa̠,TENSE:PRF_REC,Pûs kapówa̠,bribri,dev,Id: Bribri0361\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0361\nSource: Pûs kapë'wa̠\nChange: ...,
3,Bribri0362,Pûs kapë'wa̠,"TENSE:PRF_REC, ABSNUM:PL",Pûs kapóulur,bribri,dev,Id: Bribri0362\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0362\nSource: Pûs kapë'wa̠\nChange: ...,
4,Bribri0363,Pûs kapë'wa̠,"TENSE:IPFV_REC, ASPECT:IPFV",Pûs kapö̀wa̠,bribri,dev,Id: Bribri0363\nSource: Pûs kapë'wa̠\nChange: ...,Id: Bribri0363\nSource: Pûs kapë'wa̠\nChange: ...,
...,...,...,...,...,...,...,...,...,...
2670,Maya0469,Tene' ma' jbúuleni',PERSON:3_SI,Leti'e' ma' jbúuli',maya,train,Id: Maya0469\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0469\nSource: Tene' ma' jbúuleni'\nCha...,
2671,Maya0470,Tene' ma' jbúuleni',PERSON:1_PL,To'one' ma' jbúulo'oni',maya,train,Id: Maya0470\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0470\nSource: Tene' ma' jbúuleni'\nCha...,
2672,Maya0471,Tene' ma' jbúuleni',PERSON:2_PL,Te'exe' ma' jbúule'exi',maya,train,Id: Maya0471\nSource: Tene' ma' jbúuleni'\nCha...,Id: Maya0471\nSource: Tene' ma' jbúuleni'\nCha...,
2673,Maya0121,Táan a bin koonol tu k'íiwikil koonol,TYPE:NEG,Ma' táan a bin koonol tu k'íiwikil koonoli',maya,train,Id: Maya0121\nSource: Táan a bin koonol tu k'í...,Id: Maya0121\nSource: Táan a bin koonol tu k'í...,


In [37]:
def format_output_tsv(lang, preds):

    results = preds[(preds['language'] == lang) & (preds['split'] == 'test')]
    results = results[["ID", "Source", "Change", "Predicted Target"]]
    results.columns = ['ID', 'Source', 'Change', 'Target']
    results.to_csv(f"test-preds/chatgpt/{lang}.tsv", sep = "\t", index = False)

format_output_tsv("maya", preds)
