In [None]:
import numpy as np
import pandas as pd
from tqdm.autonotebook import tqdm

In [2]:
# Label name; it is interpolated into the prompt CSV file name.
LABEL = 'product'

# Cross-validation splits whose rows are evaluated.
# Extend the list (e.g. [0, 1, 2, 3, 4]) to evaluate more splits.
CV_SPLITS = [0]

# Number of shots; it is interpolated into the prompt CSV file name.
SHOTS = 2

# Load prompts: 

In [3]:
# Build the file name from the configured label and shot count, then read
# the table using the first CSV column as the index.
prompts_file = f'prompts_{LABEL}_{SHOTS:d}-shot.csv'
prompts = pd.read_csv(prompts_file, index_col=0)

# Preview the first rows.
prompts.head()

Unnamed: 0,cv_split,label,prompt_all,prompt_conformal_35%,prompt_conformal_40%,prompt_conformal_50%,prompt_max-10,prompt_max-5,prompt_sim-10,prompt_sim-20,...,output_min_sim-20,output_min_sim-5,output_raw_all,output_raw_conformal_35%,output_raw_conformal_50%,output_raw_max-10,output_raw_max-5,output_raw_sim-10,output_raw_sim-20,output_raw_sim-5
0,0,other types of meat,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,chilli powder,hot chilli and hot chilli products,,,,,,,,
1,0,Not classified pork meat,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,frozen poultry products,boneless beef,,,,,,,,
2,0,prepared dish,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,sesame paste,precooked cooked beef meat products,,,beef products,,,,,
3,0,ground beef meat,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,beef products,beef trimmings,,,ground beef,,,,,
4,0,chicken breast,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,mechanically separated chicken meat,chicken breast,,,chicken breast,,,,,


# Evaluate prompts:

In [4]:
# Boolean mask with one entry per row of `prompts`: True where the row's
# cv_split is one of CV_SPLITS. np.isin replaces the manual OR-reduction over
# per-split comparisons and also handles an empty CV_SPLITS (selects no rows).
row_mask = np.isin(prompts.cv_split.values, CV_SPLITS)

# Number of selected rows.
row_mask.sum()

1510

In [5]:
# Collect the suffix of every column whose name starts with 'prompt_'
# (i.e. the column name with that prefix stripped).
columns = [
    name[len('prompt_'):]
    for name in prompts.columns
    if name.startswith('prompt_')
]
columns

['all',
 'conformal_35%',
 'conformal_40%',
 'conformal_50%',
 'max-10',
 'max-5',
 'sim-10',
 'sim-20',
 'sim-5']

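Conformal only: predict a class only when all examples in the prompt share a single class; otherwise make no prediction.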

In [None]:
# Baseline "conformal only": for every prompt variant, a prediction is made
# only when the example lines of the prompt contain exactly one distinct class.
for col in columns:
    col_name = f'output_raw_{col}'
    results = []

    for prompt in tqdm(prompts.loc[row_mask, f'prompt_{col}'].values):
        # Lines 2 .. -3 of the prompt are treated as example lines; the class
        # is the second field after splitting a line on ' -> '.
        example_lines = prompt.split('\n')[2:-3]
        classes = np.unique([line.split(' -> ')[1] for line in example_lines])

        # Exactly one class -> use it; otherwise no prediction (None).
        results.append(classes[0] if len(classes) == 1 else None)

    # Create the output column on first use, then store the "predictions"
    # for the selected rows only.
    if col_name not in prompts.columns:
        prompts[col_name] = [''] * len(prompts)

    prompts.loc[row_mask, col_name] = results


Min performance: predict a class chosen at random from the classes that appear in the prompt's examples.

In [None]:
# Baseline "min performance": for every prompt variant, predict a class drawn
# uniformly at random from the distinct classes found in the prompt's examples.

# Locally seeded generator so the random baseline is reproducible when the
# notebook is re-run (the seed value itself is arbitrary).
min_rng = np.random.default_rng(0)

for col in columns:
    results = []

    for prompt in tqdm(prompts.loc[row_mask, f'prompt_{col}'].values):
        # Lines 2 .. -3 of the prompt are treated as example lines; the class
        # is the second field after splitting a line on ' -> '.
        classes = np.unique([example.split(' -> ')[1] for example in prompt.split('\n')[2:-3]])

        # At least one class -> predict a random one; no examples -> no prediction.
        if len(classes) > 0:
            results.append(min_rng.choice(classes))
        else:
            results.append(None)

    # Create the output column on first use, then store the "predictions"
    # for the selected rows only.
    col_name = f'output_min_{col}'

    if col_name not in prompts.columns:
        prompts[col_name] = [''] * len(prompts)

    prompts.loc[row_mask, col_name] = results

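Max performance: predict the true label whenever it appears among the classes in the prompt's examples; otherwise predict a random one of those classes.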

In [None]:
# Baseline "max performance": for every prompt variant, predict the true label
# whenever it occurs among the classes in the prompt's examples; otherwise fall
# back to a class drawn uniformly at random from those classes.

# Locally seeded generator so the random fallback is reproducible when the
# notebook is re-run (the seed value itself is arbitrary).
max_rng = np.random.default_rng(1)

for col in columns:
    results = []

    for label, prompt in tqdm(prompts.loc[row_mask, ['label', f'prompt_{col}']].values):
        # Lines 2 .. -3 of the prompt are treated as example lines; the class
        # is the second field after splitting a line on ' -> '.
        classes = np.unique([example.split(' -> ')[1] for example in prompt.split('\n')[2:-3]])

        if label in classes:
            # True class is available in the prompt -> predict it.
            results.append(label)
        elif len(classes) > 0:
            # True class not in the set -> predict a random available class.
            results.append(max_rng.choice(classes))
        else:
            # No examples at all -> no prediction.
            results.append(None)

    # Create the output column on first use, then store the "predictions"
    # for the selected rows only.
    col_name = f'output_max_{col}'

    if col_name not in prompts.columns:
        prompts[col_name] = [''] * len(prompts)

    prompts.loc[row_mask, col_name] = results

# Save results:

In [9]:
# Preview the first five rows of the table before saving.
prompts.head(n=5)

Unnamed: 0,cv_split,label,prompt_all,prompt_conformal_35%,prompt_conformal_40%,prompt_conformal_50%,prompt_max-10,prompt_max-5,prompt_sim-10,prompt_sim-20,...,output_raw_conformal_35%,output_raw_conformal_50%,output_raw_max-10,output_raw_max-5,output_raw_sim-10,output_raw_sim-20,output_raw_sim-5,output_raw_conformal_40%,output_min_conformal_40%,output_max_conformal_40%
0,0,other types of meat,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,,,,,,,,,thermal processed pork meat,ready to eat - cook meals
1,0,Not classified pork meat,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,,,,,,,,,jam,thermal processed pork meat
2,0,prepared dish,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,,beef products,,,,,,,instant coffee,precooked cooked beef meat products
3,0,ground beef meat,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,,ground beef,,,,,,,cheese,ground beef
4,0,chicken breast,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,Context start:\nWe are looking for food produc...,...,,chicken breast,,,,,,,chicken based products,chicken breast


In [10]:
# Write the table to the CSV named after the configured label and shot count.
# NOTE: this is the same file-name pattern the notebook loads `prompts` from,
# so the input file is overwritten.
output_file = f'prompts_{LABEL}_{SHOTS:d}-shot.csv'
prompts.to_csv(output_file)