In [1]:
import sys
import numpy as np
import pandas as pd 

from utils import load_hidden_representations_from_hdf5, read_templates_from_file

In [2]:
# Notebook parameters: which task / model component to analyse and where
# the per-run log files live.
task = "wic"            # task name; used as a path component below
module = "decoder"      # model component; used as a path component below
log_dir = "/logfiles"   # root directory of the logged runs

In [3]:
# Read the prompt templates for the selected task and show them.
# Other template files previously used here (same directory):
#   fixed_prompt.csv, fixed_target_yes_no.csv
prompts_path = f"/t0-analysis/prompts/{task}/all.csv"
df_prompts = read_templates_from_file(prompts_path)
display(df_prompts)

Unnamed: 0,name,template,category,includes_targets,targets,target_ids,shuffle
0,gpt_3_without_targets,{sentence1} {sentence2} Question: Is the word ...,instructive,False,"▁No, ▁no, ▁Yes, ▁yes","0, 0, 1, 1",False
1,gpt_3_yes_no_with_targets,{sentence1} {sentence2} Question: Is the word ...,instructive,True,"▁No, ▁Yes","0, 1",False
2,affirmation_with_targets,Sentence A: {sentence1} Sentence B: {sentence2...,instructive,True,"▁Fal, ▁True","0, 1",False
3,grammar_homework_with_targets,"Decide whether the word ""{word}"" is used with ...",instructive,True,"▁No, ▁Yes","0, 1",False
4,polysemous_with_targets,"The word ""{word}"" has multiple meanings. Does ...",instructive,True,"▁No, ▁Yes","0, 1",False
5,question_context_without_targets,"Determine if the word ""{word}"" is used in the ...",instructive,False,"▁No, ▁no, ▁Yes, ▁yes","0, 0, 1, 1",False
6,question_meaning_without_targets,"Determine if the word ""{word}"" have the same m...",instructive,False,"▁No, ▁no, ▁Yes, ▁yes","0, 0, 1, 1",False
7,question_meaning_yes_no_with_targets,"Does the word ""{word}"" have the same meanining...",instructive,True,"▁No, ▁Yes","0, 1",False
8,same_sense_with_targets,Sentence 1: {sentence1} Sentence 2: {sentence2...,instructive,True,"▁No, ▁Yes","0, 1",False
9,similar_sense_without_targets,{sentence1} {sentence2} Similar sense of {word}?,instructive,False,"▁No, ▁no, ▁Yes, ▁yes","0, 0, 1, 1",False


## Compute task performance

In [4]:
def compute_accuracy(df_samples):
    """Return the fraction of rows whose prediction equals its label.

    Predictions and labels are both converted with ``str`` and compared for
    exact equality, so e.g. "1" and "1.0" do NOT match.

    Parameters
    ----------
    df_samples : pd.DataFrame
        Frame with a "prediction" and a "label" column.

    Returns
    -------
    float
        Accuracy in [0, 1]; NaN when the frame has no rows.
    """
    if len(df_samples) == 0:
        # Avoid a division by zero; an empty run has no defined accuracy.
        return float("nan")
    predictions = [str(v) for v in df_samples["prediction"].values]
    labels = [str(v) for v in df_samples["label"].values]
    n_correct = sum(p == l for p, l in zip(predictions, labels))
    return n_correct / len(df_samples)


# Maps model name -> DataFrame of per-prompt accuracy (index: prompt name).
models = {}

# Restrict this list to evaluate a subset of the models.
for model in ["bigscience-T0_3B", "bigscience-T0", "google-t5-xl-lm-adapt", "google-t5-xxl-lm-adapt"]:
    # Load the prompted inputs, predictions and labels for every prompt template.
    # The path is built from `log_dir` so that changing the parameter cell
    # actually changes where the samples are read from.
    df_inputs = {}
    for prompt in df_prompts['name']:
        samples_path = f"{log_dir}/{task}/{model}/{module}/{prompt}/prompted_samples.csv"
        df_inputs[prompt] = pd.read_csv(samples_path, sep='\t', index_col=0)

    # Compute the accuracy of each prompt template.
    # NOTE(review): accuracy relies on an exact string match between the
    # "prediction" and "label" columns. If a model emits values in a different
    # format than the labels (e.g. raw token text vs. class id), every row
    # counts as wrong -- confirm the column formats in prompted_samples.csv
    # when an accuracy of 0.0 shows up.
    performances = {
        "task performance": [compute_accuracy(df_samples) for df_samples in df_inputs.values()]
    }

    performances_df = pd.DataFrame(performances, index=list(df_inputs.keys()))
    models[model] = performances_df

In [5]:
# Sanity check: list the model names for which performance tables were computed.
models.keys()

dict_keys(['bigscience-T0_3B', 'bigscience-T0', 'google-t5-xl-lm-adapt', 'google-t5-xxl-lm-adapt'])

In [6]:
# Show the per-prompt performance table of every model, preceded by its name.
for model, model_performance in models.items():
    print(model)
    display(model_performance)

bigscience-T0_3B


Unnamed: 0,task performance
gpt_3_without_targets,0.50627
gpt_3_yes_no_with_targets,0.507837
affirmation_with_targets,0.504702
grammar_homework_with_targets,0.5
polysemous_with_targets,0.50627
question_context_without_targets,0.49373
question_meaning_without_targets,0.487461
question_meaning_yes_no_with_targets,0.482759
same_sense_with_targets,0.501567
similar_sense_without_targets,0.015674


bigscience-T0


Unnamed: 0,task performance
gpt_3_without_targets,0.50627
gpt_3_yes_no_with_targets,0.54232
affirmation_with_targets,0.562696
grammar_homework_with_targets,0.319749
polysemous_with_targets,0.579937
question_context_without_targets,0.504702
question_meaning_without_targets,0.507837
question_meaning_yes_no_with_targets,0.00627
same_sense_with_targets,0.581505
similar_sense_without_targets,0.130094


google-t5-xl-lm-adapt


Unnamed: 0,task performance
gpt_3_without_targets,0.0
gpt_3_yes_no_with_targets,0.0
affirmation_with_targets,0.0
grammar_homework_with_targets,0.0
polysemous_with_targets,0.0
question_context_without_targets,0.0
question_meaning_without_targets,0.0
question_meaning_yes_no_with_targets,0.0
same_sense_with_targets,0.0
similar_sense_without_targets,0.0


google-t5-xxl-lm-adapt


Unnamed: 0,task performance
gpt_3_without_targets,0.0
gpt_3_yes_no_with_targets,0.0
affirmation_with_targets,0.0
grammar_homework_with_targets,0.0
polysemous_with_targets,0.0
question_context_without_targets,0.0
question_meaning_without_targets,0.0
question_meaning_yes_no_with_targets,0.0
same_sense_with_targets,0.098746
similar_sense_without_targets,0.0
