In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from statsmodels.stats.contingency_tables import cochrans_q
from scipy.stats import friedmanchisquare
from utils.util import get_results_df
import pandas as pd
import json
import numpy as np

In [None]:
# Load the TruthfulQA hit results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
truthfulqa = get_results_df("truthfulqa_hits.csv")

In [None]:
# Cochran's Q test for each column of `truthfulqa` except the last one.
# Stacking the list-like cells of a column and transposing yields one row per
# list position and one column per DataFrame row, which is then handed to
# cochrans_q as a 2-D array.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in truthfulqa.columns[:-1]:
    outcomes = pd.DataFrame(truthfulqa[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the TruthfulQA refusal results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
truthfulqa_refusals = get_results_df("truthfulqa_refusals.csv")

In [None]:
# Cochran's Q test for each column of `truthfulqa_refusals` except the last.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in truthfulqa_refusals.columns[:-1]:
    outcomes = pd.DataFrame(truthfulqa_refusals[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the MMLU hit results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
mmlu = get_results_df("mmlu_hits.csv")

In [None]:
# Cochran's Q test for each column of `mmlu` except the last one.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in mmlu.columns[:-1]:
    outcomes = pd.DataFrame(mmlu[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the MMLU refusal results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
mmlu_refusals = get_results_df("mmlu_refusals.csv")

In [None]:
# Cochran's Q test for each column of `mmlu_refusals` except the last one.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in mmlu_refusals.columns[:-1]:
    outcomes = pd.DataFrame(mmlu_refusals[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the BBQ hit results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
bbq = get_results_df("bbq_hits.csv")

In [None]:
# Cochran's Q test for each column of `bbq` except the last one.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in bbq.columns[:-1]:
    outcomes = pd.DataFrame(bbq[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the BBQ "biased" results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
biased = get_results_df("bbq_biased.csv")

In [None]:
# Cochran's Q test for each column of `biased` except the last one.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in biased.columns[:-1]:
    outcomes = pd.DataFrame(biased[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the BBQ "unknown" results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
unknown = get_results_df("bbq_unknown.csv")

In [None]:
# Cochran's Q test for each column of `unknown` except the last one.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in unknown.columns[:-1]:
    outcomes = pd.DataFrame(unknown[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the BBQ refusal results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
bbq_refusals = get_results_df("bbq_refusals.csv")

In [None]:
# Cochran's Q test for each column of `bbq_refusals` except the last one.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in bbq_refusals.columns[:-1]:
    outcomes = pd.DataFrame(bbq_refusals[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the attitude answers through get_results_df with by_attitude=True.
# Later cells index each cell by attitude name (x[attitude]) and read a
# `persona_cat` column from this frame.
# NOTE(review): exact effect of by_attitude is defined in utils.util — confirm there.
attitudes = get_results_df("attitude_answers.csv", by_attitude=True)

In [None]:
# Partition the attitude rows on whether `persona_cat` contains the substring
# "control": rows without it go to `no_control`, rows with it to `control_only`.
no_control = attitudes.loc[~attitudes["persona_cat"].str.contains("control")]
control_only = attitudes.loc[attitudes["persona_cat"].str.contains("control")]

In [None]:
# Friedman test per attitude and per model column over ALL rows of `attitudes`.
# Only the combinations whose p-value exceeds 0.05 are reported.
for attitude_name in attitudes.iloc[0, 0].keys():
    print(attitude_name)
    for model_col in attitudes.columns[:-1]:
        # Pick this attitude's entry out of every cell ...
        per_row = attitudes[model_col].apply(lambda cell: cell[attitude_name])
        # ... and average each entry along axis 1, one vector per row.
        row_means = per_row.map(lambda block: np.mean(block, axis=1))
        friedman = friedmanchisquare(*row_means.tolist())
        if friedman.pvalue > .05:
            print(model_col, friedman.pvalue)
    print("======================")

In [None]:
# Empty frame that the following loops fill cell-by-cell via
# pvalues.loc[attitude, "<column label>"] = p-value.
pvalues = pd.DataFrame()

In [None]:
# Friedman test per attitude and per model column, restricted to the
# non-control rows. Every p-value is written into `pvalues` under
# "<model> (personas)"; those above 0.05 are additionally printed.
for attitude_name in attitudes.iloc[0, 0].keys():
    print(attitude_name)
    for model_col in attitudes.columns[:-1]:
        # Pick this attitude's entry out of every cell ...
        per_row = no_control[model_col].apply(lambda cell: cell[attitude_name])
        # ... and average each entry along axis 1, one vector per row.
        row_means = per_row.map(lambda block: np.mean(block, axis=1))
        friedman = friedmanchisquare(*row_means.tolist())
        if friedman.pvalue > .05:
            print(model_col, friedman.pvalue)
        pvalues.loc[attitude_name, f"{model_col} (personas)"] = friedman.pvalue
    print("======================")

In [None]:
# Friedman test per attitude and per model column, restricted to the control
# rows. Every p-value is written into `pvalues` under "<model> (control)";
# those above 0.05 are additionally printed.
for attitude_name in attitudes.iloc[0, 0].keys():
    print(attitude_name)
    for model_col in attitudes.columns[:-1]:
        # Pick this attitude's entry out of every cell ...
        per_row = control_only[model_col].apply(lambda cell: cell[attitude_name])
        # ... and average each entry along axis 1, one vector per row.
        row_means = per_row.map(lambda block: np.mean(block, axis=1))
        friedman = friedmanchisquare(*row_means.tolist())
        if friedman.pvalue > .05:
            print(model_col, friedman.pvalue)
        pvalues.loc[attitude_name, f"{model_col} (control)"] = friedman.pvalue
    print("======================")

In [None]:
# Round every stored p-value to three decimals (rebinds `pvalues`; re-running
# the cell is harmless because rounding twice gives the same values).
pvalues = pvalues.round(3)

In [None]:
# Display the rounded p-value table (rows: attitudes, columns: "<model> (<group>)").
pvalues

In [None]:
# Put the columns into presentation order: all persona columns first, then all
# control columns, each group listing the models in the same sequence.
# Selecting by label raises KeyError if any listed column is absent.
pvalues = pvalues.loc[:, [
    # persona runs
    "GPT-4 (personas)",
    "GPT-3.5 (personas)",
    "Mixtral (personas)",
    "Zephyr (personas)",
    "Mistral-inst (personas)",
    "Gemma-7b-inst (personas)",
    "Gemma-2b-inst (personas)",
    # control runs
    "GPT-4 (control)",
    "GPT-3.5 (control)",
    "Mixtral (control)",
    "Zephyr (control)",
    "Mistral-inst (control)",
    "Gemma-7b-inst (control)",
    "Gemma-2b-inst (control)",
]]

In [None]:
# Transpose so each "<model> (<group>)" label becomes a row, then wrap the frame
# in a pandas Styler that renders numbers with three decimals.
table = pvalues.T.style.format(precision=3)

In [None]:
# Mark every value at or above 0.05 (no upper bound is given). The props string
# uses Styler's LaTeX command syntax, so to_latex() wraps those cells in \textbf{}.
# The Styler is updated in place; the returned object is only shown as cell output.
table.highlight_between(axis=0, left=.05, props="textbf:--rwrap;")

In [None]:
# Emit the styled table as LaTeX source; print() keeps the line breaks readable
# so the text can be copied as-is.
print(table.to_latex())

In [None]:
# Load the TruthfulQA refusal results through get_results_df.
# NOTE(review): this repeats a load of the same file into the same name done
# earlier in the notebook — likely redundant, confirm before removing.
truthfulqa_refusals = get_results_df("truthfulqa_refusals.csv")

In [None]:
# Cochran's Q test for each column of `truthfulqa_refusals` except the last.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in truthfulqa_refusals.columns[:-1]:
    outcomes = pd.DataFrame(truthfulqa_refusals[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the attitude refusal results through get_results_df with aggregate=False.
# Later cells read a `persona_cat` column and a row labelled "empty" from it.
# NOTE(review): exact effect of aggregate=False is defined in utils.util — confirm there.
attitude_refusals = get_results_df("attitude_refusals.csv", aggregate=False)

In [None]:
# Cochran's Q test for each column of `attitude_refusals` except the last.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in attitude_refusals.columns[:-1]:
    outcomes = pd.DataFrame(attitude_refusals[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the offensiveness scores through the project helper get_results_df.
# NOTE(review): the next cell treats every column except the last as a
# per-model column whose cells are list-like — confirm against utils.util.
offensiveness = get_results_df("off_scores.csv")

In [None]:
# Friedman test per column of `offensiveness` (last column skipped): each row's
# cell is passed to friedmanchisquare as one sample.
for model_col in offensiveness.columns[:-1]:
    samples = offensiveness[model_col].tolist()
    friedman = friedmanchisquare(*samples)
    print(friedman)

In [None]:
# Load the offensiveness-task refusal results through get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
off_refusals = get_results_df("off_refusals.csv")

In [None]:
# Cochran's Q test for each column of `off_refusals` except the last one.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in off_refusals.columns[:-1]:
    outcomes = pd.DataFrame(off_refusals[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Load the "rac" scores through the project helper get_results_df.
# NOTE(review): the next cell treats every column except the last as a
# per-model column whose cells are list-like — confirm against utils.util.
rac = get_results_df("rac_scores.csv")

In [None]:
# Friedman test per column of `rac` (last column skipped): each row's cell is
# passed to friedmanchisquare as one sample.
for model_col in rac.columns[:-1]:
    samples = rac[model_col].tolist()
    friedman = friedmanchisquare(*samples)
    print(friedman)

In [None]:
# Load the "rac" refusal results through the project helper get_results_df.
# NOTE(review): later cells treat every column except the last as a per-model
# column whose cells are list-like — confirm against utils.util.
rac_refusals = get_results_df("rac_refusals.csv")

In [None]:
# Sum each row's Zephyr cell in the offensiveness refusals and list the rows
# in ascending order of that sum.
off_refusals["Zephyr"].map(np.sum).sort_values()

In [None]:
# Sum each row's Zephyr cell in the rac refusals and list the rows in
# ascending order of that sum.
rac_refusals["Zephyr"].map(np.sum).sort_values()

In [None]:
def get_preds(dataset, model, persona, prompt_type):
    """Load the stored predictions for one prompt type / model / dataset / persona.

    The file is read from
    ``./results/{prompt_type}/{model}/{dataset}/{persona}.json``, resolved
    against the current working directory.

    Args:
        dataset: Dataset directory name.
        model: Model directory name.
        persona: Persona name; the JSON file's stem.
        prompt_type: Top-level directory name under ``./results``.

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    path = f"./results/{prompt_type}/{model}/{dataset}/{persona}.json"
    # The context manager closes the handle even when parsing fails, instead
    # of leaving it for the garbage collector.
    with open(path, "r") as fh:
        return json.load(fh)

In [None]:
# Number of entries in Zephyr's rac-refusal cell for the conservatism persona.
size = len(rac_refusals["Zephyr"]["a_person_with_conservatism_ideology"])

In [None]:
# Position of the first entry equal to 1 in that same cell.
# Raises IndexError when no entry equals 1.
# assumes the cell is a NumPy array so `== 1` compares element-wise — TODO confirm
index = np.where(rac_refusals["Zephyr"]["a_person_with_conservatism_ideology"] == 1)[0][0]

In [None]:
# Show the raw zero-shot zephyr-7b-beta "toxicity" prediction at position
# size + index for the conservatism persona.
# NOTE(review): the `size` offset presumably skips a first block of predictions
# that precedes the rac ones in the file — confirm against the results layout.
get_preds("toxicity", "zephyr-7b-beta", "a_person_with_conservatism_ideology", "zero")[size+index]

In [None]:
# Cochran's Q test for each column of `rac_refusals` except the last one.
# Each column's list-like cells are stacked and transposed into a 2-D array
# (one row per list position, one column per DataFrame row) for cochrans_q.
# assumes the per-row lists have equal length and hold binary outcomes — TODO confirm
for col in rac_refusals.columns[:-1]:
    outcomes = pd.DataFrame(rac_refusals[col].tolist()).T.to_numpy()
    # Print exactly once and label the result; nesting print() inside print()
    # would emit an extra "None" line after every test.
    print(col)
    print(cochrans_q(outcomes))

In [None]:
# Overall attitude refusal rate as one number: mean of every cell, then the
# mean over rows per column, then the mean over columns (last column dropped).
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map — switch once the minimum supported pandas allows it.
attitude_refusals.iloc[:,:-1].applymap(np.mean).mean().mean()

In [None]:
# Per-row, per-column attitude refusal rate: the mean of each cell, with the
# last column dropped.
attitude_refusals.iloc[:,:-1].applymap(np.mean)

In [None]:
# Overall offensiveness refusal rate as one number: mean of every cell, then
# the mean over rows per column, then the mean over columns (last column dropped).
off_refusals.iloc[:,:-1].applymap(np.mean).mean().mean()

In [None]:
# Overall rac refusal rate as one number: mean of every cell, then the mean
# over rows per column, then the mean over columns (last column dropped).
rac_refusals.iloc[:,:-1].applymap(np.mean).mean().mean()

In [None]:
# Overall TruthfulQA refusal rate as one number: mean of every cell, then the
# mean over rows per column, then the mean over columns (last column dropped).
truthfulqa_refusals.iloc[:,:-1].applymap(np.mean).mean().mean()

In [None]:
# Overall MMLU refusal rate as one number: mean of every cell, then the mean
# over rows per column, then the mean over columns (last column dropped).
mmlu_refusals.iloc[:,:-1].applymap(np.mean).mean().mean()

In [None]:
# Overall BBQ refusal rate as one number: mean of every cell, then the mean
# over rows per column, then the mean over columns (last column dropped).
bbq_refusals.iloc[:,:-1].applymap(np.mean).mean().mean()

In [None]:
# Offensiveness refusal rate per column: mean of every cell, averaged over rows
# (last column dropped).
off_refusals.iloc[:,:-1].applymap(np.mean).mean()

In [None]:
# BBQ refusal rate per column: mean of every cell, averaged over rows
# (last column dropped).
bbq_refusals.iloc[:,:-1].applymap(np.mean).mean()

In [None]:
# Attitude refusal rate per column for the row labelled "empty".
attitude_refusals.iloc[:,:-1].applymap(np.mean).loc["empty"] 

In [None]:
# Attitude refusal rates for rows whose persona_cat is "political_figure",
# ordered by the GPT-4 column. The boolean mask comes from the unsorted frame;
# .loc aligns it on the index, so sorting first does not mismatch rows.
attitude_refusals.iloc[:,:-1].applymap(np.mean).sort_values("GPT-4").loc[attitude_refusals.persona_cat=="political_figure"]

In [None]:
# Offensiveness refusal rate per column for the row labelled "empty".
off_refusals.iloc[:,:-1].applymap(np.mean).loc["empty"] 

In [None]:
# Rac refusal rate per column for the row labelled "empty".
rac_refusals.iloc[:,:-1].applymap(np.mean).loc["empty"] 

In [None]:
# TruthfulQA refusal rate per column for the row labelled "empty".
truthfulqa_refusals.iloc[:,:-1].applymap(np.mean).loc["empty"] 

In [None]:
# MMLU refusal rate per column for the row labelled "empty".
mmlu_refusals.iloc[:,:-1].applymap(np.mean).loc["empty"] 

In [None]:
# BBQ refusal rate per column for the row labelled "empty".
bbq_refusals.iloc[:,:-1].applymap(np.mean).loc["empty"] 