In [None]:
import pandas as pd
import scipy.stats as stats
from scipy.stats import wasserstein_distance
import numpy as np
import matplotlib.pyplot as plt
import os
import numpy.ma as ma
from collections import defaultdict
import csv

from scipy.stats import ttest_1samp
from statistics import mean


In [None]:
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code. Only load 'topic_mapping.npy' from a trusted source.
topic_info_old = np.load('topic_mapping.npy', allow_pickle=True).item()

# Re-key the mapping: keep the text before the first "[" and drop that text's
# final character (presumably the space separating the question from a
# bracketed suffix -- confirm against the raw keys).
topic_info = {}
for key, entry in topic_info_old.items():
    question_text = key.partition("[")[0]
    topic_info[question_text[:-1]] = entry

In [None]:
all_qs = pd.read_csv('all-questions-coded.csv')

# Attach the 'cg' and 'fg' topic annotations stored in topic_info for each
# question. A question missing from topic_info raises KeyError.
all_qs['topic_cg'] = all_qs['question'].map(lambda question: topic_info[question]['cg'])
all_qs['topic_fg'] = all_qs['question'].map(lambda question: topic_info[question]['fg'])

In [None]:
# Flatten every question's 'topic_cg' collection into a single list
# (duplicates kept: one entry per question/topic pair).
all_topics = [topic for topic_set in all_qs['topic_cg'] for topic in topic_set]

# Unique topics in a stable order. Iterating a bare set of strings can yield a
# different order in every kernel session (string hash randomization), which
# would reshuffle anything indexed by this list between runs.
# Assumes the topic labels are mutually comparable (e.g. all strings).
all_topics_list = sorted(set(all_topics))

In [None]:
def get_groups(bias_type):
    """Return the two groups and the counted response options for a bias type.

    ``bias_type`` is matched by substring, so a file name such as
    ``"acquiescence-sample.csv"`` is accepted. Rules are tried in the order
    listed below and the first match wins.

    Returns:
        ``(first_group, second_group, first_options, second_options)``.
        For every bias except ``allow_forbid`` the two option lists are the
        same list object.

    Raises:
        ValueError: if no known bias name occurs in ``bias_type``.
    """
    # (marker, first group, second group, first options, second options);
    # None for the second options means "reuse the first options list".
    rules = (
        ("acquiescence", "pos alpha", "orig alpha", ['a'], None),
        ("response_order", "orig alpha", "reversed alpha", ['a'], None),
        ("odd_even", "no middle alpha", "middle alpha", ['b', 'd'], None),
        ("opinion_float", "orig alpha", "float alpha", ['c'], None),
        ("allow_forbid", "orig alpha", "forbid alpha", ['a'], ['b']),
    )

    for marker, first_group, second_group, first_options, second_options in rules:
        if marker in bias_type:
            break
    else:
        raise ValueError(f"Invalid bias type: {bias_type}")

    if second_options is None:
        second_options = first_options

    assert len(first_options) == len(second_options)

    return first_group, second_group, first_options, second_options

In [None]:
def get_results(model, bias_type):
    """Compute a per-key effect size from one model's response CSV.

    The CSV is read from ``results/<model>/csv/``. The file is
    ``<bias_type>.csv`` when ``bias_type`` contains ``key_typo``,
    ``middle_random`` or ``letter_swap``, or when ``model`` is one of the
    models listed in ``full_file_models``; otherwise it is
    ``<bias_type>-sample.csv``.

    For each ``key`` the score is the number of rows in the first group whose
    response is one of the first options, minus the number of rows in the
    second group whose response is one of the second options (groups and
    options come from ``get_groups``). Every key that appears in the file is
    reported, including keys whose score is 0 because no row matched either
    condition.

    Args:
        model: model directory name under ``results/``.
        bias_type: bias name used to build the file name.

    Returns:
        ``(keys, values)``: a dict-keys view of the keys in first-seen order
        and a list of ``score / 50 * 100`` in the same order.

    Raises:
        FileNotFoundError: if the CSV does not exist.
        ValueError: from ``get_groups`` if the file name matches no known bias.
    """
    root = 'results/'+model+'/csv'

    perturbation_tags = ('key_typo', 'middle_random', 'letter_swap')
    full_file_models = ('llama2-7b', 'llama2-13b', 'llama2-70b', 'gpt-3.5-turbo-instruct')

    if any(tag in bias_type for tag in perturbation_tags) or model in full_file_models:
        file = bias_type+'.csv'
    else:
        file = bias_type+'-sample.csv'

    scores = {}

    with open(os.path.join(root, file), newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        first_group, second_group, first_options, second_options = get_groups(file)
        for row in reader:
            key = row["key"]
            # Register the key even when this row is not counted, so keys with
            # no matching responses are reported as 0 instead of being dropped.
            scores.setdefault(key, 0)

            if row["group"] == first_group and row["response"] in first_options:
                scores[key] += 1
            if row["group"] == second_group and row["response"] in second_options:
                scores[key] -= 1

    # 50 is presumably the number of responses per group -- TODO confirm.
    values = [count/50*100 for count in scores.values()]

    return scores.keys(), values


In [None]:
def run_stat_test(model, bias_type, return_p_value=False):
    """Compute per-key effect sizes and, optionally, a one-sample t-test on them.

    The CSV is read from ``results/<model>/csv/``. The file is
    ``<bias_type>.csv`` when ``bias_type`` contains ``key_typo``,
    ``middle_random`` or ``letter_swap``, when ``model`` is one of the models
    listed in ``full_file_models``, or when ``model`` contains ``ext_gen``;
    otherwise it is ``<bias_type>-sample.csv``.

    For each ``key`` the score is the number of rows in the first group whose
    response is one of the first options, minus the number of rows in the
    second group whose response is one of the second options (groups and
    options come from ``get_groups``). Every key in the file is reported,
    starting from a score of 0.

    Args:
        model: model directory name under ``results/``.
        bias_type: bias name used to build the file name.
        return_p_value: when True, also return the two-sided p-value of
            ``scipy.stats.ttest_1samp(values, 0)``. Defaults to False, which
            keeps the two-element return existing callers unpack.

    Returns:
        ``(keys, values)`` by default, or ``(keys, values, p_value)`` when
        ``return_p_value`` is True. ``keys`` is a dict-keys view in first-seen
        order; ``values`` is a list of ``score / 50 * 100`` in the same order.

    Raises:
        FileNotFoundError: if the CSV does not exist.
        ValueError: from ``get_groups`` if the file name matches no known bias.
    """
    root = 'results/'+model+'/csv'

    perturbation_tags = ('key_typo', 'middle_random', 'letter_swap')
    full_file_models = (
        'llama2-7b', 'llama2-13b', 'llama2-70b', 'gpt-3.5-turbo-instruct',
        'llama2-70b-chat', 'llama2-7b-chat', 'llama2-13b-chat',
    )

    if (any(tag in bias_type for tag in perturbation_tags)
            or model in full_file_models
            or 'ext_gen' in model):
        file = bias_type+'.csv'
    else:
        file = bias_type+'-sample.csv'

    scores = {}

    with open(os.path.join(root, file), newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        first_group, second_group, first_options, second_options = get_groups(file)
        for row in reader:
            key = row["key"]
            # Every key starts at 0 so that keys with no counted rows are kept.
            scores.setdefault(key, 0)

            if row["group"] == first_group and row["response"] in first_options:
                scores[key] += 1
            if row["group"] == second_group and row["response"] in second_options:
                scores[key] -= 1

    # 50 is presumably the number of responses per group -- TODO confirm.
    values = [count/50*100 for count in scores.values()]

    if return_p_value:
        # Tests whether the mean effect size differs from 0.
        p_value = ttest_1samp(values, 0)[1]
        return scores.keys(), values, p_value

    return scores.keys(), values


In [None]:
models = ['gpt-3.5-turbo', 'gpt-3.5-turbo-instruct', 'llama2-7b', 'llama2-13b', 'llama2-70b', 'llama2-7b-chat', 'llama2-13b-chat', 'llama2-70b-chat', 'llama2-70b-ift']
bias_types = ['acquiescence','response_order', 'odd_even', 'opinion_float', 'allow_forbid']

# One frame per (model, bias type), concatenated once after the loops.
# Growing a DataFrame with pd.concat inside the loop re-copies all earlier rows
# on every iteration and starts from an empty frame.
per_run_frames = []

for model in models:
    print(model)

    for bias_type in bias_types:

        print(bias_type)

        all_keys, all_diffs = run_stat_test(model, bias_type)

        per_run_frames.append(pd.DataFrame({
            'key': list(all_keys),  # materialize the dict-keys view
            'effect size': np.array(all_diffs),
            'model': model,         # scalars are broadcast to every row
            'bias type': bias_type,
        }))

# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each per-run index.
full_df = pd.concat(per_run_frames, ignore_index=True)

In [None]:
# Attach the question metadata columns from all_qs to every effect-size row
# (default inner join on 'key').
# NOTE(review): this rebinds full_df, so running the cell twice merges the
# already-merged frame with all_qs again; re-run the cell that builds full_df first.
full_df = full_df.merge(all_qs, on='key')

In [None]:
# Mean effect size per (model, bias type) within each coarse-grained topic.
# Frames are collected and concatenated once instead of growing by_topic_df
# inside the loop.
topic_frames = []

# sorted(...) makes the topic order, and therefore the row order of
# by_topic_df, identical across kernel sessions.
# Assumes the topic labels are mutually comparable (e.g. all strings).
for topic in sorted(set(all_topics)):

    # Rows whose 'topic_cg' collection contains this topic.
    in_topic = full_df['topic_cg'].apply(lambda topics: topic in topics)

    size_df = (
        full_df[in_topic]
        .groupby(['model', 'bias type'], as_index=False)
        .agg({'effect size': 'mean'})
    )

    if size_df.empty:
        continue

    # Rescaling kept from the original analysis.
    # NOTE(review): confirm the intended units of 'effect size' after this division.
    size_df['effect size'] = size_df['effect size']/50
    size_df['topic'] = topic
    topic_frames.append(size_df)

# Fall back to an empty frame when no topic produced any rows.
by_topic_df = pd.concat(topic_frames, ignore_index=True) if topic_frames else pd.DataFrame()

by_topic_df

In [None]:
#coarse grain
# One heatmap panel per bias type: rows are coarse-grained topics, columns are
# models, colour is the rescaled mean effect size from by_topic_df.

import seaborn as sns
import numpy as np
# Recreates the `np.bool` alias that recent NumPy releases removed.
# NOTE(review): presumably needed by the installed seaborn version -- confirm,
# and drop this line once the environment no longer requires it.
np.bool = np.bool_

models = ['llama2-7b', 'llama2-13b', 'llama2-70b', 'llama2-70b-ift', 'llama2-7b-chat', 'llama2-13b-chat', 'llama2-70b-chat', 'gpt-3.5-turbo', 'gpt-3.5-turbo-instruct']  # these don't have perturbations yet
clean_model_labels = ['Llama2-7b', 'Llama2-13b', 'Llama2-70b', 'Solar', 'Llama2-7b-chat', 'Llama2-13b-chat', 'Llama2-70b-chat', 'GPT 3.5 Turbo', 'GPT 3.5 Turbo Instruct']

bias_types = ['acquiescence','allow_forbid', 'response_order', 'opinion_float', 'odd_even']

clean_titles = ["Acquiescence", "Allow/Forbid", "Response Order", "Opinion Float", "Odd Even"]

# The label lists are matched to models / bias types by position.
assert len(models) == len(clean_model_labels)
assert len(bias_types) == len(clean_titles)

fig, axs = plt.subplots(1, len(bias_types), figsize=(15,9))

# Tick positions at the centre of each heatmap cell.
model_tick_positions = [k + 0.5 for k in range(len(models))]
topic_tick_positions = [j + 0.5 for j in range(len(all_topics_list))]

for i, (bias, title) in enumerate(zip(bias_types, clean_titles)):

    ax = axs[i]
    is_first = i == 0
    is_last = i == len(bias_types) - 1

    # topics x models grid for this bias; (topic, model) pairs that are absent
    # from by_topic_df become NaN through the reindex.
    bias_rows = by_topic_df[by_topic_df['bias type'] == bias]
    effect_data = (
        bias_rows
        .pivot(index='topic', columns='model', values='effect size')
        .reindex(index=all_topics_list, columns=models)
        .to_numpy(dtype=float)
        / 50 * 100  # same rescaling as before; NOTE(review): confirm units vs. vmin/vmax
    )

    # NaN never compares equal to anything (including itself), so
    # `effect_data == np.nan` is always all-False; np.isnan is the correct test.
    mask = np.isnan(effect_data)

    # Only the first panel carries topic labels, and only the last panel (when
    # it is not also the first) carries the colour bar.
    sns.heatmap(
        effect_data,
        ax=ax,
        mask=mask,
        cbar=is_last and not is_first,
        yticklabels='auto' if is_first else False,
        cmap='RdBu',
        vmin=-1,
        vmax=1,
    )

    if is_first:
        ax.set_yticks(topic_tick_positions)
        ax.set_yticklabels(all_topics_list, rotation=0)

    ax.set_xticks(model_tick_positions)
    ax.set_xticklabels(clean_model_labels, rotation=90)
    ax.set_title(title)

plt.savefig("bias_topics.pdf", format="pdf", bbox_inches="tight")
  