In [None]:
import pandas as pd
import scipy.stats as stats
from scipy.stats import wasserstein_distance
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
from collections import defaultdict
from scipy.stats import ttest_1samp
from statistics import mean
from utils import *

In [None]:
def _result_csv_name(model, bias_type):
    """Return the name of the CSV file holding ``bias_type`` results for ``model``."""
    # Bias types containing one of these markers always use the plain
    # "<bias_type>.csv" name, even for the models that otherwise use "-sample".
    plain_name_markers = ('key_typo', 'middle_random', 'letter_swap')
    uses_sample_suffix = model in ('llama2-70b-ift', 'gpt-3.5-turbo')
    if uses_sample_suffix and not any(marker in bias_type for marker in plain_name_markers):
        return bias_type + '-sample.csv'
    return bias_type + '.csv'


def run_stat_test_steer(original_model, model, original_bias_type, bias_type, samples_per_key=50):
    """Compare a steered model's responses against the original model's, per key.

    Two CSV files are read, each with at least the columns ``key``, ``group``
    and ``response``:

    * ``../results/<model>/csv/<file>`` for ``bias_type`` (the steered run):
      every row whose group is the second group and whose response is one of
      the second options subtracts 1 from that key's score. Every row,
      matching or not, adds 1 to that key's row budget.
    * ``../results/<original_model>/csv/<file>`` for ``original_bias_type``:
      every row whose group is the first group and whose response is one of
      the first options adds 1 to that key's score, but only while the key
      still has budget left from the steered file (so at most as many
      original rows are counted per key as the steered file has rows).

    Groups and options come from ``get_groups(file_name)``, which is not
    defined in this notebook (presumably provided by ``utils`` -- TODO confirm).

    Each per-key score is then converted to ``score / samples_per_key * 100``
    and the resulting values are tested against a mean of 0 with a one-sample
    t-test.

    NOTE(review): a key only gets a score entry once one of its rows matches
    in either file, so keys with no matching rows at all are left out of the
    mean and the t-test rather than contributing a 0 -- confirm this is
    intended.

    Args:
        original_model: Name of the results directory of the unsteered model.
        model: Name of the results directory of the steered model.
        original_bias_type: Bias type used to locate the original model's CSV.
        bias_type: Bias type used to locate the steered model's CSV.
        samples_per_key: Divisor used to turn a raw per-key score into a
            percentage. Defaults to 50, the value previously hard-coded.

    Returns:
        Tuple ``(mean_effect, p_value)``: the mean of the per-key percentages
        and the p-value returned by ``scipy.stats.ttest_1samp(values, 0)``.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
        statistics.StatisticsError: If no key received a score (empty input
            to ``mean``).
    """
    scores = defaultdict(int)
    # Rows seen per key in the steered file; consumed by the original file below.
    remaining = defaultdict(int)

    steer_file = _result_csv_name(model, bias_type)
    steer_root = '../results/' + model + '/csv'
    with open(os.path.join(steer_root, steer_file), newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        _, second_group, _, second_options = get_groups(steer_file)
        for row in reader:
            if row["group"] == second_group and row["response"] in second_options:
                scores[row["key"]] -= 1
            remaining[row["key"]] += 1

    original_file = _result_csv_name(original_model, original_bias_type)
    original_root = '../results/' + original_model + '/csv'
    with open(os.path.join(original_root, original_file), newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        first_group, _, first_options, _ = get_groups(original_file)
        for row in reader:
            # .get() keeps keys that never appeared in the steered file out of
            # the budget table; such keys are ignored entirely.
            if (row["group"] == first_group
                    and row["response"] in first_options
                    and remaining.get(row["key"], 0) > 0):
                scores[row["key"]] += 1
                remaining[row["key"]] -= 1

    values = [score / samples_per_key * 100 for score in scores.values()]

    p_value = ttest_1samp(values, 0)[1]

    return mean(values), p_value


In [None]:
# Build one result row per (model, bias type):
#   [model, bias type, baseline effect, baseline p, steer effect, steer p,
#    steer-2 effect, steer-2 p]
models = ['gpt-3.5-turbo', 'gpt-3.5-turbo-instruct']
bias_types = ['response_order','allow_forbid']

all_results = []
for model in models:
    # Steered results live under "<model>-steer".
    new_model = model+'-steer'
    for bias_type in bias_types:

        # Baseline effect of the unsteered model. run_stat_test is not defined
        # in this notebook (presumably provided by utils -- TODO confirm).
        values, p_value, keys = run_stat_test(model, bias_type)
        diff = mean(values)
        lst = [model, bias_type, round(diff,4), round(p_value,4)]

        # First the plain steering run, then the "-steer2" variant; both are
        # compared against the same original (model, bias_type) results.
        for steer_bias_type in (bias_type, bias_type+"-steer2"):
            diff, p_value = run_stat_test_steer(model, new_model, bias_type, steer_bias_type)
            lst.append(round(diff,4))
            lst.append(round(p_value,4))

        # Append only once the row is complete, so a failure in any of the
        # runs above cannot leave a truncated row in all_results.
        all_results.append(lst)


In [None]:
# One column per entry of each all_results row: the baseline effect/p-value,
# then the effect/p-value pairs of the two steering runs.
comb_df = pd.DataFrame(
    all_results,
    columns=[
        'model', 'bias type',
        'old effect', 'pval',
        'steer effect', 'steer pval',
        'steer 2 effect', 'steer 2 pval',
    ],
)


In [None]:
# Show the combined results table as this cell's output.
comb_df