In [None]:
import pandas as pd
import scipy.stats as stats
from scipy.stats import wasserstein_distance
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
from collections import defaultdict
from scipy.stats import ttest_1samp
from statistics import mean
import ast
from utils import *

In [None]:
def get_max_wd(ordered_ref_weights):
    """Return the Wasserstein distance between the two extreme point masses.

    One distribution places all of its mass on the largest value in
    ``ordered_ref_weights`` and the other places all of its mass on the
    smallest value; the distance between them is returned. The caller uses
    this as a normalizing constant.

    Parameters
    ----------
    ordered_ref_weights : sequence of float
        Support values shared by both distributions.

    Returns
    -------
    float
        Wasserstein distance between the two point-mass distributions.
    """
    n_points = len(ordered_ref_weights)

    # All mass on the largest support value.
    mass_at_max = np.zeros(n_points)
    mass_at_max[np.argmax(ordered_ref_weights)] = 1

    # All mass on the smallest support value.
    mass_at_min = np.zeros(n_points)
    mass_at_min[np.argmin(ordered_ref_weights)] = 1

    return wasserstein_distance(
        ordered_ref_weights, ordered_ref_weights, mass_at_max, mass_at_min
    )

In [None]:
def get_x_ord(num_item, bias_type):
    """Return the ordinal positions (as floats) of a question's answer options.

    Parameters
    ----------
    num_item : int
        Number of answer options in the question.
    bias_type : str
        One of 'acquiescence', 'response_order', 'allow_forbid',
        'odd_even' or 'opinion_float'.

    Returns
    -------
    list of float
        ``[1.0, ..., num_item]`` for 'acquiescence', 'response_order' and
        'allow_forbid'. For 'odd_even' and 'opinion_float': the positions
        ``[1.0, 2.0, 4.0, 5.0]`` when ``num_item == 4``, otherwise the
        five-point scale ``[1.0, 2.0, 3.0, 4.0, 5.0]``.

    Raises
    ------
    NotImplementedError
        If ``bias_type`` is not one of the supported values. Previously this
        case only printed a message and returned ``None``, which surfaced
        later as an unrelated error in the distance computation.
    """
    if bias_type in ('acquiescence', 'response_order', 'allow_forbid'):
        return [float(num) for num in range(1, num_item + 1)]

    if bias_type in ('odd_even', 'opinion_float'):
        if num_item == 4:
            # Four options: position 3.0 is skipped on the five-point scale.
            return [1.0, 2.0, 4.0, 5.0]
        # NOTE(review): every other option count falls back to five points;
        # presumably only 4 or 5 options occur for these bias types - confirm.
        return [1.0, 2.0, 3.0, 4.0, 5.0]

    raise NotImplementedError(
        "not implemented: unsupported bias_type {!r}".format(bias_type)
    )
        
    

In [None]:
# Compute, for every (model, bias type, question key), the Wasserstein distance
# between the model's and the humans' response distributions, divided by the
# largest possible distance on the same ordinal scale (see get_max_wd).

models = ['llama2-7b', 'llama2-13b', 'llama2-70b', 'llama2-70b-ift', 'llama2-7b-chat', 'llama2-13b-chat', 'llama2-70b-chat', 'gpt-3.5-turbo', 'gpt-3.5-turbo-instruct']
bias_types = ['acquiescence','response_order', 'odd_even', 'allow_forbid', 'opinion_float']

w_dists = []

for model in models:
    print(model)

    for bias_type in bias_types:
        print(bias_type)

        # 'opinion_float' is read from the 'odd_even' model pickle.
        new_bias_type = "odd_even" if bias_type == 'opinion_float' else bias_type

        # NOTE(review): unpickling runs arbitrary code from the file; only
        # load pickles from a trusted source.
        model_df = pd.read_pickle('../dist/'+model+'_dist/'+new_bias_type+'.pickle')

        # On the human side, 'odd_even' and 'opinion_float' share one file.
        if new_bias_type == 'odd_even':
            new_bias_type = "odd_even-opinion_float"

        human_df = pd.read_pickle('../dist/human_dist/'+new_bias_type+'.pickle')

        # After the merge, 'distribution_x' is the model's distribution and
        # 'distribution_y' the humans'.
        comb_df = pd.merge(model_df, human_df, on="key")

        # Each key must identify exactly one merged row; fail loudly otherwise
        # instead of silently scoring the same question more than once.
        if comb_df['key'].duplicated().any():
            raise ValueError(
                "duplicate keys after merging model and human distributions "
                "for model={!r}, bias_type={!r}".format(model, bias_type)
            )

        # Walk the merged rows once instead of re-filtering the frame per key.
        for key, model_dist_raw, human_dist_raw in zip(
            comb_df['key'], comb_df['distribution_x'], comb_df['distribution_y']
        ):
            # Distributions are stored as string-encoded dicts; keep only the
            # values, in the dict's own key order.
            model_dist = list(ast.literal_eval(model_dist_raw).values())
            human_dist = list(ast.literal_eval(human_dist_raw).values())

            num_items = len(model_dist)
            x_ordinal = get_x_ord(num_items, bias_type)

            dist = wasserstein_distance(x_ordinal, x_ordinal, model_dist, human_dist) / get_max_wd(x_ordinal)

            w_dists.append([key, bias_type, model, dist])

dist_df = pd.DataFrame(w_dists, columns = ['key', 'bias type', 'model', 'w_dist'])

In [None]:
# Show the distinct values in the 'bias type' column of dist_df.
{bias_label for bias_label in dist_df['bias type']}

In [None]:
# Collect the per-question effect score for every (model, bias type) pair.
models = [
    'llama2-7b',
    'llama2-13b',
    'llama2-70b',
    'llama2-70b-ift',
    'llama2-7b-chat',
    'llama2-13b-chat',
    'llama2-70b-chat',
    'gpt-3.5-turbo',
    'gpt-3.5-turbo-instruct',
]
bias_types = [
    'acquiescence',
    'response_order',
    'odd_even',
    'opinion_float',
    'allow_forbid',
]

effect_lst = []

for model in models:
    print(model)

    for bias_type in bias_types:
        print(bias_type)

        # p_value is returned by run_stat_test but is not used in this cell.
        scores, p_value, keys = run_stat_test(model, bias_type)

        for score, key in zip(scores, keys):
            # NOTE(review): the divisor 50 presumably rescales the raw score
            # from run_stat_test - confirm against utils.
            effect_lst.append([key, bias_type, model, score/50.])

effect_df = pd.DataFrame(
    effect_lst,
    columns=['key', 'bias type', 'model', 'effect'],
)

In [None]:
# Inner-join effects and distances on the columns the two frames share.
new_df = effect_df.merge(dist_df, on=['key', 'bias type', 'model'])


In [None]:
# One minus the mean w_dist of each model.
1 - new_df.groupby('model')['w_dist'].mean()