In [None]:
import pandas as pd
import string
import scipy.stats as stats
from scipy.stats import wasserstein_distance
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
def get_human_responses(bias_type, data_dir='../data', out_dir='data/dist/human_dist/'):
    """Build and pickle the human answer distribution for every question of a bias type.

    Reads ``<data_dir>/pew_categorized/<bias_type>.csv`` (columns ``key`` and
    ``Wave``). For every question that has a wave, the wave's ``responses.csv``
    and ``info.csv`` are loaded from
    ``<data_dir>/human_resp/American_Trends_Panel_<wave>/``, the answers are
    counted per option and the counts are normalised to proportions. Options are
    relabelled ``'a'``, ``'b'``, ... in the order of the ``references`` list in
    ``info.csv``; the last reference (the "Refused" option) is dropped.

    The resulting frame has the columns ``wave``, ``key`` and ``distribution``
    (the ``str()`` of the letter -> proportion dict), is printed via ``head()``
    and written to ``<out_dir>/<bias_type>.pickle``.

    Args:
        bias_type: Name of the question file (without ``.csv``) and of the
            output pickle.
        data_dir: Root of the input data tree (default: the original ``'../data'``).
        out_dir: Directory the pickle is written to (default: the original
            ``'data/dist/human_dist/'``).
            NOTE(review): this default is relative to the working directory,
            whereas get_model_responses reads the human distributions from
            ``../data/dist/human_dist/`` -- confirm which location is intended.

    Returns:
        None. The result is only written to disk.

    Raises:
        KeyError: if a question key is not a column of the wave's ``responses.csv``.
        IndexError: if a question key has no row in the wave's ``info.csv``.
    """
    # Function-scope import: the notebook's first import cell does not provide `ast`.
    import ast

    questions = pd.read_csv(os.path.join(data_dir, 'pew_categorized', bias_type + '.csv'))

    # wave -> (responses frame, info frame); each wave's CSVs are parsed only once
    # instead of once per question.
    wave_tables = {}
    records = []

    for _, question in questions.iterrows():
        key = question['key']
        wave = question['Wave']
        if wave == "W53":
            # questions tagged W53 are looked up in the W54 panel directory
            wave = "W54"
        if pd.isna(wave):
            continue

        if wave not in wave_tables:
            wave_dir = os.path.join(data_dir, 'human_resp', 'American_Trends_Panel_' + wave)
            wave_tables[wave] = (
                pd.read_csv(os.path.join(wave_dir, 'responses.csv')),
                pd.read_csv(os.path.join(wave_dir, 'info.csv')),
            )
        human_df, info_df = wave_tables[wave]

        responses = human_df[key].tolist()

        # `references` holds the textual repr of a Python list; literal_eval parses
        # it without executing arbitrary code (the previous eval() would).
        raw_references = info_df.loc[info_df['key'] == key, 'references'].iloc[0]
        references = [] if pd.isna(raw_references) else ast.literal_eval(raw_references)
        options = references[:-1]    # ignoring "Refused" option

        # A repeated option text is credited to its first position, like list.index().
        first_position = {}
        for position, option in enumerate(options):
            first_position.setdefault(option, position)

        counts = [0] * len(options)
        for response in responses:
            position = first_position.get(response)
            if position is not None:
                counts[position] += 1

        total = sum(counts)
        if total == 0:
            # Nothing to normalise (this used to raise ZeroDivisionError).
            print(f"skipping {key}: no countable responses in wave {wave}")
            continue

        distribution = {
            string.ascii_lowercase[position]: count / total
            for position, count in enumerate(counts)
        }
        records.append({"wave": wave, "key": key, "distribution": str(distribution)})

    # Every row keeps the index label 0, exactly as the former row-by-row concat
    # produced: get_model_responses looks rows up with `.distribution[0]`.
    human_dist_df = pd.DataFrame(
        records,
        columns=['wave', 'key', 'distribution'],
        index=[0] * len(records),
    )

    os.makedirs(out_dir, exist_ok=True)
    print(human_dist_df.head())
    human_dist_df.to_pickle(os.path.join(out_dir, f"{bias_type}.pickle"))

In [None]:
def get_model_responses(model, bias_type, data_dir='../data', results_dir='../results',
                        out_dir='data/dist', max_responses=50):
    """Turn a model's sampled answers into per-question answer distributions.

    Loads the human distributions for ``bias_type`` (only to learn how many
    options each question has), loads the model's result pickle(s), counts the
    first ``max_responses`` comma-separated answer letters of every question and
    normalises the counts to proportions. Answers that are not one of the
    question's option letters are ignored. The questions ``ANES1`` and ``ANES2``
    are skipped.

    Row selection from the model results:
      * ``bias_type != 'odd_even'``: rows whose ``type`` is ``'orig alpha'``.
      * ``bias_type == 'odd_even'``: for every human question, the
        ``'no middle alpha'`` row if the question has an even number of options,
        otherwise the ``'middle alpha'`` row.

    The result (columns ``key`` and ``distribution``, the latter the ``str()`` of
    the letter -> proportion dict) is written to
    ``<out_dir>/<model>_dist/<bias_type>.pickle``.

    Args:
        model: Model name; selects the results sub-directory and file naming scheme.
        bias_type: Bias type whose results are processed.
        data_dir: Root containing ``dist/human_dist/`` (default: original ``'../data'``).
        results_dir: Root containing one directory per model (default: original
            ``'../results'``).
        out_dir: Root of the output tree (default: original ``'data/dist'``).
            NOTE(review): relative to the working directory, while the inputs
            default to ``../data`` -- confirm which location is intended.
        max_responses: Number of leading responses counted per question
            (default: the original hard-coded 50).

    Returns:
        None. The result is only written to disk.

    Raises:
        KeyError: if a model question has no entry in the human distributions.
        IndexError: in the ``odd_even`` case, if the model results lack the
            required row for a human question.
    """
    # Function-scope import: the notebook's first import cell does not provide `ast`.
    import ast

    diff_models = ['llama2-7b', 'llama2-13b', 'llama2-70b']
    sample_models = ['llama2-70b-ift', 'gpt-3.5-turbo']

    # odd_even and opinion_float share one human distribution file
    bias_file_name = bias_type
    if bias_type in ('odd_even', 'opinion_float'):
        bias_file_name = 'odd_even-opinion_float'
    human_df = pd.read_pickle(
        os.path.join(data_dir, 'dist', 'human_dist', f'{bias_file_name}.pickle')
    )
    # The distributions are stored as dict reprs; literal_eval parses them without
    # executing arbitrary code (the previous eval() would).
    human_df["distribution"] = human_df["distribution"].fillna("{}").apply(ast.literal_eval)

    model_dir = os.path.join(results_dir, model)
    if model in diff_models and bias_type != 'allow_forbid':
        # these models store their results split over two files
        df = pd.concat([
            pd.read_pickle(os.path.join(model_dir, f'{bias_type}-50.pickle')),
            pd.read_pickle(os.path.join(model_dir, f'{bias_type}-50diff.pickle')),
        ])
    elif model in sample_models:
        df = pd.read_pickle(os.path.join(model_dir, f'{bias_type}-sample.pickle'))
    else:
        df = pd.read_pickle(os.path.join(model_dir, f'{bias_type}.pickle'))

    if bias_type != 'odd_even':
        df = df[df['type'] == 'orig alpha']
    else:
        selected = []
        for _, human_row in human_df.iterrows():
            key = human_row['key']
            # even number of options -> the matching model row is 'no middle alpha'
            if len(human_row['distribution']) % 2 == 0:
                variant = 'no middle alpha'
            else:
                variant = 'middle alpha'
            matches = df.loc[(df['key'] == key) & (df['type'] == variant)]
            selected.append({"key": key, "responses": matches.iloc[0]['responses']})
            print(key, variant)
        df = pd.DataFrame(selected, columns=['key', 'responses'])

    # key -> number of answer options. A positional lookup table replaces the old
    # `.distribution[0]` label lookup, which only worked while every row of the
    # human frame carried the index label 0.
    num_options_by_key = {}
    for human_key, human_dist in zip(human_df['key'], human_df['distribution']):
        num_options_by_key.setdefault(human_key, len(human_dist))

    records = []
    for _, row in df.iterrows():
        key = row['key']
        if key == "ANES1" or key == "ANES2":
            continue

        counts = dict.fromkeys(string.ascii_lowercase[:num_options_by_key[key]], 0)
        for response in row['responses'].split(",")[:max_responses]:
            letter = response.strip().lower()
            if letter in counts:    # anything else is an invalid answer and is ignored
                counts[letter] += 1

        total = sum(counts.values())
        if total == 0:
            # Nothing to normalise (this used to raise ZeroDivisionError).
            print(f"skipping {key}: none of the sampled responses is a valid option letter")
            continue

        distribution = {letter: count / total for letter, count in counts.items()}
        records.append({"key": key, "distribution": str(distribution)})

    # Every row keeps the index label 0, exactly as the former row-by-row concat
    # produced, so existing consumers of the pickle see the same index.
    model_dist_df = pd.DataFrame(
        records,
        columns=['key', 'distribution'],
        index=[0] * len(records),
    )

    model_out_dir = os.path.join(out_dir, f"{model}_dist")
    os.makedirs(model_out_dir, exist_ok=True)
    model_dist_df.to_pickle(os.path.join(model_out_dir, f"{bias_type}.pickle"))

In [None]:
import pandas as pd
import scipy.stats as stats
from scipy.stats import wasserstein_distance
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
from collections import defaultdict
from scipy.stats import ttest_1samp
from statistics import mean
import ast
from utils import *

In [None]:
def get_max_wd(ordered_ref_weights):
    """Wasserstein distance between the two extreme point masses on a support.

    One distribution puts all of its weight on the largest entry of
    ``ordered_ref_weights``, the other on the smallest entry; both use
    ``ordered_ref_weights`` as their support values.
    """
    one_hot = np.eye(len(ordered_ref_weights))
    mass_on_largest = one_hot[np.argmax(ordered_ref_weights)]
    mass_on_smallest = one_hot[np.argmin(ordered_ref_weights)]
    return wasserstein_distance(
        ordered_ref_weights,
        ordered_ref_weights,
        mass_on_largest,
        mass_on_smallest,
    )

In [None]:
def get_x_ord(num_item, bias_type):
    """Return the ordinal positions of a question's answer options.

    * 'acquiescence', 'response_order', 'allow_forbid': ``1.0 .. num_item``.
    * 'odd_even', 'opinion_float': ``[1.0, 2.0, 4.0, 5.0]`` when there are exactly
      four options, otherwise the full five-point scale ``1.0 .. 5.0``.
    * anything else: prints "not implemented" and returns ``None``.
    """
    if bias_type in ('odd_even', 'opinion_float'):
        if num_item != 4:
            return [1.0, 2.0, 3.0, 4.0, 5.0]
        # four options: position 3.0 is left out
        return [1.0, 2.0, 4.0, 5.0]

    if bias_type in ('acquiescence', 'response_order', 'allow_forbid'):
        return [float(position) for position in range(1, num_item + 1)]

    print("not implemented")
        
    

In [None]:
# Wasserstein distance between each model's and the humans' answer distribution,
# divided by get_max_wd of the scale; one row per (question key, bias type, model).

models = [
    'llama2-7b', 'llama2-13b', 'llama2-70b', 'llama2-70b-ift',
    'llama2-7b-chat', 'llama2-13b-chat', 'llama2-70b-chat',
    'gpt-3.5-turbo', 'gpt-3.5-turbo-instruct',
]
bias_types = ['acquiescence', 'response_order', 'odd_even', 'allow_forbid', 'opinion_float']

w_dists = []

for model in models:
    print(model)

    for bias_type in bias_types:
        print(bias_type)

        # opinion_float is scored with the model's odd_even distributions
        new_bias_type = "odd_even" if bias_type == 'opinion_float' else bias_type
        model_df = pd.read_pickle(f'../data/dist/{model}_dist/{new_bias_type}.pickle')

        # odd_even and opinion_float share one human distribution file
        if new_bias_type == 'odd_even':
            new_bias_type = "odd_even-opinion_float"
        human_df = pd.read_pickle(f'../data/dist/human_dist/{new_bias_type}.pickle')

        # after the merge: distribution_x is the model's, distribution_y the humans'
        comb_df = model_df.merge(human_df, on="key")

        for key in comb_df['key']:
            key_rows = comb_df[comb_df['key'] == key]

            # each distribution is stored as the repr of a letter -> proportion dict
            model_dist = list(ast.literal_eval(key_rows['distribution_x'].item()).values())
            num_items = len(model_dist)

            human_dist = list(ast.literal_eval(key_rows['distribution_y'].item()).values())

            x_ordinal = get_x_ord(num_items, bias_type)

            dist = (
                wasserstein_distance(x_ordinal, x_ordinal, model_dist, human_dist)
                / get_max_wd(x_ordinal)
            )

            w_dists.append([key, bias_type, model, dist])

dist_df = pd.DataFrame(w_dists, columns=['key', 'bias type', 'model', 'w_dist'])

In [None]:
# Display the distinct values found in the 'bias type' column of dist_df.
set(dist_df['bias type'])

In [None]:
# Collect the per-question scores of run_stat_test for every (model, bias type)
# pair; one row per (question key, bias type, model).

models = [
    'llama2-7b', 'llama2-13b', 'llama2-70b', 'llama2-70b-ift',
    'llama2-7b-chat', 'llama2-13b-chat', 'llama2-70b-chat',
    'gpt-3.5-turbo', 'gpt-3.5-turbo-instruct',
]
bias_types = ['acquiescence', 'response_order', 'odd_even', 'opinion_float', 'allow_forbid']

effect_lst = []

for model in models:
    print(model)

    for bias_type in bias_types:
        print(bias_type)

        # the p-value returned by run_stat_test is not used in this cell
        scores, p_value, keys = run_stat_test(model, bias_type)

        for key, score in zip(keys, scores):
            # NOTE(review): 50 presumably is the number of sampled responses per
            # question -- confirm against run_stat_test.
            effect_lst.append([key, bias_type, model, score / 50.])

effect_df = pd.DataFrame(data=effect_lst, columns=['key', 'bias type', 'model', 'effect'])

In [None]:
# Inner-join effect sizes and distances on the three columns both frames share.
new_df = effect_df.merge(dist_df, on=['key', 'bias type', 'model'])


In [None]:
# One minus the mean w_dist of each model.
1 - new_df.groupby('model')['w_dist'].mean()