### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import jsonlines

## Read the simulation output

First, read each history file as raw JSON lines.

In [57]:
# File names of the simulation history files, one constant per run.
# The names are bare (no directory), so they resolve relative to the
# notebook's working directory. The ones listed in `histories` are loaded
# as JSON lines through sim_reader().
BASELINE = 'history_file_baseline_2agents.json'
# NOTE(review): PROD_LOTTERY is not referenced by any cell visible in this
# notebook -- confirm whether it is still needed.
PROD_LOTTERY = 'history_file_product_lottery_2agents.json'
STATIC = 'history_file_static_lottery_2agents.json'
LEAST_FAIR = 'history_file_least_fair_2agents.json'
MOST_COMPAT = 'history_file_most_compatible_2agents.json'
FAIR_LOTTERY = 'history_file_fairness_lottery_2agents.json'
WEIGHTED_ALLOC = 'history_file_product_allocation_2agents.json'

In [3]:
def sim_reader(filename):
    """Load every record of a JSON-lines file into a list.

    Parameters
    ----------
    filename
        Path of the file, handed to ``jsonlines.open``.

    Returns
    -------
    list
        The decoded objects in the order the reader yields them.
    """
    with jsonlines.open(filename) as reader:
        # Materialise inside the ``with`` so the file is still open while reading.
        return list(reader)

In [4]:
def process_history(history, fair=True, compat=True, alloc=True, lists=True):
    """Split a simulation history into score tables and result lists.

    Parameters
    ----------
    history : iterable of dict
        One dict per recorded entry. Depending on the flags, each entry is
        read at ``entry['allocation']['fairness scores']``,
        ``entry['allocation']['compatibility scores']``,
        ``entry['allocation']['output']`` and
        ``entry['choice']['output']['results']``. It is traversed once per
        enabled flag, so it must be re-iterable (e.g. a list).
    fair, compat, alloc, lists : bool
        Select which of the four outputs to build; a disabled output is
        returned as ``None``.

    Returns
    -------
    tuple
        ``(fair_df, compat_df, alloc_df, results_list)``. The first three are
        DataFrames with one row per history entry; ``alloc_df`` additionally
        gets a boolean ``'none'`` column that is True where both ``'Agent1'``
        and ``'Agent2'`` equal 0. ``results_list`` holds one
        ``process_results`` output per history entry.
    """
    # Every output starts disabled; each flag below fills in its own slot.
    fair_df = compat_df = alloc_df = results_list = None

    if fair:
        fair_df = pd.DataFrame(
            [step['allocation']['fairness scores'] for step in history]
        )

    if compat:
        compat_df = pd.DataFrame(
            [step['allocation']['compatibility scores'] for step in history]
        )

    if alloc:
        alloc_df = pd.DataFrame(
            [step['allocation']['output'] for step in history]
        )
        # Flag the rows in which neither agent received an allocation.
        agent1_idle = alloc_df['Agent1'] == 0
        agent2_idle = alloc_df['Agent2'] == 0
        alloc_df['none'] = agent1_idle & agent2_idle

    if lists:
        results_list = [
            process_results(step['choice']['output']['results'])
            for step in history
        ]

    return fair_df, compat_df, alloc_df, results_list

def process_results(result_structs):
    """Reduce result records to ``(item, score)`` tuples.

    Parameters
    ----------
    result_structs : iterable of mapping
        Each record must provide the keys ``'item'`` and ``'score'``; any
        other keys are ignored.

    Returns
    -------
    list of tuple
        One ``(item, score)`` pair per record, in input order.
    """
    pairs = []
    for struct in result_structs:
        pairs.append((struct['item'], struct['score']))
    return pairs

        

In [18]:
# Load the header-less item table and label its columns in a single chain.
# set_axis raises if the file does not have exactly three columns, just as
# assigning to .columns would.
item_data = (
    pd.read_csv('../data1K-2agents/Data/item.csv', header=None)
    .set_axis(['item', 'agent', 'protected'], axis=1)
)


In [31]:
# Index the item table by item id so rows can be looked up with
# item_data.loc[<item id>, ...] (as count_protected does).
# Guarded so that re-running this cell is a no-op: once 'item' has become the
# index it is no longer a column, and an unguarded set_index('item') would
# raise KeyError on the second run.
if 'item' in item_data.columns:
    item_data = item_data.set_index('item')

In [23]:
def count_protected(agent, result_entries):
    """Count the entries of one result list whose item belongs to ``agent``.

    Position 0 of each entry is taken as the item id, cast to ``int`` and
    looked up in the module-level ``item_data`` table (indexed by item id).
    The entry is counted when that row's ``'agent'`` value equals ``agent``.

    Assumes that there is only one protected feature per item, which is OK
    for these experiments.
    """
    matches = 0
    for entry in result_entries:
        item_id = int(entry[0])
        if item_data.loc[item_id, 'agent'] == agent:
            matches += 1
    return matches
    
def count_all_protected(agent, results):
    """Total the ``count_protected`` tallies for ``agent`` over all result lists.

    Parameters
    ----------
    agent
        Agent value forwarded unchanged to ``count_protected``.
    results : iterable
        Each element is one result list accepted by ``count_protected``.

    Returns
    -------
    int
        Sum of the per-list counts; 0 when ``results`` is empty.
    """
    running_total = 0
    for result_entries in results:
        running_total += count_protected(agent, result_entries)
    return running_total

In [58]:
# Maps a short run label to the history file holding that run's output.
# The labels become the keys of result_dict / the count dicts and the
# column names of count_df.
# NOTE(review): the 'least_misery' label points at the LEAST_FAIR file --
# confirm the naming is intentional.
histories = dict(
    baseline=BASELINE,
    least_misery=LEAST_FAIR,
    most_compat=MOST_COMPAT,
    static=STATIC,
    fair_lottery=FAIR_LOTTERY,
    weighted_alloc=WEIGHTED_ALLOC,
)
             
def compute_results(filename):
    """Read one history file and return only its processed result lists.

    The file is loaded with ``sim_reader`` and passed to ``process_history``
    with the fairness, compatibility and allocation outputs switched off, so
    only the fourth element of the returned tuple (the result lists) is built.
    """
    outputs = process_history(
        sim_reader(filename),
        fair=False,
        compat=False,
        alloc=False,
        lists=True,
    )
    # process_history returns (fair_df, compat_df, alloc_df, results_list).
    return outputs[3]

# Processed result lists for every run, keyed by the labels of `histories`.
result_dict = {
    label: compute_results(history_file)
    for label, history_file in histories.items()
}


In [59]:
# Per-run number of result items whose 'agent' value in item_data is 0.
count_dict0 = {
    label: count_all_protected(0, run_results)
    for label, run_results in result_dict.items()
}

In [60]:
# Per-run number of result items whose 'agent' value in item_data is 1.
count_dict1 = {
    label: count_all_protected(1, run_results)
    for label, run_results in result_dict.items()
}

In [61]:
# Per-run number of result items whose 'agent' value in item_data is 2.
count_dict2 = {
    label: count_all_protected(2, run_results)
    for label, run_results in result_dict.items()
}

In [39]:
# Display the per-run counts for agent value 1.
# NOTE(review): the saved output of this cell has no 'weighted_alloc' entry
# although `histories` defines one -- it looks stale; re-run to refresh.
count_dict1

{'baseline': 2771,
 'least_misery': 5945,
 'most_compat': 4234,
 'static': 5425,
 'fair_lottery': 5752}

In [40]:
# Display the per-run counts for agent value 2.
# NOTE(review): the saved output of this cell has no 'weighted_alloc' entry
# although `histories` defines one -- it looks stale; re-run to refresh.
count_dict2

{'baseline': 2712,
 'least_misery': 4003,
 'most_compat': 4215,
 'static': 3609,
 'fair_lottery': 4205}

In [65]:
# Stack the three per-agent dicts into one table: row i holds the counts for
# agent value i (0, 1, 2) and each column is one run label.
count_df = pd.DataFrame([count_dict0, count_dict1, count_dict2])

In [66]:
# Display the raw count table (rows: agent values 0-2, columns: runs).
count_df

Unnamed: 0,baseline,least_misery,most_compat,static,fair_lottery,weighted_alloc
0,4517,52,1551,966,43,71
1,2771,5945,4234,5425,5752,5817
2,2712,4003,4215,3609,4205,4112


In [67]:
# Scale the counts by 10000. In the saved count table every column sums to
# exactly 10000, so the result reads as each agent value's share of a run's
# result items.
# NOTE(review): 10000 is hard-coded -- confirm it matches the number of
# result items per run if the input files change.
count_df.div(10000)

Unnamed: 0,baseline,least_misery,most_compat,static,fair_lottery,weighted_alloc
0,0.4517,0.0052,0.1551,0.0966,0.0043,0.0071
1,0.2771,0.5945,0.4234,0.5425,0.5752,0.5817
2,0.2712,0.4003,0.4215,0.3609,0.4205,0.4112


In [69]:
# NOTE(review): ad-hoc arithmetic on unexplained literals; this evaluates
# 1 - (0.75 - 0.59) / 0.75. Say what .75 and .59 stand for, or derive them
# from values computed above, so the figure can be reproduced.
1- ((.75 - .59)/.75)


0.7866666666666666