In [112]:
import pandas as pd
import numpy as np
import ast

In [113]:
# Load the saved allocations. The 'selected' and 'unselected' columns come back
# from the CSV as text, so each cell is parsed into a Python object with
# ast.literal_eval (safe literal parsing, no arbitrary code execution).
df = pd.read_csv("allocations/obermeyer/allocations_sr_25_qr_50.csv")
for list_column in ("selected", "unselected"):
    df[list_column] = df[list_column].map(ast.literal_eval)

In [114]:
# Name of the 0/1 column in the cleaned data used below to split people into
# qualified (== 1) and unqualified (== 0).
# NOTE(review): 'threshold_50' lines up with the "qr_50" allocation file read
# into `df`; an earlier remark here mentioned "75th percentile / 25%
# qualification", which looks like a leftover from another setting -- confirm.
QUALIFICATION_COLUMN = "threshold_50"
data = pd.read_csv("data/obermeyer/obermeyer_data_cleaned.csv")
data.head(n=5)

Unnamed: 0,cost_t,dem_female,dem_age_band_18-24_tm1,dem_age_band_25-34_tm1,dem_age_band_35-44_tm1,dem_age_band_45-54_tm1,dem_age_band_55-64_tm1,dem_age_band_65-74_tm1,dem_age_band_75+_tm1,hypertension_elixhauser_tm1,...,cost_op_surgery_tm1,cost_other_tm1,cost_pharmacy_tm1,cost_physical_therapy_tm1,cost_radiology_tm1,gagne_sum_tm1,threshold_25,threshold_50,threshold_75,person_id
0,0.00218,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.044465,0.003623,6.5e-05,0.0,0.0,0.0,1,0,0,0
1,0.004723,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.025597,0.032091,6.5e-05,0.0,0.014486,0.222222,1,0,0,1
2,0.000908,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6.5e-05,0.0,0.0,0.0,0,0,0,2
3,0.002361,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.001187,0.0,6.5e-05,0.0,0.0,0.0,1,0,0,3
4,0.001998,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.001187,0.001035,6.5e-05,0.0,0.0,0.055556,0,0,0,4


In [115]:
# Preview the first rows of the allocation table.
df.head(n=5)

Unnamed: 0,seed,iteration,allocation_idx,selected,unselected,k',n'
0,0,0,0,"[83, 2903, 2999, 4395, 4476, 7258, 8336, 8430,...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",25,51
1,0,0,1,"[83, 1617, 2903, 2999, 4395, 4476, 7258, 8336,...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",25,51
2,0,0,2,"[83, 1617, 2903, 2999, 4395, 4476, 8336, 8430,...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",25,51
3,0,0,3,"[83, 2903, 2999, 4395, 4476, 7258, 8336, 8430,...","[46876, 35439, 11596, 1615, 19895, 17799, 3300...",25,51
4,0,0,4,"[83, 1617, 2903, 2999, 4395, 4476, 7258, 8336,...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",25,51


#### K' = Number of Qualified Selected
#### N' = Number of Qualified in Test Set

In [116]:
# For each count column print the mean, then the standard deviation
# (np.std default, i.e. population std with ddof=0). Order: k' first, then n'.
for count_column in ("k'", "n'"):
    column_values = df[count_column]
    print(np.mean(column_values))
    print(np.std(column_values))

24.669873722188814
0.6829953630546073
52.73541791942273
4.9353964617692325


#### Number of Rashomon Allocations

In [117]:
# Count the allocation rows recorded for every (seed, iteration) pair, then
# print the mean and the population standard deviation of those counts.
unique_allocations = (
    df.groupby(["seed", "iteration"])["allocation_idx"]
    .count()
    .reset_index()
)
allocations_per_run = unique_allocations["allocation_idx"]
print(np.mean(allocations_per_run))
print(np.std(allocations_per_run))

16.63
14.769329707200663


#### Number of People Systemically Excluded (Never Selected Across Rashomon Allocations)

In [118]:
# For every (seed, iteration) run, count the people who appear in the
# 'unselected' list of *every* allocation of that run, i.e. who are never
# selected by any of them. `metric` holds one count per run.
#
# groupby visits only the (seed, iteration) pairs that actually occur in `df`.
# Looping over unique seeds x unique iterations instead would hit an empty
# selection (and an IndexError) for any pair that is absent, and would rescan
# the whole frame once per pair. sort=False keeps runs in first-appearance order.
metric = []
for _, run_allocations in df.groupby(["seed", "iteration"], sort=False):
    # A group is never empty, so set.intersection always gets >= 1 argument.
    unselected_sets = [set(unselected) for unselected in run_allocations["unselected"]]
    never_selected = set.intersection(*unselected_sets)
    metric.append(len(never_selected))
print(np.mean(metric))
print(np.std(metric))

71.22
1.9161419571628822


#### Individual Fairness -- Qualified and Unqualified Selections

In [142]:
# Per (seed, iteration) run: for every person in the run's population, count in
# how many of the run's allocations they are selected. The mean and population
# std of those counts are recorded separately for qualified and unqualified
# people; the four prints at the end average these per-run statistics.
qualified_avg = []
qualified_std = []
unqualified_avg = []
unqualified_std = []

# groupby visits only the (seed, iteration) pairs present in `df` and filters
# the frame once, instead of rebuilding the same boolean mask several times for
# every element of the seeds x iterations product (which also breaks on any
# pair that does not exist). sort=False keeps first-appearance order.
for (seed, iteration), run_allocations in df.groupby(["seed", "iteration"], sort=False):
    # The run's population is taken from allocation 0: selected + unselected.
    # Raises IndexError if a run has no allocation_idx == 0 row.
    reference = run_allocations.loc[run_allocations["allocation_idx"] == 0].iloc[0]
    people = reference["selected"] + reference["unselected"]

    test_data = data.loc[data["person_id"].isin(people)]
    qualification = test_data[QUALIFICATION_COLUMN]
    qualified_selections = dict.fromkeys(
        test_data.loc[qualification == 1, "person_id"].to_list(), 0
    )
    unqualified_selections = dict.fromkeys(
        test_data.loc[qualification == 0, "person_id"].to_list(), 0
    )

    for allocation in run_allocations["selected"]:
        for person in allocation:
            if person in qualified_selections:
                qualified_selections[person] += 1
            elif person in unqualified_selections:
                unqualified_selections[person] += 1
            else:
                # Same exception type as the plain dict lookup would give, but
                # with enough context to locate the inconsistent row.
                raise KeyError(
                    f"person_id {person!r} selected in seed={seed!r}, "
                    f"iteration={iteration!r} has no {QUALIFICATION_COLUMN} "
                    "value of 0 or 1 in `data`"
                )

    qualified_counts = list(qualified_selections.values())
    unqualified_counts = list(unqualified_selections.values())
    qualified_avg.append(np.mean(qualified_counts))
    qualified_std.append(np.std(qualified_counts))
    unqualified_avg.append(np.mean(unqualified_counts))
    unqualified_std.append(np.std(unqualified_counts))

print(np.mean(qualified_avg))
print(np.mean(qualified_std))
print(np.mean(unqualified_avg))
print(np.mean(unqualified_std))

7.840207306786407
7.732958250842103
0.1042768548575128
0.5284647406387252
