In [39]:
import pandas as pd
import numpy as np
import ast

In [40]:
# Allocation results to analyze. NOTE(review): the file name presumably encodes
# the experiment settings (sr = selection rate, qr = qualification rate) — confirm.
INPUT_ALLOCATIONS = "allocations/obermeyer/allocations_sr_25_qr_50.csv"
QUALIFICATION_COLUMN = 'threshold_50' # 0/1 qualification label column in the patient data. NOTE(review): presumably must match the qr_* value of INPUT_ALLOCATIONS (the earlier note "75th percentile translates to 25% qualification" describes 'threshold_75', not this setting) — confirm

In [41]:
# Load the allocation table. The "selected" / "unselected" columns are stored
# in the CSV as stringified Python lists of person ids, so parse them back
# into real lists before any set / membership operations are done on them.
df = pd.read_csv(INPUT_ALLOCATIONS)
for list_column in ("selected", "unselected"):
    df[list_column] = df[list_column].map(ast.literal_eval)
df.head()

Unnamed: 0,seed,iteration,allocation_idx,selected,unselected,k',n',model_count
0,0,0,0,"[83, 2903, 4395, 4476, 6945, 8336, 11193, 1159...","[25525, 35439, 1615, 19895, 17799, 33009, 1536...",22,51,1
1,0,0,1,"[83, 2903, 4395, 4476, 6945, 8336, 11596, 1206...","[25525, 35439, 1615, 19895, 17799, 33009, 1536...",21,51,1
2,0,0,2,"[2217, 2903, 4395, 4476, 6945, 8336, 11596, 12...","[25525, 35439, 1615, 19895, 17799, 15362, 3604...",21,51,2
3,0,0,3,"[83, 2903, 4395, 4476, 6945, 8336, 11596, 1206...","[25525, 35439, 1615, 19895, 17799, 33009, 1536...",21,51,2
4,0,0,4,"[83, 2903, 4395, 4476, 6945, 8336, 11596, 1206...","[25525, 35439, 1615, 19895, 17799, 33009, 1536...",21,51,8


In [46]:
# Patient-level table: one row per person_id, with race, the gagne_sum_*
# illness counts and the threshold_* qualification labels used below.
PATIENT_DATA_PATH = "data/obermeyer/obermeyer_data_cleaned.csv"
data = pd.read_csv(PATIENT_DATA_PATH)
data.head()

Unnamed: 0,cost_t,race,dem_female,dem_age_band_18-24_tm1,dem_age_band_25-34_tm1,dem_age_band_35-44_tm1,dem_age_band_45-54_tm1,dem_age_band_55-64_tm1,dem_age_band_65-74_tm1,dem_age_band_75+_tm1,...,cost_other_tm1,cost_pharmacy_tm1,cost_physical_therapy_tm1,cost_radiology_tm1,gagne_sum_tm1,gagne_sum_t,threshold_25,threshold_50,threshold_75,person_id
0,0.00218,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.003623,6.5e-05,0.0,0.0,0.0,0,1,0,0,0
1,0.004723,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.032091,6.5e-05,0.0,0.014486,0.222222,3,1,0,0,1
2,0.000908,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,6.5e-05,0.0,0.0,0.0,0,0,0,0,2
3,0.002361,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,6.5e-05,0.0,0.0,0.0,0,1,0,0,3
4,0.001998,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.001035,6.5e-05,0.0,0.0,0.055556,1,0,0,0,4


#### K' = Number of Qualified Selected
#### N' = Number of Qualified in Test Set

In [21]:
# Mean and (population) standard deviation of k' and n' over all allocation rows.
for count_column in ("k'", "n'"):
    print(np.mean(df[count_column]))
    print(np.std(df[count_column]))

20.014095851792188
2.116093583003527
51.21627064035441
5.011178107734593


#### Number of Rashomon Allocations
#### Number of Rashomon Models
#### Number of Rashomon Models Per Allocation


In [22]:
# Select the column *before* aggregating: aggregating the whole frame would
# also count/sum the list-valued "selected"/"unselected" columns (summing
# lists concatenates them), which is wasted work and fragile across pandas
# versions. The printed statistics are unchanged.

# Number of distinct allocations per (seed, iteration) run.
unique_allocations = df.groupby(["seed", "iteration"])["allocation_idx"].count().reset_index()
print(np.mean(unique_allocations["allocation_idx"]))
print(np.std(unique_allocations["allocation_idx"]))
print()
# Total number of models per (seed, iteration) run.
unique_allocations = df.groupby(["seed", "iteration"])["model_count"].sum().reset_index()
print(np.mean(unique_allocations["model_count"]))
print(np.std(unique_allocations["model_count"]))
print()
# Number of models behind each individual allocation.
print(np.mean(df["model_count"]))
print(np.std(df["model_count"]))
print()

24.83
11.844876529537993

63.72
29.124930901205587

2.5662505034232783
4.149824552293205



#### Number of People Systemically Excluded (Never Selected Across Rashomon Allocations)

In [23]:
# For every (seed, iteration) run, count the people who are in the
# "unselected" list of *every* allocation of that run, i.e. who are never
# selected by any of the run's allocations.
metric = []
# Iterate over the runs that actually exist in df. Looping over the
# cross-product of unique seeds and unique iterations would raise IndexError
# for any (seed, iteration) pair without rows, and re-filters the whole frame
# for each pair.
for (seed, iteration), run in df.groupby(["seed", "iteration"]):
    unselected_sets = [set(a) for a in run["unselected"]]
    # Every group has at least one row, so the argument list is never empty.
    systemic_rejection = set.intersection(*unselected_sets)
    metric.append(len(systemic_rejection))
print(np.mean(metric))
print(np.std(metric))

68.34
2.1034257771549725


#### Individual Fairness -- Qualified and Unqualified Selections

In [24]:
# For every (seed, iteration) run, count how many of the run's allocations
# select each person, separately for qualified (label == 1) and unqualified
# (label == 0) people, then record the mean / std of those counts per run.
# People who are never selected stay at 0 and are included in the statistics.
qualified_avg = []
qualified_std = []
unqualified_avg = []
unqualified_std = []

# Iterate over existing runs only: the cross-product of unique seeds and
# unique iterations raises IndexError for pairs without rows and re-filters
# the full frame several times per pair.
for (seed, iteration), run in df.groupby(["seed", "iteration"]):
    # The run's population is taken from allocation 0 (selected + unselected),
    # as in the original analysis.
    reference = run.loc[run["allocation_idx"] == 0].iloc[0]
    people = reference["selected"] + reference["unselected"]
    test_data = data.loc[data["person_id"].isin(people)]

    qualified_ids = test_data.loc[test_data[QUALIFICATION_COLUMN] == 1, "person_id"].to_list()
    unqualified_ids = test_data.loc[test_data[QUALIFICATION_COLUMN] == 0, "person_id"].to_list()
    qualified_selections = dict.fromkeys(qualified_ids, 0)
    unqualified_selections = dict.fromkeys(unqualified_ids, 0)

    for a in run["selected"]:
        for p in a:
            if p in qualified_selections:
                qualified_selections[p] += 1
            elif p in unqualified_selections:
                unqualified_selections[p] += 1
            else:
                # Same exception type as before, but with a message that says
                # which person and run is inconsistent with the patient data.
                raise KeyError(
                    f"person_id {p} (seed={seed}, iteration={iteration}) has no "
                    f"0/1 value in {QUALIFICATION_COLUMN!r} within the run's population"
                )

    qualified_counts = list(qualified_selections.values())
    unqualified_counts = list(unqualified_selections.values())
    qualified_avg.append(np.mean(qualified_counts))
    qualified_std.append(np.std(qualified_counts))
    unqualified_avg.append(np.mean(unqualified_counts))
    unqualified_std.append(np.std(unqualified_counts))
print(np.mean(qualified_avg))
print(np.mean(qualified_std))
print(np.mean(unqualified_avg))
print(np.mean(unqualified_std))

9.753119709792946
11.366691084196475
2.517502765463864
6.706520431622943


#### Group Fairness -- % Highest Risk Patients That Are Black

In [84]:
# For every allocation, compute the share of selected people with race == 1,
# grouped per (seed, iteration) run so the per-run maximum can be reported.
all_props = []

black_people = data.loc[data["race"]==1, "person_id"].to_list()
# Set copy for O(1) membership tests; `p in <list>` inside the nested loops
# scans the whole list for every selected person.
black_person_ids = set(black_people)

# Iterate over existing runs only: a (seed, iteration) pair without rows would
# yield an empty `props` list and make max(props) below raise ValueError.
for (seed, iteration), run in df.groupby(["seed", "iteration"]):
    props = [
        sum(p in black_person_ids for p in a) / len(a)
        for a in run["selected"]
    ]
    all_props.append(props)

all_props_flattened = [p for props in all_props for p in props]
all_props_max = [max(props) for props in all_props]
print(np.nanmean(all_props_flattened))
print(np.nanstd(all_props_flattened))
print(np.nanmean(all_props_max))

0.1502537253322594
0.06741812400712305
0.1824


#### Group Fairness -- Ratio of Mean # Chronic Illnesses (Black / White) Among Selected Patients

In [82]:
# For every allocation, compute the ratio
#   mean gagne_sum_t of selected people with race == 1
#   / mean gagne_sum_t of selected people with race == 0,
# grouped per (seed, iteration) run so the per-run minimum can be reported.
all_ratios = []

# Iterate over existing runs only: the cross-product of unique seeds and
# unique iterations raises IndexError for pairs without rows.
for (seed, iteration), run in df.groupby(["seed", "iteration"]):
    # The run's population is taken from allocation 0 (selected + unselected).
    reference = run.loc[run["allocation_idx"] == 0].iloc[0]
    people = reference["selected"] + reference["unselected"]
    test_data = data.loc[data["person_id"].isin(people)]
    # Split by race once per run instead of once per allocation.
    white_patients = test_data.loc[test_data["race"] == 0]
    black_patients = test_data.loc[test_data["race"] == 1]

    ratio = []
    for a in run["selected"]:
        white_num_illnesses = white_patients.loc[white_patients["person_id"].isin(a), "gagne_sum_t"].mean()
        black_num_illnesses = black_patients.loc[black_patients["person_id"].isin(a), "gagne_sum_t"].mean()
        # NaN when either race has no selected people (mean of an empty
        # selection); the nan-aware reductions below skip those entries.
        ratio.append(black_num_illnesses/white_num_illnesses)
    all_ratios.append(ratio)

all_ratios_flattened = [r for ratios in all_ratios for r in ratios]
# np.nanmin ignores NaN ratios. The built-in min() is order-dependent when a
# NaN is present (it returns NaN if the NaN comes first, otherwise skips it).
# A run whose ratios are all NaN yields NaN (with a RuntimeWarning) and is
# then ignored by nanmean.
all_ratios_min = [np.nanmin(ratios) for ratios in all_ratios]
print(np.nanmean(all_ratios_flattened))
print(np.nanstd(all_ratios_flattened))
print(np.nanmean(all_ratios_min))

1.4360271161150773
0.6923885638641755
1.2436095247051229
