In [15]:
import pandas as pd
import numpy as np
import ast

In [7]:
INPUT_ALLOCATIONS = "allocations/obermeyer/allocations_sr_10_qr_25.csv"

In [16]:
df = pd.read_csv(INPUT_ALLOCATIONS)
df['selected'] = df['selected'].apply(ast.literal_eval)
df['unselected'] = df['unselected'].apply(ast.literal_eval)

In [18]:
QUALIFICATION_COLUMN = 'threshold_75' # 75th percentile translates to 25% qualification
data = pd.read_csv("data/obermeyer/obermeyer_data_cleaned.csv")
data.head()

Unnamed: 0,cost_t,race,dem_female,dem_age_band_18-24_tm1,dem_age_band_25-34_tm1,dem_age_band_35-44_tm1,dem_age_band_45-54_tm1,dem_age_band_55-64_tm1,dem_age_band_65-74_tm1,dem_age_band_75+_tm1,...,cost_op_surgery_tm1,cost_other_tm1,cost_pharmacy_tm1,cost_physical_therapy_tm1,cost_radiology_tm1,gagne_sum_tm1,threshold_25,threshold_50,threshold_75,person_id
0,0.00218,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.044465,0.003623,6.5e-05,0.0,0.0,0.0,0,0,0,0
1,0.004723,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.025597,0.032091,6.5e-05,0.0,0.014486,0.222222,0,0,0,1
2,0.000908,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6.5e-05,0.0,0.0,0.0,0,0,0,2
3,0.002361,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.001187,0.0,6.5e-05,0.0,0.0,0.0,0,0,0,3
4,0.001998,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.001187,0.001035,6.5e-05,0.0,0.0,0.055556,0,0,0,4


In [19]:
df.head()

Unnamed: 0,seed,iteration,allocation_idx,selected,unselected,k',n'
0,0,0,0,"[4476, 8336, 12068, 24284, 24933, 25722, 39691...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",10,30
1,0,0,1,"[4476, 8336, 12068, 24284, 25722, 39544, 39691...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",10,30
2,0,0,2,"[4395, 4476, 8336, 12068, 24284, 25722, 39691,...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",10,30
3,0,0,3,"[4476, 8336, 12068, 24284, 25722, 39691, 40939...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",10,30
4,0,0,4,"[2903, 4476, 8336, 12068, 24284, 25722, 39691,...","[46876, 25525, 35439, 11596, 1615, 19895, 1779...",10,30


#### K' = Number of Qualified Selected
#### N' = Number of Qualified in Test Set

In [20]:
print(np.mean(df["k'"]))
print(np.std(df["k'"]))
print(np.mean(df["n'"]))
print(np.std(df["n'"]))

9.973703433162893
0.17733792298616255
25.681519357195032
4.9021947977129505


#### Number of Rashomon Allocations

In [21]:
unique_allocations = df.groupby(["seed", "iteration"]).count()["allocation_idx"].reset_index()
print(np.mean(unique_allocations["allocation_idx"]))
print(np.std(unique_allocations["allocation_idx"]))

13.69
9.506518816054593


#### Number of People Systemically Excluded (Never Selected Across Rashomon Allocations)

In [22]:
metric = []
for seed in df["seed"].unique():
    for iteration in df["iteration"].unique():        
        allocations = df.loc[(df["seed"]==seed)&(df["iteration"]==iteration), "unselected"].to_list()
        systemic_rejection = set(allocations[0])
        for a in allocations:
            systemic_rejection = systemic_rejection.intersection(set(a))
        metric.append(len(systemic_rejection))
print(np.mean(metric))
print(np.std(metric))

85.95
1.845941494197473


#### Individual Fairness -- Qualified and Unqualified Selections

In [23]:
qualified_avg = []
qualified_std = []
unqualified_avg = []
unqualified_std = []

for seed in df["seed"].unique():
    for iteration in df["iteration"].unique():
        selected = df.loc[(df["seed"]==seed)&(df["iteration"]==iteration)&(df["allocation_idx"]==0), "selected"].values[0]
        unselected = df.loc[(df["seed"]==seed)&(df["iteration"]==iteration)&(df["allocation_idx"]==0), "unselected"].values[0]
        people = selected + unselected
        test_data = data.loc[data["person_id"].isin(people)].copy()
        qualified_selections = dict.fromkeys(test_data.loc[test_data[QUALIFICATION_COLUMN]==1, "person_id"].to_list(), 0)
        unqualified_selections = dict.fromkeys(test_data.loc[test_data[QUALIFICATION_COLUMN]==0, "person_id"].to_list(), 0)

        allocations = df.loc[(df["seed"]==seed)&(df["iteration"]==iteration), "selected"].to_list()
        for a in allocations:
            for p in a:
                if p in qualified_selections:
                    qualified_selections[p] += 1
                else:
                    unqualified_selections[p] += 1
        qualified_avg.append(np.mean(list(qualified_selections.values())))
        qualified_std.append(np.std(list(qualified_selections.values())))
        unqualified_avg.append(np.mean(list(unqualified_selections.values())))
        unqualified_std.append(np.std(list(unqualified_selections.values())))
print(np.mean(qualified_avg))
print(np.mean(qualified_std))
print(np.mean(unqualified_avg))
print(np.mean(unqualified_std))

nan
nan
1.369
3.7175957159313513


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


#### Group Fairness -- Race

In [26]:
diffs = []
min_diffs = []

for seed in df["seed"].unique():
    min_diff_test = float("inf")
    for iteration in df["iteration"].unique():
        selected = df.loc[(df["seed"]==seed)&(df["iteration"]==iteration)&(df["allocation_idx"]==0), "selected"].values[0]
        unselected = df.loc[(df["seed"]==seed)&(df["iteration"]==iteration)&(df["allocation_idx"]==0), "unselected"].values[0]
        people = selected + unselected
        test_data = data.loc[data["person_id"].isin(people)].copy()
        white_test = test_data.loc[test_data["race"]==0, "person_id"].to_list()
        black_test = test_data.loc[test_data["race"]==1, "person_id"].to_list()

        allocations = df.loc[(df["seed"]==seed)&(df["iteration"]==iteration), "selected"].to_list()
        min_diff = float('inf')
        for a in allocations:
            white_selected = 0
            black_selected = 0
            for p in a:
                if p in white_test:
                    white_selected += 1
                else:
                    black_selected += 1
            white_proportion = white_selected/len(white_test)
            black_proportion = black_selected/len(black_test)
            diff = white_proportion - black_proportion
            diffs.append(diff)
            min_diff = min(min_diff, diff)
        min_diffs.append(min_diff)

print(np.mean(diffs))
print(np.std(diffs))
print(np.mean(min_diffs))
        


-0.038619596408024974
0.10531603050537978
-0.07859136031868014
