In [1]:
import pandas as pd
import numpy as np
from allocations import Allocations
from metrics import Metrics

In [2]:
df = pd.read_csv("predictions/obermeyer/bootstrap_lr.csv")
data = pd.read_csv("data/obermeyer/obermeyer_data_cleaned.csv")

In [3]:
TEST_SIZE = 100
RASHOMON_EPSILON = 0.01
ITERATIONS_PER_SPLIT = 25

all_results = []
for SELECTION_RATE in [0.10, 0.25, 0.50]:
    for QUALIFICATION_COLUMN in ["qualified_gagne_3", "qualified_gagne_2", "qualified_gagne_1"]:
        print(SELECTION_RATE, QUALIFICATION_COLUMN)
        a = Allocations(df, TEST_SIZE, SELECTION_RATE, QUALIFICATION_COLUMN, RASHOMON_EPSILON, ITERATIONS_PER_SPLIT)
        allocations = a.get_allocations()
        m = Metrics(allocations, data, QUALIFICATION_COLUMN)
        results = {}
        results["qualification_rate"] = QUALIFICATION_COLUMN
        results["selection_rate"] = SELECTION_RATE
        results["test_size"] = TEST_SIZE
        results["rashomon_epsilon"] = RASHOMON_EPSILON


        h = m.local_homogenization()
        results["local_homog_avg"] = h[0]
        results["local_homog_std"] = h[1]
        results["baseline_local_homog_avg"] = h[2]
        results["baseline_local_homog_std"] = h[3]
        print(h)
        
        h = m.global_homogenization("age")
        results["global_homog_avg"] = h[0]
        results["global_homog_std"] = h[1]
        results["baseline_global_homog_avg"] = h[2]
        results["baseline_global_homog_std"] = h[3]
        print(h)
        
        all_results.append(results)

0.1 qualified_gagne_3
(0.019697816653134855, 0.01434553066052333, 0.23496678694182663, 0.03360150840109481)
(1.4726395214214307, 0.164023430921828, 1.72298646042232, 0.05647947462537045)
0.1 qualified_gagne_2
(0.021143073513279577, 0.014526458217576243, 0.24700235427834952, 0.02945086930388879)
(1.4628550020611812, 0.16963650590552426, 1.6950117221674796, 0.0591973952136307)
0.1 qualified_gagne_1
(0.030194373926708075, 0.014015185301101239, 0.27510827264581195, 0.02036931472524293)
(1.473101628882966, 0.1617594505043569, 1.6894007826658006, 0.05358461488520905)
0.25 qualified_gagne_3
(0.02854366408506015, 0.02184879618878157, 0.40904149137209683, 0.048758837228645484)
(1.639326335521798, 0.10304937364720126, 1.72298646042232, 0.05647947462537045)
0.25 qualified_gagne_2
(0.026597708734593464, 0.021330672390381916, 0.4179635993514651, 0.04683115034005282)
(1.6401837286699399, 0.10601365301403898, 1.6950117221674796, 0.0591973952136307)
0.25 qualified_gagne_1
(0.034037473284626085, 0.0221

In [5]:
TEST_SIZE = 100
RASHOMON_EPSILON = 0.01
ITERATIONS_PER_SPLIT = 25

all_results = []
for SELECTION_RATE in [0.10, 0.25, 0.50]:
    for QUALIFICATION_RATE in [0.25, 0.50, 0.75]:
        print(SELECTION_RATE, QUALIFICATION_RATE)
        a = Allocations(df, TEST_SIZE, SELECTION_RATE, QUALIFICATION_RATE, RASHOMON_EPSILON, ITERATIONS_PER_SPLIT)
        allocations = a.get_allocations()
        m = Metrics(allocations, data, QUALIFICATION_RATE)
        results = {}
        results["qualification_rate"] = QUALIFICATION_RATE
        results["selection_rate"] = SELECTION_RATE
        results["test_size"] = TEST_SIZE
        results["rashomon_epsilon"] = RASHOMON_EPSILON

        k_prime = m.k_prime()
        results["k_prime_avg"] = k_prime[0]
        results["k_prime_std"] = k_prime[1]
        
        n_prime = m.n_prime()
        results["n_prime_avg"] = n_prime[0]
        results["n_prime_std"] = n_prime[1]
        
        count_rashomon_allocations = m.count_rashomon_allocations()
        results["count_rashomon_allocations_avg"] = count_rashomon_allocations[0]
        results["count_rashomon_allocations_std"] = count_rashomon_allocations[1]
        
        count_rashomon_models = m.count_rashomon_models()
        results["count_rashomon_models_avg"] = count_rashomon_models[0]
        results["count_rashomon_models_std"] = count_rashomon_models[1]
        
        count_rashomon_models_per_allocation = m.count_rashomon_models_per_allocation()
        results["count_rashomon_models_per_allocation_avg"] = count_rashomon_models_per_allocation[0]
        results["count_rashomon_models_per_allocation_std"] = count_rashomon_models_per_allocation[1]
        
        systemic_exclusion = m.systemic_exclusion()
        results["systemic_exclusion_avg"] = systemic_exclusion[0]
        results["systemic_exclusion_std"] = systemic_exclusion[1]

        systemic_exclusion = m.systemic_exclusion_pairwise()
        results["systemic_exclusion_pairwise_avg"] = systemic_exclusion[0]
        results["systemic_exclusion_pairwise_std"] = systemic_exclusion[1]

        selections_by_qualification = m.selections_by_qualification()
        results["qualified_selections_avg"] = selections_by_qualification[0]
        results["qualified_selections_std"] = selections_by_qualification[1]
        results["unqualified_selections_avg"] = selections_by_qualification[2]
        results["unqualified_selections_std"] = selections_by_qualification[3]
        
        minority_selection_rate = m.minority_selection_rate("race", 1)
        results["minority_selection_avg"] = minority_selection_rate[0]
        results["minority_selection_std"] = minority_selection_rate[1]
        results["minority_selection_best_found"] = minority_selection_rate[2]
        
        best_minority_selection_rate = m.best_minority_selection_rate("race", 1)
        results["best_minority_selection_avg"] = best_minority_selection_rate[0]
        results["best_minority_selection_std"] = best_minority_selection_rate[1]
        
        feature_ratio_by_group = m.feature_ratio_by_group("race", 1, 0, "gagne_sum_t")
        results["feature_ratio_avg"] = feature_ratio_by_group[0]
        results["feature_ratio_std"] = feature_ratio_by_group[1]
        results["feature_ratio_best_found"] = feature_ratio_by_group[2]
        
        best_feature_ratio = m.best_feature_ratio_by_group("race", 1, 0, "gagne_sum_t")
        results["best_feature_ratio_avg"] = best_feature_ratio[0]
        results["best_feature_ratio_std"] = best_feature_ratio[1]
        all_results.append(results)

0.1 0.25


KeyError: 0.25

In [4]:
all_results = pd.DataFrame(all_results)
all_results.to_csv("results.csv", index=False)