In [1]:
import pandas as pd
import numpy as np
from allocations import Allocations
from metrics import Metrics

In [2]:
# Load the two CSV inputs used by every cell below: `df` is handed to
# Allocations and `data` is handed to Metrics.
df = pd.read_csv(filepath_or_buffer="predictions/obermeyer/bootstrap_lr.csv")
data = pd.read_csv(filepath_or_buffer="data/obermeyer/obermeyer_data_cleaned.csv")

In [3]:
# Homogenization sweep.
#
# For every (selection rate, qualification column) pair this cell builds the
# allocations, computes the local and global homogenization statistics, prints
# them, and appends one flat record per pair to `all_results`.
TEST_SIZE = 100
RASHOMON_EPSILON = 0.01
ITERATIONS_PER_SPLIT = 25

all_results = []
for SELECTION_RATE in [0.10, 0.25, 0.50]:
    for QUALIFICATION_COLUMN in ["qualified_gagne_3", "qualified_gagne_2", "qualified_gagne_1"]:
        print(SELECTION_RATE, QUALIFICATION_COLUMN)

        allocations = Allocations(
            df,
            TEST_SIZE,
            SELECTION_RATE,
            QUALIFICATION_COLUMN,
            RASHOMON_EPSILON,
            ITERATIONS_PER_SPLIT,
        ).get_allocations()
        m = Metrics(allocations, data, QUALIFICATION_COLUMN)

        # Identify the configuration first so these lead the record's columns.
        # Note: the "qualification_rate" key holds the qualification column name here.
        results = {
            "qualification_rate": QUALIFICATION_COLUMN,
            "selection_rate": SELECTION_RATE,
            "test_size": TEST_SIZE,
            "rashomon_epsilon": RASHOMON_EPSILON,
        }

        # Positions 0-3 of the returned sequence are stored as
        # avg / std / baseline avg / baseline std.
        local_homog = m.local_homogenization()
        results.update({
            "local_homog_avg": local_homog[0],
            "local_homog_std": local_homog[1],
            "baseline_local_homog_avg": local_homog[2],
            "baseline_local_homog_std": local_homog[3],
        })
        print(local_homog)

        # Same layout as above, computed with "age" as the argument.
        global_homog = m.global_homogenization("age")
        results.update({
            "global_homog_avg": global_homog[0],
            "global_homog_std": global_homog[1],
            "baseline_global_homog_avg": global_homog[2],
            "baseline_global_homog_std": global_homog[3],
        })
        print(global_homog)

        all_results.append(results)

0.1 qualified_gagne_3
(np.float64(0.021285184798953558), np.float64(0.01579276889526246), np.float64(0.2204667291899609), np.float64(0.038886456051999786))
(np.float64(1.4500489757943478), np.float64(0.16760803970362545), np.float64(1.7220313769272735), np.float64(0.0552446028419653))
0.1 qualified_gagne_2
(np.float64(0.025967714414243524), np.float64(0.016064705347202987), np.float64(0.23933460868481682), np.float64(0.030998985812317897))
(np.float64(1.4465345252487996), np.float64(0.16624733288652263), np.float64(1.695180702630797), np.float64(0.05856569296011521))
0.1 qualified_gagne_1
(np.float64(0.03622911098074699), np.float64(0.0165254923921402), np.float64(0.27073140380689575), np.float64(0.018892343525591684))
(np.float64(1.459173505818766), np.float64(0.15956876810626053), np.float64(1.6875839054708162), np.float64(0.055325301285366724))
0.25 qualified_gagne_3
(np.float64(0.03876545302858737), np.float64(0.021960303497570964), np.float64(0.4001925631500878), np.float64(0.0522

In [None]:
# Rashomon / selection metrics sweep.
#
# For every (selection rate, qualification rate) pair this cell builds the
# allocations, evaluates the full list of Metrics summaries below, and appends
# one flat record per pair to `all_results`.
#
# NOTE(review): this cell rebinds `all_results` to a fresh list, so running it
# after another sweep drops that sweep's records before they are saved --
# confirm that is intended.
# NOTE(review): the 4th Allocations argument and the 3rd Metrics argument are
# floats here, whereas the executed sweep cell passes a column-name string in
# those positions -- confirm the current API accepts both.
TEST_SIZE = 100
RASHOMON_EPSILON = 0.01
ITERATIONS_PER_SPLIT = 25

all_results = []
for SELECTION_RATE in [0.10, 0.25, 0.50]:
    for QUALIFICATION_RATE in [0.25, 0.50, 0.75]:
        print(SELECTION_RATE, QUALIFICATION_RATE)

        allocations = Allocations(
            df,
            TEST_SIZE,
            SELECTION_RATE,
            QUALIFICATION_RATE,
            RASHOMON_EPSILON,
            ITERATIONS_PER_SPLIT,
        ).get_allocations()
        m = Metrics(allocations, data, QUALIFICATION_RATE)

        # Configuration identifiers lead the record's columns.
        results = {
            "qualification_rate": QUALIFICATION_RATE,
            "selection_rate": SELECTION_RATE,
            "test_size": TEST_SIZE,
            "rashomon_epsilon": RASHOMON_EPSILON,
        }

        # Each summary below is indexed positionally; element 0 is stored as
        # "<name>_avg" and element 1 as "<name>_std". The calls are kept in
        # this order so the record's key order stays stable.
        k_prime_stats = m.k_prime()
        results.update({
            "k_prime_avg": k_prime_stats[0],
            "k_prime_std": k_prime_stats[1],
        })

        n_prime_stats = m.n_prime()
        results.update({
            "n_prime_avg": n_prime_stats[0],
            "n_prime_std": n_prime_stats[1],
        })

        allocation_counts = m.count_rashomon_allocations()
        results.update({
            "count_rashomon_allocations_avg": allocation_counts[0],
            "count_rashomon_allocations_std": allocation_counts[1],
        })

        model_counts = m.count_rashomon_models()
        results.update({
            "count_rashomon_models_avg": model_counts[0],
            "count_rashomon_models_std": model_counts[1],
        })

        models_per_allocation = m.count_rashomon_models_per_allocation()
        results.update({
            "count_rashomon_models_per_allocation_avg": models_per_allocation[0],
            "count_rashomon_models_per_allocation_std": models_per_allocation[1],
        })

        exclusion_stats = m.systemic_exclusion()
        results.update({
            "systemic_exclusion_avg": exclusion_stats[0],
            "systemic_exclusion_std": exclusion_stats[1],
        })

        pairwise_exclusion_stats = m.systemic_exclusion_pairwise()
        results.update({
            "systemic_exclusion_pairwise_avg": pairwise_exclusion_stats[0],
            "systemic_exclusion_pairwise_std": pairwise_exclusion_stats[1],
        })

        # Four values: qualified avg/std followed by unqualified avg/std.
        selection_split = m.selections_by_qualification()
        results.update({
            "qualified_selections_avg": selection_split[0],
            "qualified_selections_std": selection_split[1],
            "unqualified_selections_avg": selection_split[2],
            "unqualified_selections_std": selection_split[3],
        })

        # Element 2 is stored as the "best found" value.
        minority_stats = m.minority_selection_rate("race", 1)
        results.update({
            "minority_selection_avg": minority_stats[0],
            "minority_selection_std": minority_stats[1],
            "minority_selection_best_found": minority_stats[2],
        })

        best_minority_stats = m.best_minority_selection_rate("race", 1)
        results.update({
            "best_minority_selection_avg": best_minority_stats[0],
            "best_minority_selection_std": best_minority_stats[1],
        })

        # Element 2 is stored as the "best found" value.
        feature_ratio_stats = m.feature_ratio_by_group("race", 1, 0, "gagne_sum_t")
        results.update({
            "feature_ratio_avg": feature_ratio_stats[0],
            "feature_ratio_std": feature_ratio_stats[1],
            "feature_ratio_best_found": feature_ratio_stats[2],
        })

        best_feature_ratio_stats = m.best_feature_ratio_by_group("race", 1, 0, "gagne_sum_t")
        results.update({
            "best_feature_ratio_avg": best_feature_ratio_stats[0],
            "best_feature_ratio_std": best_feature_ratio_stats[1],
        })

        all_results.append(results)

In [4]:
# Turn the accumulated per-configuration records into a table and write it to
# disk without the index column. `all_results` is rebound from the list of
# records to the resulting DataFrame.
all_results = pd.DataFrame(data=all_results)
all_results.to_csv(path_or_buf="results.csv", index=False)