In [1]:
import pandas as pd
import numpy as np
from allocations import Allocations
from metrics import Metrics

In [2]:
# --- Run configuration ------------------------------------------------------
# Column handed to Metrics as the qualification column in the sweep below.
QUALIFICATION_COLUMN = "qualified_gagne_3"
# Destination CSV for the aggregated results table.
output_file = "results.csv"

# --- Inputs -----------------------------------------------------------------
# `df` is the predictions file passed to Allocations; `data` is the cleaned
# dataset passed to Metrics.
df = pd.read_csv("predictions/obermeyer/test_bootstrap_lr_qualified_gagne_3.csv")
data = pd.read_csv("data/obermeyer/obermeyer_data_cleaned.csv")

# --- Ordinal age feature ----------------------------------------------------
# Indicator columns for the age bands, listed from youngest to oldest; the
# position of a column in this list becomes the value stored in `age`.
columns = [
    "dem_age_band_18-24_tm1",
    "dem_age_band_25-34_tm1",
    "dem_age_band_35-44_tm1",
    "dem_age_band_45-54_tm1",
    "dem_age_band_55-64_tm1",
    "dem_age_band_65-74_tm1",
    "dem_age_band_75+_tm1",
]

# Every row starts at 0 and is then overwritten with the index of each band
# whose indicator equals 1 (if several are set, the last band in the list wins).
# NOTE(review): a row with no indicator set keeps 0, which is indistinguishable
# from the 18-24 band -- confirm every row has exactly one band flagged.
data["age"] = 0
for band_index, band_column in enumerate(columns):
    in_band = data[band_column] == 1
    data.loc[in_band, "age"] = band_index

In [3]:
# Value forwarded to Allocations as its last positional argument.
ITERATIONS_PER_SPLIT = 25

# Declarative description of every metric written to the results table.
# Each entry is (Metrics method name, positional arguments, column prefixes).
# The method's return value is read positionally in consecutive pairs:
# prefix number k takes element 2*k for "<prefix>_avg" and element 2*k + 1 for
# "<prefix>_std". The order of the entries fixes both the order in which the
# Metrics methods are called and the column order of the output table.
METRIC_SPECS = [
    ("k_prime", (), ["k_prime"]),
    ("n_prime", (), ["n_prime"]),
    ("count_rashomon_allocations", (), ["count_rashomon_allocations"]),
    ("count_rashomon_models", (), ["count_rashomon_models"]),
    ("count_rashomon_models_per_allocation", (),
     ["count_rashomon_models_per_allocation"]),
    ("systemic_exclusion", (), ["systemic_exclusion"]),
    ("systemic_exclusion_pairwise", (), ["systemic_exclusion_pairwise"]),
    ("homogenization_in_selected_individuals", ("age",),
     ["age_homogenization",
      "age_homogenization_baseline1",
      "age_homogenization_baseline2"]),
    ("homogenization_in_individual_decisions", (),
     ["decision_homogenization", "decision_homogenization_baseline"]),
    ("arbitrariness", (), ["arbitrary_qualified", "arbitrary_unqualified"]),
    ("group_selection_rate", ("race", 1),
     ["black_selection", "black_selection_best_found"]),
    ("group_selection_rate_best", ("race", 1),
     ["black_selection_best_possible1", "black_selection_best_possible2"]),
    ("group_feature_ratio", ("race", 1, 0, "gagne_sum_t"),
     ["feature_ratio", "feature_ratio_best_found"]),
    ("group_feature_ratio_best", ("race", 1, 0, "gagne_sum_t"),
     ["feature_ratio_best_possible1", "feature_ratio_best_possible2"]),
]

# Full grid of settings, enumerated with selection rate varying slowest and
# Rashomon epsilon varying fastest.
parameter_grid = [
    (selection_rate, test_size, rashomon_epsilon)
    for selection_rate in [0.10, 0.25, 0.50]
    for test_size in [100, 1000]
    for rashomon_epsilon in [0.01, 0.025]
]

# One dict per grid point; turned into a table by a later cell.
all_results = []
for SELECTION_RATE, TEST_SIZE, RASHOMON_EPSILON in parameter_grid:
    # Progress marker so long runs show which configuration is active.
    print(SELECTION_RATE, TEST_SIZE, RASHOMON_EPSILON)

    allocator = Allocations(df, TEST_SIZE, SELECTION_RATE, "y",
                            RASHOMON_EPSILON, ITERATIONS_PER_SPLIT)
    allocations = allocator.get_allocations()
    evaluator = Metrics(allocations, data, QUALIFICATION_COLUMN)

    # Identify the configuration this row belongs to.
    # NOTE(review): "qualification_rate" stores the qualification column *name*,
    # not a rate; the key is kept unchanged so the output schema stays the same.
    results = {
        "qualification_rate": QUALIFICATION_COLUMN,
        "selection_rate": SELECTION_RATE,
        "test_size": TEST_SIZE,
        "rashomon_epsilon": RASHOMON_EPSILON,
    }

    # Evaluate each metric once and unpack its positional result into columns.
    for method_name, method_args, prefixes in METRIC_SPECS:
        values = getattr(evaluator, method_name)(*method_args)
        for pair_index, prefix in enumerate(prefixes):
            results[prefix + "_avg"] = values[2 * pair_index]
            results[prefix + "_std"] = values[2 * pair_index + 1]

    all_results.append(results)

0.1 100 0.01


  metric.append(np.nanmean(metric_inner))
  metric_avg = [np.nanmean(inner) for inner in metric]
  metric_best_found = [np.nanmin(inner) for inner in metric]


0.1 100 0.025


  metric.append(np.nanmean(metric_inner))
  metric_avg = [np.nanmean(inner) for inner in metric]
  metric_best_found = [np.nanmin(inner) for inner in metric]


0.1 1000 0.01
0.1 1000 0.025
0.25 100 0.01


  metric.append(np.nanmean(metric_inner))
  metric_avg = [np.nanmean(inner) for inner in metric]
  metric_best_found = [np.nanmin(inner) for inner in metric]


0.25 100 0.025


  metric.append(np.nanmean(metric_inner))
  metric_avg = [np.nanmean(inner) for inner in metric]
  metric_best_found = [np.nanmin(inner) for inner in metric]


0.25 1000 0.01
0.25 1000 0.025
0.5 100 0.01


  metric.append(np.nanmean(metric_inner))


0.5 100 0.025


  metric.append(np.nanmean(metric_inner))


0.5 1000 0.01
0.5 1000 0.025


In [4]:
# Turn the list of per-configuration result dicts into a table (one row per
# configuration) and write it to `output_file` without the index column.
# `all_results` is rebound from the list to the DataFrame here.
all_results = pd.DataFrame(data=all_results)
all_results.to_csv(path_or_buf=output_file, index=False)