In [1]:
import pandas as pd
import numpy as np
from allocations import Allocations
from metrics import Metrics

In [2]:
# Run configuration.
# `method` is the filename prefix of the per-qualification input CSVs read by
# the experiment loop, and also names the metrics file written at the end.
# The cleaned dataset is not loaded here: the following cell reads it into
# `data` before anything uses it, so loading it in this cell as well only
# repeated the same file read.
method = "test_bootstrap_lr"
output_file = f"{method}_metrics.csv"

In [3]:
# Load the cleaned dataset and derive a single integer "age" column from the
# age-band indicator columns.
data = pd.read_csv("data/obermeyer/obermeyer_data_cleaned.csv")

# Every row starts at 0; rows that flag none of the bands below keep this
# value, which is also the code assigned to the first band.
data["age"] = 0

# Age-band indicator columns, listed in the order that defines the age code.
columns = [
    'dem_age_band_18-24_tm1',
    'dem_age_band_25-34_tm1',
    'dem_age_band_35-44_tm1',
    'dem_age_band_45-54_tm1',
    'dem_age_band_55-64_tm1',
    'dem_age_band_65-74_tm1',
    'dem_age_band_75+_tm1',
]

# The position of a band in `columns` becomes its age code. Bands are applied
# in list order, so if a row flags more than one band the last one wins.
for band_code, band_column in enumerate(columns):
    in_band = data[band_column] == 1
    data.loc[in_band, "age"] = band_code

In [4]:
# Settings passed unchanged to every Allocations run below.
ITERATIONS_PER_SPLIT = 25
TEST_SIZE = 1000
RASHOMON_EPSILON = 0.01

# Declarative list of the metrics recorded for each run.
# Each entry is (Metrics method name, positional arguments, result keys).
# The method's return value is read by index: element i is stored under the
# i-th key. The list order is significant: it fixes both the order in which
# the Metrics methods are called and the column order of the saved results.
METRIC_SPECS = [
    ("k_prime", (), (
        "k_prime_avg",
        "k_prime_std",
    )),
    ("n_prime", (), (
        "n_prime_avg",
        "n_prime_std",
    )),
    ("count_rashomon_allocations", (), (
        "count_rashomon_allocations_avg",
        "count_rashomon_allocations_std",
    )),
    ("count_rashomon_models", (), (
        "count_rashomon_models_avg",
        "count_rashomon_models_std",
    )),
    ("count_rashomon_models_per_allocation", (), (
        "count_rashomon_models_per_allocation_avg",
        "count_rashomon_models_per_allocation_std",
    )),
    ("systemic_exclusion", (), (
        "systemic_exclusion_avg",
        "systemic_exclusion_std",
    )),
    ("homogenization_in_individual_decisions", (), (
        "heterozygosity_avg",
        "heterozygosity_std",
        "heterozygosity_baseline_avg",
        "heterozygosity_baseline_std",
    )),
    ("homogenization_in_selected_individuals", ("age",), (
        "age_homogenization_avg",
        "age_homogenization_std",
        "age_homogenization_baseline1_avg",
        "age_homogenization_baseline1_std",
        "age_homogenization_baseline2_avg",
        "age_homogenization_baseline2_std",
    )),
    ("group_selection_rates_found", ("race", 1), (
        "black_selection_found_avg",
        "black_selection_found_std",
        "black_selection_best_found_avg",
        "black_selection_best_found_std",
    )),
    ("group_selection_rates_possible", ("race", 1), (
        "black_selection_possible_avg",
        "black_selection_possible_std",
        "black_selection_best_possible_avg",
        "black_selection_best_possible_std",
    )),
    ("group_feature_ratios_found", ("race", 1, 0, "gagne_sum_t"), (
        "feature_ratio_found_avg",
        "feature_ratio_found_std",
        "feature_ratio_best_found_avg",
        "feature_ratio_best_found_std",
    )),
    ("group_feature_ratios_possible", ("race", 1, 0, "gagne_sum_t"), (
        "feature_ratio_possible_avg",
        "feature_ratio_possible_std",
        "feature_ratio_best_possible_avg",
        "feature_ratio_best_possible_std",
    )),
]

# One result dict is appended per (qualification column, selection rate) pair.
all_results = []
for QUALIFICATION_COLUMN in ["qualified_gagne_1", "qualified_gagne_2", "qualified_gagne_3"]:
    # The input CSV depends only on the qualification column, so it is read
    # once and reused for every selection rate.
    df = pd.read_csv(f"{method}_{QUALIFICATION_COLUMN}.csv")
    for SELECTION_RATE in [0.10, 0.25, 0.50]:
        # Progress line identifying the run that is about to start.
        print(SELECTION_RATE, QUALIFICATION_COLUMN)

        allocator = Allocations(df, TEST_SIZE, SELECTION_RATE, "y", RASHOMON_EPSILON, ITERATIONS_PER_SPLIT)
        allocations, people = allocator.get_allocations()
        metric_source = Metrics(allocations, people, data, QUALIFICATION_COLUMN)

        # Identify the run first so these become the leading columns.
        results = {
            "qualification_rate": QUALIFICATION_COLUMN,
            "selection_rate": SELECTION_RATE,
            "test_size": TEST_SIZE,
            "rashomon_epsilon": RASHOMON_EPSILON,
        }

        # Call each metric in spec order and store its values by position.
        for metric_name, metric_args, result_keys in METRIC_SPECS:
            values = getattr(metric_source, metric_name)(*metric_args)
            for position, key in enumerate(result_keys):
                results[key] = values[position]

        all_results.append(results)

0.1 qualified_gagne_1
0.25 qualified_gagne_1
0.5 qualified_gagne_1
0.1 qualified_gagne_2
0.25 qualified_gagne_2
0.5 qualified_gagne_2
0.1 qualified_gagne_3
0.25 qualified_gagne_3
0.5 qualified_gagne_3


In [5]:
# Turn the list of per-run result dicts into a table (one row per run) and
# save it to `output_file`, leaving the row index out of the CSV.
all_results = pd.DataFrame(data=all_results)
all_results.to_csv(path_or_buf=output_file, index=False)