In [13]:
import pandas as pd
import numpy as np
from allocations import Allocations
from metrics import Metrics

In [14]:
# Model predictions that are handed to Allocations in the sweep below.
df = pd.read_csv(
    filepath_or_buffer="predictions/obermeyer/model_weight_perturbation_lr.csv",
)
# Cleaned dataset that is handed to Metrics alongside the allocations.
data = pd.read_csv(
    filepath_or_buffer="data/obermeyer/obermeyer_data_cleaned.csv",
)

In [15]:
# Fixed experiment settings shared by every configuration in the sweep.
TEST_SIZE = 100
RASHOMON_EPSILON = 0.01
ITERATIONS_PER_SPLIT = 25

# One summary dict per (selection rate, qualification column) pair; turned
# into a DataFrame and written to disk in a later cell.
all_results = []
for SELECTION_RATE in [0.10, 0.25, 0.50]:
    for QUALIFICATION_COLUMN in ["qualified_gagne_3", "qualified_gagne_2", "qualified_gagne_1"]:
        a = Allocations(df, TEST_SIZE, SELECTION_RATE, QUALIFICATION_COLUMN, RASHOMON_EPSILON, ITERATIONS_PER_SPLIT)
        allocations = a.get_allocations()
        m = Metrics(allocations, data, QUALIFICATION_COLUMN)

        # Record the configuration that produced this row.
        # NOTE(review): the "qualification_rate" key stores the qualification
        # *column name*; the key is left as-is to keep the CSV schema stable.
        results = {}
        results["qualification_rate"] = QUALIFICATION_COLUMN
        results["selection_rate"] = SELECTION_RATE
        results["test_size"] = TEST_SIZE
        results["rashomon_epsilon"] = RASHOMON_EPSILON

        # Each metric call below returns an indexable result; it is unpacked by
        # position into *_avg / *_std (and, for some metrics, further) columns.
        # The positional meaning is taken from the key names used here --
        # TODO confirm against the Metrics implementation.
        k_prime = m.k_prime()
        results["k_prime_avg"] = k_prime[0]
        results["k_prime_std"] = k_prime[1]

        n_prime = m.n_prime()
        results["n_prime_avg"] = n_prime[0]
        results["n_prime_std"] = n_prime[1]

        count_rashomon_allocations = m.count_rashomon_allocations()
        results["count_rashomon_allocations_avg"] = count_rashomon_allocations[0]
        results["count_rashomon_allocations_std"] = count_rashomon_allocations[1]

        count_rashomon_models = m.count_rashomon_models()
        results["count_rashomon_models_avg"] = count_rashomon_models[0]
        results["count_rashomon_models_std"] = count_rashomon_models[1]

        count_rashomon_models_per_allocation = m.count_rashomon_models_per_allocation()
        results["count_rashomon_models_per_allocation_avg"] = count_rashomon_models_per_allocation[0]
        results["count_rashomon_models_per_allocation_std"] = count_rashomon_models_per_allocation[1]

        systemic_exclusion = m.systemic_exclusion()
        results["systemic_exclusion_avg"] = systemic_exclusion[0]
        results["systemic_exclusion_std"] = systemic_exclusion[1]

        # Kept under its own name so the non-pairwise result is not overwritten.
        systemic_exclusion_pairwise = m.systemic_exclusion_pairwise()
        results["systemic_exclusion_pairwise_avg"] = systemic_exclusion_pairwise[0]
        results["systemic_exclusion_pairwise_std"] = systemic_exclusion_pairwise[1]

        selections_by_qualification = m.selections_by_qualification()
        results["qualified_selections_avg"] = selections_by_qualification[0]
        results["qualified_selections_std"] = selections_by_qualification[1]
        results["unqualified_selections_avg"] = selections_by_qualification[2]
        results["unqualified_selections_std"] = selections_by_qualification[3]

        minority_selection_rate = m.minority_selection_rate("race", 1)
        results["minority_selection_avg"] = minority_selection_rate[0]
        results["minority_selection_std"] = minority_selection_rate[1]
        results["minority_selection_best_found"] = minority_selection_rate[2]

        best_minority_selection_rate = m.best_minority_selection_rate("race", 1)
        results["best_minority_selection_avg"] = best_minority_selection_rate[0]
        results["best_minority_selection_std"] = best_minority_selection_rate[1]

        feature_ratio_by_group = m.feature_ratio_by_group("race", 1, 0, "gagne_sum_t")
        results["feature_ratio_avg"] = feature_ratio_by_group[0]
        results["feature_ratio_std"] = feature_ratio_by_group[1]
        results["feature_ratio_best_found"] = feature_ratio_by_group[2]

        best_feature_ratio = m.best_feature_ratio_by_group("race", 1, 0, "gagne_sum_t")
        results["best_feature_ratio_avg"] = best_feature_ratio[0]
        results["best_feature_ratio_std"] = best_feature_ratio[1]
        # The row is appended only once, at the end of the iteration. An
        # earlier append at this point put the same dict object into
        # all_results twice, duplicating every row in the exported CSV.

        h = m.local_homogenization()
        results["local_homog_avg"] = h[0]
        results["local_homog_std"] = h[1]
        results["baseline_local_homog_avg"] = h[2]
        results["baseline_local_homog_std"] = h[3]
        print(h)

        h = m.global_homogenization("age")
        results["global_homog_avg"] = h[0]
        results["global_homog_std"] = h[1]
        results["baseline_global_homog_avg"] = h[2]
        results["baseline_global_homog_std"] = h[3]
        print(h)

        # Exactly one row per (selection rate, qualification column) pair.
        all_results.append(results)

  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.0024964139326860714), np.float64(0.005618975857157076), np.float64(0.2541226865096361), np.float64(0.03439424193538983))
(np.float64(1.4750362549560903), np.float64(0.18458438934857246), np.float64(1.72298646042232), np.float64(0.05647947462537045))


  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.0033564584779155384), np.float64(0.006329342070079631), np.float64(0.2659454577336911), np.float64(0.028370420187370446))
(np.float64(1.4786636585305488), np.float64(0.18293987403342177), np.float64(1.6950117221674796), np.float64(0.0591973952136307))


  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.003758212821972739), np.float64(0.006832064564785756), np.float64(0.28527929639344585), np.float64(0.02118543777612456))
(np.float64(1.479451935541859), np.float64(0.1835959526143604), np.float64(1.6894007826658006), np.float64(0.05358461488520905))


  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.007611203439585802), np.float64(0.008731404273137984), np.float64(0.4396756688337594), np.float64(0.04419518757012058))
(np.float64(1.6485807509148582), np.float64(0.10177897248092925), np.float64(1.72298646042232), np.float64(0.05647947462537045))


  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.007139132092671659), np.float64(0.009256544267786167), np.float64(0.4486361522480715), np.float64(0.03943641901116265))
(np.float64(1.6514268792672964), np.float64(0.10327112446288467), np.float64(1.6950117221674796), np.float64(0.0591973952136307))


  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.009460157671797894), np.float64(0.009246690042139358), np.float64(0.48479994107465296), np.float64(0.03555118566247478))
(np.float64(1.6508647941610246), np.float64(0.10323140565550465), np.float64(1.6894007826658006), np.float64(0.05358461488520905))


  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.020279260593755486), np.float64(0.01172951941057496), np.float64(0.5745430442884651), np.float64(0.040215097571518574))
(np.float64(1.7136180237817737), np.float64(0.08903407090757959), np.float64(1.72298646042232), np.float64(0.05647947462537045))


  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.013302166739604857), np.float64(0.011281315677484742), np.float64(0.5478770409460804), np.float64(0.04754310516177178))
(np.float64(1.7143846532710707), np.float64(0.08930730565462922), np.float64(1.6950117221674796), np.float64(0.0591973952136307))


  metric.append(np.nanmean(metric_inner))
  metric_flattened = [np.nanmean(inner) for inner in metric]


(np.float64(0.010334325106285812), np.float64(0.009853773978387707), np.float64(0.5640007782569498), np.float64(0.0469139659379654))
(np.float64(1.7140200950364486), np.float64(0.08880572102956107), np.float64(1.6894007826658006), np.float64(0.05358461488520905))


In [16]:
# Turn the collected per-configuration dicts into a table (one row per dict)
# and persist it without the positional index column.
all_results = pd.DataFrame(data=all_results)
all_results.to_csv(path_or_buf="results3.csv", index=False)