In [1]:
import pandas as pd

# Configuration

In [2]:
# Auto-reload the custom package so that edits to its source are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 1: only modules registered through %aimport are reloaded before a cell runs.
%autoreload 1
# Register the project package for auto-reloading.
%aimport fairness_multiverse


In [8]:
# Identifier of the run to analyse; it is used below to build the input and
# output directory names as well as the aggregated data file name.
RUN_TO_ANALYSE: str = "1"

In [10]:
# Column-name prefixes used to pick out groups of columns from the aggregated data.
# Note that the evaluation prefix itself starts with the settings prefix, so
# every "eval" column is also matched by PREFIX_SETTINGS.
PREFIX_SETTINGS: str = "sett_"
PREFIX_EVAL: str = "sett_eval_"
PREFIX_PERFORMANCE: str = "perf_"
PREFIX_FAIRNESS: str = "fair_"

In [11]:
from pathlib import Path

# Root output folder, relative to the current working directory
OUTPUT_DIR = Path(".") / "output"

# Run identifier as a string, shared by the two run-specific paths below
_run_label = str(RUN_TO_ANALYSE)

# Directory that will contain outputs from analysis (created if missing)
ANALYSIS_OUTPUT_DIR = OUTPUT_DIR.joinpath("analyses", _run_label)
ANALYSIS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Directory from which the data of the selected run is read
DATA_DIR = OUTPUT_DIR.joinpath("runs", _run_label, "data")

In [12]:
# Name of the fairness column treated as the main fairness metric in this notebook.
main_fairness_metric: str = "fair_main_equalized_odds_difference"

## Data Loading

In [13]:
# Read the gzip-compressed CSV holding the aggregated outputs of the selected run.
_agg_csv_path = DATA_DIR / f"agg_{RUN_TO_ANALYSE}_run_outputs.csv.gz"
df_agg_raw = pd.read_csv(_agg_csv_path)

In [14]:
import json

# Each row stores its universe settings as a JSON string: decode them, flatten
# the resulting dicts into one column per setting and mark those columns with
# the settings prefix.
_settings_records = df_agg_raw["universe_settings"].apply(json.loads)
df_settings = pd.json_normalize(_settings_records).add_prefix(PREFIX_SETTINGS)

# Put the settings columns in front of the raw columns (joined on the index).
df_agg_full = df_settings.join(df_agg_raw)
df_agg_full.head()

Unnamed: 0.1,sett_cutoff,sett_eval_fairness_grouping,sett_exclude_features,sett_exclude_subgroups,sett_model,Unnamed: 0,run_no,universe_id,universe_settings,execution_time,...,perf_grp_precision_0,perf_grp_precision_1,perf_grp_false positive rate_0,perf_grp_false positive rate_1,perf_grp_false negative rate_0,perf_grp_false negative rate_1,perf_grp_selection rate_0,perf_grp_selection rate_1,perf_grp_count_0,perf_grp_count_1
0,quantile_0.1,majority-minority,none,keep-all,rf,0,1,1a046868f549224919d5aea86afcaf91,"{""cutoff"": ""quantile_0.1"", ""eval_fairness_grou...",22.449074,...,,,,,,,,,,
1,quantile_0.1,nationality-all,none,keep-all,rf,1,1,1a046868f549224919d5aea86afcaf91,"{""cutoff"": ""quantile_0.1"", ""eval_fairness_grou...",22.449074,...,0.122124,0.138954,0.892417,0.936465,0.047297,0.00744,0.899367,0.943877,20540.0,69170.0
2,quantile_0.25,majority-minority,none,keep-all,rf,2,1,1a046868f549224919d5aea86afcaf91,"{""cutoff"": ""quantile_0.25"", ""eval_fairness_gro...",22.449074,...,,,,,,,,,,
3,quantile_0.25,nationality-all,none,keep-all,rf,3,1,1a046868f549224919d5aea86afcaf91,"{""cutoff"": ""quantile_0.25"", ""eval_fairness_gro...",22.449074,...,0.142143,0.163804,0.679837,0.738131,0.135557,0.050328,0.70112,0.766084,20540.0,69170.0
4,quantile_0.1,majority-minority,sex,drop-non-german,logreg,4,1,aeda81ea848062c8b09ab80466cf07c3,"{""cutoff"": ""quantile_0.1"", ""eval_fairness_grou...",174.308044,...,,,,,,,,,,


In [15]:
# Report the size of the combined frame.
rows = len(df_agg_full.index)
columns = len(df_agg_full.columns)
print(f"The data has N = {rows} rows and N = {columns} columns.")

The data has N = 128 rows and N = 56 columns.


In [16]:
def _columns_with_prefix(frame, prefix):
    """Return the column names of `frame` that start with `prefix`, in column order."""
    return list(frame.columns[frame.columns.str.startswith(prefix)])


# Column groups, each selected by its name prefix.
cols_settings = _columns_with_prefix(df_agg_full, PREFIX_SETTINGS)
cols_eval = _columns_with_prefix(df_agg_full, PREFIX_EVAL)

# Settings columns that are not evaluation settings. Filtering the ordered list
# (instead of taking a set difference) keeps the original column order, so the
# result is identical on every kernel start rather than depending on set
# iteration order.
_eval_lookup = set(cols_eval)
cols_non_eval = [col for col in cols_settings if col not in _eval_lookup]

cols_performance = _columns_with_prefix(df_agg_full, PREFIX_PERFORMANCE)
cols_fairness = _columns_with_prefix(df_agg_full, PREFIX_FAIRNESS)


In [17]:
# Show the universe identifier of every row.
df_agg_full.loc[:, "universe_id"]

0      1a046868f549224919d5aea86afcaf91
1      1a046868f549224919d5aea86afcaf91
2      1a046868f549224919d5aea86afcaf91
3      1a046868f549224919d5aea86afcaf91
4      aeda81ea848062c8b09ab80466cf07c3
                     ...               
123    26d2c708e959f2c346ebb7bcb62376f8
124    418d84cd0641196cf291bbbd3833bfcd
125    418d84cd0641196cf291bbbd3833bfcd
126    418d84cd0641196cf291bbbd3833bfcd
127    418d84cd0641196cf291bbbd3833bfcd
Name: universe_id, Length: 128, dtype: object

In [18]:
# Mark the rows whose `sett_exclude_subgroups` setting is 'drop-non-german';
# these are removed from the frame used from here on.
drop_mask = df_agg_full["sett_exclude_subgroups"].isin(['drop-non-german'])
n_dropped = drop_mask.sum()
n_kept = (~drop_mask).sum()
print(f"Dropping N = {n_dropped} rows, keeping N = {n_kept}")

# Keep only the unmarked rows (the original row labels are retained).
df_agg_full = df_agg_full.loc[~drop_mask]

Dropping N = 64 rows, keeping N = 64


In [19]:
# Distinct values of the subgroup-exclusion setting left after the filtering.
df_agg_full.loc[:, "sett_exclude_subgroups"].unique()

array(['keep-all'], dtype=object)

In [20]:
# Distinct fairness-grouping settings present in the data.
df_agg_full.loc[:, "sett_eval_fairness_grouping"].unique()

array(['majority-minority', 'nationality-all'], dtype=object)

In [22]:
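# NOTE(review): left disabled - presumably because no `sett_eval_exclude_subgroups`
# column appears among the settings columns shown by `df_agg_full.head()`; confirm
# and delete this cell if it is no longer needed.
#df_agg_full["sett_eval_exclude_subgroups"].unique()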

In [24]:
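# NOTE(review): left disabled - presumably because no `sett_eval_on_subset`
# column appears among the settings columns shown by `df_agg_full.head()`; confirm
# and delete this cell if it is no longer needed.
#df_agg_full["sett_eval_on_subset"].unique()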