In [1]:
pip install fairlearn



In [2]:
#defining all load data scenarios
import pandas as pd
from train_eval_functions import *
import importlib
import joblib
from sklearn.model_selection import train_test_split
from collections import defaultdict

def stratify_wrapper(train_df, id_test_df, ood_test_df, n_samples=200):
    train_class_distribution = train_df['mortality_ten_years'].value_counts(normalize=True)
    rounded_distribution = (train_class_distribution * 100).round().astype(int)
    #print(rounded_distribution)
    #print(id_test_df['mortality_ten_years'].value_counts(normalize=True))
    #print(ood_test_df['mortality_ten_years'].value_counts(normalize=True))

    id_test_df = stratified_sample(id_test_df, target_col='mortality_ten_years', n_samples=n_samples, class_distribution=rounded_distribution)
    ood_test_df = stratified_sample(ood_test_df, target_col='mortality_ten_years', n_samples=n_samples, class_distribution=rounded_distribution)

    return train_df, id_test_df, ood_test_df

def fill_na_val(train_df, id_test_df, ood_test_df):
    train_df = train_df.fillna(-999)
    id_test_df = id_test_df.fillna(-999)
    ood_test_df = ood_test_df.fillna(-999)

    X_train, y_train = split_dataset_into_x_y(train_df, 'mortality_ten_years')
    X_id_test, y_id_test = split_dataset_into_x_y(id_test_df, 'mortality_ten_years')
    X_ood_test, y_ood_test = split_dataset_into_x_y(ood_test_df, 'mortality_ten_years')

    return X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test

def impute(train_df, id_test_df, ood_test_df):
    imp_norm_pipe = Pipeline(steps=[
            ("imputer", SimpleImputer(strategy="mean",missing_values=pd.NA)),
            ("normalizer", StandardScaler())])

    X_train, y_train = split_dataset_into_x_y(train_df, 'mortality_ten_years')
    X_id_test, y_id_test = split_dataset_into_x_y(id_test_df, 'mortality_ten_years')
    X_ood_test, y_ood_test = split_dataset_into_x_y(ood_test_df, 'mortality_ten_years')

    all_na_cols = X_train.columns[X_train.isna().all()]
    X_train = X_train.drop(columns=all_na_cols).fillna(np.nan)
    X_train = pd.DataFrame(imp_norm_pipe.fit_transform(X_train), columns=X_train.columns, index=X_train.index)

    X_id_test = X_id_test.drop(columns=all_na_cols).fillna(np.nan)
    X_id_test = pd.DataFrame(imp_norm_pipe.transform(X_id_test), columns=X_id_test.columns, index=X_id_test.index)

    X_ood_test = X_ood_test.drop(columns=all_na_cols).fillna(np.nan)
    X_ood_test = pd.DataFrame(imp_norm_pipe.transform(X_ood_test), columns=X_ood_test.columns, index=X_ood_test.index)

    return X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test

def load_data_scenario_1(imputed=False):
    '''
    Train: 2006-2010
    ID Test: 2012
    OOD Test: 2014-2016
    '''

    data_dict = pd.read_pickle('cleaned_data_stacked_and_dict.pkl')
    train_df = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010']], ignore_index=True)
    id_test_df = data_dict['2012']
    ood_test_df = pd.concat([data_dict['2014'], data_dict['2016']], ignore_index=True)

    train_df, id_test_df, ood_test_df = stratify_wrapper(train_df, id_test_df, ood_test_df, n_samples=200)

    if not imputed:
        return fill_na_val(train_df, id_test_df, ood_test_df)
    else:
        return impute(train_df, id_test_df, ood_test_df)

def load_data_scenario_2(imputed=False):
    '''
    Train: 2006-2008
    ID Test: 2010-2012
    OOD Test: 2014-2016
    '''
    data_dict = pd.read_pickle('cleaned_data_stacked_and_dict.pkl')
    train_df = pd.concat([data_dict['2006'], data_dict['2008']], ignore_index=True)
    id_test_df = pd.concat([data_dict['2010'], data_dict['2012']], ignore_index=True)
    ood_test_df = pd.concat([data_dict['2014'], data_dict['2016']], ignore_index=True)

    train_df, id_test_df, ood_test_df = stratify_wrapper(train_df, id_test_df, ood_test_df, n_samples=200)

    if not imputed:
        return fill_na_val(train_df, id_test_df, ood_test_df)
    else:
        return impute(train_df, id_test_df, ood_test_df)

def load_data_scenario_3(imputed=False):
    '''
    Train: 2006-2010 (80%)
    ID Test: 2006-2010 (20%)
    OOD Test: 2016
    '''
    data_dict = pd.read_pickle('cleaned_data_stacked_and_dict.pkl')

    dataset = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010']], ignore_index=True)
    X_dataset = dataset.drop(columns=["mortality_ten_years"])
    y_dataset = dataset["mortality_ten_years"]
    X_train, X_id_test, y_train, y_id_test = train_test_split(X_dataset, y_dataset, test_size=0.2, random_state=42)

    train_df = pd.concat([X_train, y_train], axis=1)
    id_test_df = pd.concat([X_id_test, y_id_test], axis=1)
    ood_test_df = data_dict['2016']

    train_df, id_test_df, ood_test_df = stratify_wrapper(train_df, id_test_df, ood_test_df, n_samples=200)

    if not imputed:
        return fill_na_val(train_df, id_test_df, ood_test_df)
    else:
        return impute(train_df, id_test_df, ood_test_df)

def load_data_scenario_4(imputed=False):
    '''
    Train: 2006-2014 (80%) all regions except 11 1 2
    ID Test: 2006-2014 (20%) all regions except 11 1 2
    OOD Test: 2016. 11, 1, 2
    '''
    data_dict = pd.read_pickle('cleaned_data_stacked_and_dict.pkl')
    dataset = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010'], data_dict['2012'], data_dict['2014']], ignore_index=True)

    dataset.drop(dataset[dataset['state_live_current'].isin([97, 98, 99, 11, 1, 2])].index, inplace=True)

    X_dataset = dataset.drop(columns=["mortality_ten_years"])
    y_dataset = dataset["mortality_ten_years"]

    X_train, X_id_test, y_train, y_id_test = train_test_split(X_dataset, y_dataset, test_size=0.2, random_state=42)

    train_df = pd.concat([X_train, y_train], axis=1)
    id_test_df = pd.concat([X_id_test, y_id_test], axis=1)
    ood_test_df = data_dict['2016']
    ood_test_df.drop(ood_test_df[~ood_test_df['state_live_current'].isin([11, 1, 2])].index, inplace=True)

    train_df, id_test_df, ood_test_df = stratify_wrapper(train_df, id_test_df, ood_test_df, n_samples=200)

    if not imputed:
        return fill_na_val(train_df, id_test_df, ood_test_df)
    else:
        return impute(train_df, id_test_df, ood_test_df)

def load_data_scenario_5(imputed=False):
    '''
    Train: 2006-2016 (80%) regions 3, 4, 5, 6
    ID Test: 2006-2016 (20%) regions 3, 4, 5, 6
    OOD Test: 2006-2016 region 1
    '''
    data_dict = pd.read_pickle('cleaned_data_stacked_and_dict.pkl')

    dataset = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010'], data_dict['2012'], data_dict['2014'], data_dict['2016']], ignore_index=True)
    dataset.drop(dataset[dataset['race'].isin([-999, 8, 9, 2, 7])].index, inplace=True)

    X_dataset = dataset.drop(columns=["mortality_ten_years"])
    y_dataset = dataset["mortality_ten_years"]

    X_train, X_id_test, y_train, y_id_test = train_test_split(X_dataset, y_dataset, test_size=0.2, random_state=42)

    train_df = pd.concat([X_train, y_train], axis=1)
    id_test_df = pd.concat([X_id_test, y_id_test], axis=1)
    ood_test_df = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010'], data_dict['2012'], data_dict['2014'], data_dict['2016']], ignore_index=True)
    ood_test_df.drop(ood_test_df[~ood_test_df['state_live_current'].isin([1])].index, inplace=True)

    train_df, id_test_df, ood_test_df = stratify_wrapper(train_df, id_test_df, ood_test_df, n_samples=200)

    if not imputed:
        return fill_na_val(train_df, id_test_df, ood_test_df)
    else:
        return impute(train_df, id_test_df, ood_test_df)

def load_data_scenario_6(imputed=False):
    '''
    Train: 2006-2010 (80%)
    ID Test: 2010-2012 (20%)
    OOD Test: 2014-2016
    '''
    data_dict = pd.read_pickle('cleaned_data_stacked_and_dict.pkl')

    dataset = data_dict['2010']
    X_dataset = dataset.drop(columns=["mortality_ten_years"])
    y_dataset = dataset["mortality_ten_years"]

    X_train, X_id_test, y_train, y_id_test = train_test_split(X_dataset, y_dataset, test_size=0.2, random_state=42)

    dataset = pd.concat([data_dict['2006'], data_dict['2008']], ignore_index=True)
    X_train = pd.concat([dataset.drop(columns=["mortality_ten_years"]), X_train])
    y_train = pd.concat([dataset["mortality_ten_years"], y_train])

    id_test_df = data_dict['2012']
    X_id_test = pd.concat([id_test_df, X_id_test])
    y_id_test = pd.concat([id_test_df["mortality_ten_years"], y_id_test])

    train_df = pd.concat([X_train, y_train], axis=1)
    id_test_df = pd.concat([X_id_test.drop(columns=["mortality_ten_years"]), y_id_test], axis=1)
    ood_test_df = pd.concat([data_dict['2014'], data_dict['2016']], ignore_index=True)

    train_df, id_test_df, ood_test_df = stratify_wrapper(train_df, id_test_df, ood_test_df, n_samples=200)

    if not imputed:
        return fill_na_val(train_df, id_test_df, ood_test_df)
    else:
        return impute(train_df, id_test_df, ood_test_df)

In [3]:
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from fairlearn.metrics import *

def get_fairness_metrics_adv(model, X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test, sensitive_attr="", mute=False):

    protected_attributes = ['race', 'gender', 'state_live_current', 'income_current', 'age', 'education_current']

    if sensitive_attr != "":
        protected_attributes = [sensitive_attr]

    all_sp = {}
    all_eo = {}

    for attribute in protected_attributes:
        if not mute: print(f"{attribute}:\n")
        attr_sp = []
        attr_eo = []

        for dataset_name, (X, y, sensitive_attr) in {
            "Train": (imp_norm_drop_miss_pipe.fit_transform(X_train), y_train, X_train[attribute]),
            "ID Test": (imp_norm_drop_miss_pipe.fit_transform(X_id_test), y_id_test, X_id_test[attribute]),
            "OOD Test": (imp_norm_drop_miss_pipe.fit_transform(X_ood_test), y_ood_test, X_ood_test[attribute]),
        }.items():
            y_pred = model.predict(X)
            sp = demographic_parity_difference(y_pred, y, sensitive_features=sensitive_attr)
            if not mute: print(f"Statistical Parity Difference ({dataset_name}): {sp}")
            attr_sp.append(sp)

            eo = equalized_odds_difference(y_pred, y, sensitive_features=sensitive_attr)
            if not mute: print(f"Equalized Odds Difference ({dataset_name}): {eo}")
            attr_eo.append(eo)

        all_sp[attribute] = attr_sp
        all_eo[attribute] = attr_eo

    return all_sp, all_eo


imp_norm_drop_miss_pipe = make_column_transformer(
    (
        Pipeline(
            [
                ("imputer", SimpleImputer(strategy="mean",missing_values=pd.NA)),
                ("normalizer", StandardScaler())
            ]
        ),
        make_column_selector(dtype_include="float64"),
    ),
    (
        Pipeline(
            [
                ("imputer", SimpleImputer(strategy="most_frequent")),
                ("encoder", OrdinalEncoder(handle_unknown="use_encoded_value",unknown_value=-1)),
            ]
        ),
        make_column_selector(dtype_include="category"),
    ),
)

In [None]:
from collections import defaultdict

def train_adv(X_train_imp_drop_normed, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped):

  best_sp, best_eo = float('inf'), float('inf')
  no_improvement_count = 0
  patience = 2

  adv_clf = AdversarialFairnessClassifier(
      backend="torch",
      predictor_model=[200, "relu", 100, "relu", 50, "sigmoid"],
      adversary_model = [50, "relu", 25, "relu", 10, "leaky_relu"],
      batch_size=32,
      learning_rate=0.001,
      alpha=1,
      random_state=42,
      shuffle=True,
      epochs=100,
      constraints="demographic_parity"
  )

  EVAL_EVERY = 5

  metrics_history = defaultdict(list)

  for epoch in range(adv_clf.epochs):
      adv_clf.partial_fit(X_train_imp_drop_normed, y_train_dropped, sensitive_features=X_train_imp_dropped[ADV_SENS_ATTR])

      y_train_pred = adv_clf.predict(X_train_imp_drop_normed)
      acc = accuracy_score(y_train_dropped, y_train_pred)
      metrics_history["epoch"].append(epoch + 1)
      metrics_history["accuracy"].append(acc)

      if epoch % EVAL_EVERY == 0:
        sp, eo = get_fairness_metrics_adv(
            adv_clf, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped,
            sensitive_attr=ADV_SENS_ATTR, mute=True
        )
        metrics_history["sp"].append(sp[ADV_SENS_ATTR][0])
        metrics_history["eo"].append(eo[ADV_SENS_ATTR][0])
        #print((sp, eo))
        if eo[ADV_SENS_ATTR][0] < best_eo:
            best_sp, best_eo = sp[ADV_SENS_ATTR][0], eo[ADV_SENS_ATTR][0]
            no_improvement_count = 0
            print("Fairness metrics improved, continuing...")
        else:
            no_improvement_count += 1
            print(f"No improvement in fairness metrics for {no_improvement_count} evaluations.")

        #stop early
        if no_improvement_count >= patience:
            print("Early stopping: No significant improvement in fairness metrics.")
            break

      # Optionally, print progress
      print(f"Epoch {epoch + 1}/{adv_clf.epochs} - Accuracy: {acc:.4f}")
  return adv_clf, metrics_history

In [38]:
all_was_dist = []
all_sp = []
all_eo = []
all_max_gap = []

ADV_SENS_ATTR = "state_live_current"

In [39]:
X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test = load_data_scenario_1(imputed=True)
X_train_imp = X_train
X_id_test_imp = X_id_test
X_ood_test_imp = X_ood_test

X_train_imp_dropped = X_train_imp[X_train_imp[ADV_SENS_ATTR].notna()]
y_train_dropped = y_train[X_train_imp[ADV_SENS_ATTR].notna()]
X_train_imp_drop_normed = imp_norm_drop_miss_pipe.fit_transform(X_train_imp_dropped)

X_id_test_imp_dropped = X_id_test_imp[X_id_test_imp[ADV_SENS_ATTR].notna()]
y_id_test_dropped = y_id_test[X_id_test_imp[ADV_SENS_ATTR].notna()]
X_id_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_id_test_imp_dropped)

X_ood_test_imp_dropped = X_ood_test_imp[X_ood_test_imp[ADV_SENS_ATTR].notna()]
y_ood_test_dropped = y_ood_test[X_ood_test_imp[ADV_SENS_ATTR].notna()]
X_ood_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_ood_test_imp_dropped)

adv_clf, metrics_history = train_adv(X_train_imp_drop_normed, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped)


  train_df = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010']], ignore_index=True)
  X_train = X_train.drop(columns=all_na_cols).fillna(np.nan)
  X_id_test = X_id_test.drop(columns=all_na_cols).fillna(np.nan)
  X_ood_test = X_ood_test.drop(columns=all_na_cols).fillna(np.nan)


Epoch 1/100 - Accuracy: 0.6314
Epoch 2/100 - Accuracy: 0.6390
Epoch 3/100 - Accuracy: 0.6503
Epoch 4/100 - Accuracy: 0.6623
Epoch 5/100 - Accuracy: 0.6727
Epoch 6/100 - Accuracy: 0.6811
Epoch 7/100 - Accuracy: 0.6900
Epoch 8/100 - Accuracy: 0.6957
Epoch 9/100 - Accuracy: 0.7020
Epoch 10/100 - Accuracy: 0.7071
Epoch 11/100 - Accuracy: 0.7103
Epoch 12/100 - Accuracy: 0.7125
Epoch 13/100 - Accuracy: 0.7147
Epoch 14/100 - Accuracy: 0.7169
Epoch 15/100 - Accuracy: 0.7181
Epoch 16/100 - Accuracy: 0.7195
Epoch 17/100 - Accuracy: 0.7221
Epoch 18/100 - Accuracy: 0.7237
Epoch 19/100 - Accuracy: 0.7251
Epoch 20/100 - Accuracy: 0.7263
Epoch 21/100 - Accuracy: 0.7281
Epoch 22/100 - Accuracy: 0.7290
Epoch 23/100 - Accuracy: 0.7297
Epoch 24/100 - Accuracy: 0.7310
Epoch 25/100 - Accuracy: 0.7324
Epoch 26/100 - Accuracy: 0.7338
Epoch 27/100 - Accuracy: 0.7350
Epoch 28/100 - Accuracy: 0.7361
Epoch 29/100 - Accuracy: 0.7372
Epoch 30/100 - Accuracy: 0.7383
Epoch 31/100 - Accuracy: 0.7394
Epoch 32/100 - Ac

In [40]:
import pickle

with open(f'./{ADV_SENS_ATTR}/metrics_1_eval.pkl', 'wb') as file:
    pickle.dump(metrics_history, file)

evaluate(adv_clf, X_id_test, y_id_test, X_ood_test, y_ood_test, save_as=f"./{ADV_SENS_ATTR}/adv_1_eval.txt")

sp, eo = get_fairness_metrics_adv(
      adv_clf, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped,
      sensitive_attr=ADV_SENS_ATTR, mute=False
  )


all_sp.append(sp)
all_eo.append(eo)

wd = calculate_wasserstein_distance(X_train, X_id_test, X_ood_test, mute=True)
all_was_dist.append(wd)

state_live_current:





Statistical Parity Difference (Train): 0.13039912520503005
Equalized Odds Difference (Train): 0.4691575637206223
Statistical Parity Difference (ID Test): 0.38333333333333336
Equalized Odds Difference (ID Test): 0.5
Statistical Parity Difference (OOD Test): 0.75
Equalized Odds Difference (OOD Test): 0.75


In [41]:
load_num = 2

X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test = load_data_scenario_2(imputed=True)
X_train_imp = X_train
X_id_test_imp = X_id_test
X_ood_test_imp = X_ood_test

X_train_imp_dropped = X_train_imp[X_train_imp[ADV_SENS_ATTR].notna()]
y_train_dropped = y_train[X_train_imp[ADV_SENS_ATTR].notna()]
X_train_imp_drop_normed = imp_norm_drop_miss_pipe.fit_transform(X_train_imp_dropped)

X_id_test_imp_dropped = X_id_test_imp[X_id_test_imp[ADV_SENS_ATTR].notna()]
y_id_test_dropped = y_id_test[X_id_test_imp[ADV_SENS_ATTR].notna()]
X_id_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_id_test_imp_dropped)

X_ood_test_imp_dropped = X_ood_test_imp[X_ood_test_imp[ADV_SENS_ATTR].notna()]
y_ood_test_dropped = y_ood_test[X_ood_test_imp[ADV_SENS_ATTR].notna()]
X_ood_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_ood_test_imp_dropped)

adv_clf, metrics_history = train_adv(X_train_imp_drop_normed, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped)

with open(f'./{ADV_SENS_ATTR}/metrics_{load_num}_eval.pkl', 'wb') as file:
    pickle.dump(metrics_history, file)

evaluate(adv_clf, X_id_test, y_id_test, X_ood_test, y_ood_test, save_as=f"./{ADV_SENS_ATTR}/adv_{load_num}_eval.txt")

sp, eo = get_fairness_metrics_adv(
      adv_clf, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped,
      sensitive_attr=ADV_SENS_ATTR, mute=False
  )


all_sp.append(sp)
all_eo.append(eo)

wd = calculate_wasserstein_distance(X_train, X_id_test, X_ood_test, mute=True)
all_was_dist.append(wd)

  train_df = pd.concat([data_dict['2006'], data_dict['2008']], ignore_index=True)
  X_train = X_train.drop(columns=all_na_cols).fillna(np.nan)
  X_id_test = X_id_test.drop(columns=all_na_cols).fillna(np.nan)
  X_ood_test = X_ood_test.drop(columns=all_na_cols).fillna(np.nan)


Epoch 1/100 - Accuracy: 0.5839
Epoch 2/100 - Accuracy: 0.6070
Epoch 3/100 - Accuracy: 0.6390
Epoch 4/100 - Accuracy: 0.6576
Epoch 5/100 - Accuracy: 0.6690
Epoch 6/100 - Accuracy: 0.6752
Epoch 7/100 - Accuracy: 0.6808
Epoch 8/100 - Accuracy: 0.6833
Epoch 9/100 - Accuracy: 0.6871
Epoch 10/100 - Accuracy: 0.6897
Epoch 11/100 - Accuracy: 0.6929
Epoch 12/100 - Accuracy: 0.6956
Epoch 13/100 - Accuracy: 0.6986
Epoch 14/100 - Accuracy: 0.7014
Epoch 15/100 - Accuracy: 0.7035
Epoch 16/100 - Accuracy: 0.7052
Epoch 17/100 - Accuracy: 0.7054
Epoch 18/100 - Accuracy: 0.7076
Epoch 19/100 - Accuracy: 0.7101
Epoch 20/100 - Accuracy: 0.7124
Epoch 21/100 - Accuracy: 0.7132
Epoch 22/100 - Accuracy: 0.7147
Epoch 23/100 - Accuracy: 0.7163
Epoch 24/100 - Accuracy: 0.7181
Epoch 25/100 - Accuracy: 0.7198
Epoch 26/100 - Accuracy: 0.7209
Epoch 27/100 - Accuracy: 0.7223
Epoch 28/100 - Accuracy: 0.7247
Epoch 29/100 - Accuracy: 0.7252
Epoch 30/100 - Accuracy: 0.7266
Epoch 31/100 - Accuracy: 0.7274
Epoch 32/100 - Ac



Statistical Parity Difference (Train): 0.15481777987075845
Equalized Odds Difference (Train): 0.18710691823899372
Statistical Parity Difference (ID Test): 0.36458333333333337
Equalized Odds Difference (ID Test): 0.8
Statistical Parity Difference (OOD Test): 0.7777777777777778
Equalized Odds Difference (OOD Test): 0.8


In [42]:
load_num = 3

X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test = load_data_scenario_3(imputed=True)
X_train_imp = X_train
X_id_test_imp = X_id_test
X_ood_test_imp = X_ood_test

X_train_imp_dropped = X_train_imp[X_train_imp[ADV_SENS_ATTR].notna()]
y_train_dropped = y_train[X_train_imp[ADV_SENS_ATTR].notna()]
X_train_imp_drop_normed = imp_norm_drop_miss_pipe.fit_transform(X_train_imp_dropped)

X_id_test_imp_dropped = X_id_test_imp[X_id_test_imp[ADV_SENS_ATTR].notna()]
y_id_test_dropped = y_id_test[X_id_test_imp[ADV_SENS_ATTR].notna()]
X_id_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_id_test_imp_dropped)

X_ood_test_imp_dropped = X_ood_test_imp[X_ood_test_imp[ADV_SENS_ATTR].notna()]
y_ood_test_dropped = y_ood_test[X_ood_test_imp[ADV_SENS_ATTR].notna()]
X_ood_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_ood_test_imp_dropped)

adv_clf, metrics_history = train_adv(X_train_imp_drop_normed, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped)

with open(f'./{ADV_SENS_ATTR}/metrics_{load_num}_eval.pkl', 'wb') as file:
    pickle.dump(metrics_history, file)

evaluate(adv_clf, X_id_test, y_id_test, X_ood_test, y_ood_test, save_as=f"./{ADV_SENS_ATTR}/adv_{load_num}_eval.txt")

sp, eo = get_fairness_metrics_adv(
      adv_clf, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped,
      sensitive_attr=ADV_SENS_ATTR, mute=False
  )


all_sp.append(sp)
all_eo.append(eo)

wd = calculate_wasserstein_distance(X_train, X_id_test, X_ood_test, mute=True)
all_was_dist.append(wd)

  dataset = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010']], ignore_index=True)
  X_train = X_train.drop(columns=all_na_cols).fillna(np.nan)
  X_id_test = X_id_test.drop(columns=all_na_cols).fillna(np.nan)
  X_ood_test = X_ood_test.drop(columns=all_na_cols).fillna(np.nan)


Epoch 1/100 - Accuracy: 0.6303
Epoch 2/100 - Accuracy: 0.6381
Epoch 3/100 - Accuracy: 0.6500
Epoch 4/100 - Accuracy: 0.6621
Epoch 5/100 - Accuracy: 0.6722
Epoch 6/100 - Accuracy: 0.6815
Epoch 7/100 - Accuracy: 0.6899
Epoch 8/100 - Accuracy: 0.6959
Epoch 9/100 - Accuracy: 0.7018
Epoch 10/100 - Accuracy: 0.7067
Epoch 11/100 - Accuracy: 0.7100
Epoch 12/100 - Accuracy: 0.7121
Epoch 13/100 - Accuracy: 0.7149
Epoch 14/100 - Accuracy: 0.7171
Epoch 15/100 - Accuracy: 0.7181
Epoch 16/100 - Accuracy: 0.7199
Epoch 17/100 - Accuracy: 0.7218
Epoch 18/100 - Accuracy: 0.7234
Epoch 19/100 - Accuracy: 0.7250
Epoch 20/100 - Accuracy: 0.7270
Epoch 21/100 - Accuracy: 0.7285
Epoch 22/100 - Accuracy: 0.7297
Epoch 23/100 - Accuracy: 0.7305
Epoch 24/100 - Accuracy: 0.7316
Epoch 25/100 - Accuracy: 0.7334
Epoch 26/100 - Accuracy: 0.7344
Epoch 27/100 - Accuracy: 0.7353
Epoch 28/100 - Accuracy: 0.7362
Epoch 29/100 - Accuracy: 0.7373
Epoch 30/100 - Accuracy: 0.7379
Epoch 31/100 - Accuracy: 0.7387
Epoch 32/100 - Ac



Statistical Parity Difference (Train): 0.17210884353741496
Equalized Odds Difference (Train): 0.5396205041064854
Statistical Parity Difference (ID Test): 0.4246031746031746
Equalized Odds Difference (ID Test): 0.6666666666666667
Statistical Parity Difference (OOD Test): 0.7
Equalized Odds Difference (OOD Test): 0.8


In [43]:
load_num = 4

X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test = load_data_scenario_4(imputed=True)
X_train_imp = X_train
X_id_test_imp = X_id_test
X_ood_test_imp = X_ood_test

X_train_imp_dropped = X_train_imp[X_train_imp[ADV_SENS_ATTR].notna()]
y_train_dropped = y_train[X_train_imp[ADV_SENS_ATTR].notna()]
X_train_imp_drop_normed = imp_norm_drop_miss_pipe.fit_transform(X_train_imp_dropped)

X_id_test_imp_dropped = X_id_test_imp[X_id_test_imp[ADV_SENS_ATTR].notna()]
y_id_test_dropped = y_id_test[X_id_test_imp[ADV_SENS_ATTR].notna()]
X_id_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_id_test_imp_dropped)

X_ood_test_imp_dropped = X_ood_test_imp[X_ood_test_imp[ADV_SENS_ATTR].notna()]
y_ood_test_dropped = y_ood_test[X_ood_test_imp[ADV_SENS_ATTR].notna()]
X_ood_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_ood_test_imp_dropped)

adv_clf, metrics_history = train_adv(X_train_imp_drop_normed, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped)

with open(f'./{ADV_SENS_ATTR}/metrics_{load_num}_eval.pkl', 'wb') as file:
    pickle.dump(metrics_history, file)

evaluate(adv_clf, X_id_test, y_id_test, X_ood_test, y_ood_test, save_as=f"./{ADV_SENS_ATTR}/adv_{load_num}_eval.txt")

sp, eo = get_fairness_metrics_adv(
      adv_clf, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped,
      sensitive_attr=ADV_SENS_ATTR, mute=False
  )


all_sp.append(sp)
all_eo.append(eo)

wd = calculate_wasserstein_distance(X_train, X_id_test, X_ood_test, mute=True)
all_was_dist.append(wd)

  dataset = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010'], data_dict['2012'], data_dict['2014']], ignore_index=True)
  X_train = X_train.drop(columns=all_na_cols).fillna(np.nan)
  X_id_test = X_id_test.drop(columns=all_na_cols).fillna(np.nan)
  X_ood_test = X_ood_test.drop(columns=all_na_cols).fillna(np.nan)


Epoch 1/100 - Accuracy: 0.7039
Epoch 2/100 - Accuracy: 0.7039
Epoch 3/100 - Accuracy: 0.7040
Epoch 4/100 - Accuracy: 0.7044
Epoch 5/100 - Accuracy: 0.7054
Epoch 6/100 - Accuracy: 0.7090
Epoch 7/100 - Accuracy: 0.7157
Epoch 8/100 - Accuracy: 0.7236
Epoch 9/100 - Accuracy: 0.7301
Epoch 10/100 - Accuracy: 0.7347
Epoch 11/100 - Accuracy: 0.7378
Epoch 12/100 - Accuracy: 0.7400
Epoch 13/100 - Accuracy: 0.7422
Epoch 14/100 - Accuracy: 0.7447
Epoch 15/100 - Accuracy: 0.7467
Epoch 16/100 - Accuracy: 0.7487
Epoch 17/100 - Accuracy: 0.7507
Epoch 18/100 - Accuracy: 0.7521
Epoch 19/100 - Accuracy: 0.7527
Epoch 20/100 - Accuracy: 0.7535
Fairness metrics improved, continuing...
Epoch 21/100 - Accuracy: 0.7543
Epoch 22/100 - Accuracy: 0.7548
Epoch 23/100 - Accuracy: 0.7555
Epoch 24/100 - Accuracy: 0.7566
Epoch 25/100 - Accuracy: 0.7574
Fairness metrics improved, continuing...
Epoch 26/100 - Accuracy: 0.7589
Epoch 27/100 - Accuracy: 0.7598
Epoch 28/100 - Accuracy: 0.7616
Epoch 29/100 - Accuracy: 0.7627



Statistical Parity Difference (Train): 0.052602436323366586
Equalized Odds Difference (Train): 0.09139784946236562
Statistical Parity Difference (ID Test): 0.23203026481715008
Equalized Odds Difference (ID Test): 0.5
Statistical Parity Difference (OOD Test): 0.21875
Equalized Odds Difference (OOD Test): 0.6666666666666666


In [44]:
load_num = 5

X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test = load_data_scenario_5(imputed=True)
X_train_imp = X_train
X_id_test_imp = X_id_test
X_ood_test_imp = X_ood_test

X_train_imp_dropped = X_train_imp[X_train_imp[ADV_SENS_ATTR].notna()]
y_train_dropped = y_train[X_train_imp[ADV_SENS_ATTR].notna()]
X_train_imp_drop_normed = imp_norm_drop_miss_pipe.fit_transform(X_train_imp_dropped)

X_id_test_imp_dropped = X_id_test_imp[X_id_test_imp[ADV_SENS_ATTR].notna()]
y_id_test_dropped = y_id_test[X_id_test_imp[ADV_SENS_ATTR].notna()]
X_id_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_id_test_imp_dropped)

X_ood_test_imp_dropped = X_ood_test_imp[X_ood_test_imp[ADV_SENS_ATTR].notna()]
y_ood_test_dropped = y_ood_test[X_ood_test_imp[ADV_SENS_ATTR].notna()]
X_ood_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_ood_test_imp_dropped)

adv_clf, metrics_history = train_adv(X_train_imp_drop_normed, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped)


with open(f'./{ADV_SENS_ATTR}/metrics_{load_num}_eval.pkl', 'wb') as file:
    pickle.dump(metrics_history, file)

evaluate(adv_clf, X_id_test, y_id_test, X_ood_test, y_ood_test, save_as=f"./{ADV_SENS_ATTR}/adv_{load_num}_eval.txt")

sp, eo = get_fairness_metrics_adv(
      adv_clf, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped,
      sensitive_attr=ADV_SENS_ATTR, mute=False
  )


all_sp.append(sp)
all_eo.append(eo)

wd = calculate_wasserstein_distance(X_train, X_id_test, X_ood_test, mute=True)
all_was_dist.append(wd)

  dataset = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010'], data_dict['2012'], data_dict['2014'], data_dict['2016']], ignore_index=True)
  ood_test_df = pd.concat([data_dict['2006'], data_dict['2008'], data_dict['2010'], data_dict['2012'], data_dict['2014'], data_dict['2016']], ignore_index=True)
  X_train = X_train.drop(columns=all_na_cols).fillna(np.nan)
  X_id_test = X_id_test.drop(columns=all_na_cols).fillna(np.nan)
  X_ood_test = X_ood_test.drop(columns=all_na_cols).fillna(np.nan)


Epoch 1/100 - Accuracy: 0.7320
Epoch 2/100 - Accuracy: 0.7320
Epoch 3/100 - Accuracy: 0.7320
Epoch 4/100 - Accuracy: 0.7320
Epoch 5/100 - Accuracy: 0.7320
Epoch 6/100 - Accuracy: 0.7322
Epoch 7/100 - Accuracy: 0.7333
Epoch 8/100 - Accuracy: 0.7355
Epoch 9/100 - Accuracy: 0.7402
Epoch 10/100 - Accuracy: 0.7460
Fairness metrics improved, continuing...
Epoch 11/100 - Accuracy: 0.7503
Epoch 12/100 - Accuracy: 0.7533
Epoch 13/100 - Accuracy: 0.7552
Epoch 14/100 - Accuracy: 0.7572
Epoch 15/100 - Accuracy: 0.7587
Fairness metrics improved, continuing...
Epoch 16/100 - Accuracy: 0.7603
Epoch 17/100 - Accuracy: 0.7621
Epoch 18/100 - Accuracy: 0.7634
Epoch 19/100 - Accuracy: 0.7646
Epoch 20/100 - Accuracy: 0.7656
Fairness metrics improved, continuing...
Epoch 21/100 - Accuracy: 0.7662
Epoch 22/100 - Accuracy: 0.7661
Epoch 23/100 - Accuracy: 0.7669
Epoch 24/100 - Accuracy: 0.7670
Epoch 25/100 - Accuracy: 0.7674
No improvement in fairness metrics for 1 evaluations.
Epoch 26/100 - Accuracy: 0.7678




Statistical Parity Difference (Train): 0.3564356435643564
Equalized Odds Difference (Train): 0.7884057971014493
Statistical Parity Difference (ID Test): 0.5454545454545454
Equalized Odds Difference (ID Test): 1.0
Statistical Parity Difference (OOD Test): 0.0
Equalized Odds Difference (OOD Test): 0.0


In [45]:
load_num = 6

X_train, y_train, X_id_test, y_id_test, X_ood_test, y_ood_test = load_data_scenario_6(imputed=True)
X_train_imp = X_train
X_id_test_imp = X_id_test
X_ood_test_imp = X_ood_test

X_train_imp_dropped = X_train_imp[X_train_imp[ADV_SENS_ATTR].notna()]
y_train_dropped = y_train[X_train_imp[ADV_SENS_ATTR].notna()]
X_train_imp_drop_normed = imp_norm_drop_miss_pipe.fit_transform(X_train_imp_dropped)

X_id_test_imp_dropped = X_id_test_imp[X_id_test_imp[ADV_SENS_ATTR].notna()]
y_id_test_dropped = y_id_test[X_id_test_imp[ADV_SENS_ATTR].notna()]
X_id_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_id_test_imp_dropped)

X_ood_test_imp_dropped = X_ood_test_imp[X_ood_test_imp[ADV_SENS_ATTR].notna()]
y_ood_test_dropped = y_ood_test[X_ood_test_imp[ADV_SENS_ATTR].notna()]
X_ood_test_imp_drop_normed = imp_norm_drop_miss_pipe.transform(X_ood_test_imp_dropped)

adv_clf, metrics_history = train_adv(X_train_imp_drop_normed, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped)

with open(f'./{ADV_SENS_ATTR}/metrics_{load_num}_eval.pkl', 'wb') as file:
    pickle.dump(metrics_history, file)

evaluate(adv_clf, X_id_test, y_id_test, X_ood_test, y_ood_test, save_as=f"./{ADV_SENS_ATTR}/adv_{load_num}_eval.txt")

sp, eo = get_fairness_metrics_adv(
      adv_clf, X_train_imp_dropped, y_train_dropped, X_id_test_imp_dropped, y_id_test_dropped, X_ood_test_imp_dropped, y_ood_test_dropped,
      sensitive_attr=ADV_SENS_ATTR, mute=False
  )


all_sp.append(sp)
all_eo.append(eo)

wd = calculate_wasserstein_distance(X_train, X_id_test, X_ood_test, mute=True)
all_was_dist.append(wd)

  dataset = pd.concat([data_dict['2006'], data_dict['2008']], ignore_index=True)
  X_train = pd.concat([dataset.drop(columns=["mortality_ten_years"]), X_train])
  X_train = X_train.drop(columns=all_na_cols).fillna(np.nan)
  X_id_test = X_id_test.drop(columns=all_na_cols).fillna(np.nan)
  X_ood_test = X_ood_test.drop(columns=all_na_cols).fillna(np.nan)


Epoch 1/100 - Accuracy: 0.6261
Epoch 2/100 - Accuracy: 0.6353
Epoch 3/100 - Accuracy: 0.6487
Epoch 4/100 - Accuracy: 0.6612
Epoch 5/100 - Accuracy: 0.6716
Epoch 6/100 - Accuracy: 0.6805
Epoch 7/100 - Accuracy: 0.6885
Epoch 8/100 - Accuracy: 0.6948
Epoch 9/100 - Accuracy: 0.7005
Epoch 10/100 - Accuracy: 0.7053
Epoch 11/100 - Accuracy: 0.7089
Epoch 12/100 - Accuracy: 0.7113
Epoch 13/100 - Accuracy: 0.7136
Epoch 14/100 - Accuracy: 0.7156
Epoch 15/100 - Accuracy: 0.7169
Epoch 16/100 - Accuracy: 0.7185
Epoch 17/100 - Accuracy: 0.7204
Epoch 18/100 - Accuracy: 0.7226
Epoch 19/100 - Accuracy: 0.7237
Epoch 20/100 - Accuracy: 0.7254
Epoch 21/100 - Accuracy: 0.7266
Epoch 22/100 - Accuracy: 0.7287
Epoch 23/100 - Accuracy: 0.7289
Epoch 24/100 - Accuracy: 0.7300
Epoch 25/100 - Accuracy: 0.7313
Epoch 26/100 - Accuracy: 0.7330
Epoch 27/100 - Accuracy: 0.7348
Epoch 28/100 - Accuracy: 0.7353
Epoch 29/100 - Accuracy: 0.7363
Epoch 30/100 - Accuracy: 0.7375
Epoch 31/100 - Accuracy: 0.7387
Epoch 32/100 - Ac



Statistical Parity Difference (Train): 0.1844262295081967
Equalized Odds Difference (Train): 0.33333333333333337
Statistical Parity Difference (ID Test): 0.3
Equalized Odds Difference (ID Test): 0.5555555555555556
Statistical Parity Difference (OOD Test): 0.75
Equalized Odds Difference (OOD Test): 0.8


In [46]:
#save

import pickle

with open(f'./{ADV_SENS_ATTR}/adv_was_dist.pkl', 'wb') as file:
    pickle.dump(all_was_dist, file)

with open(f'./{ADV_SENS_ATTR}/adv_sp.pkl', 'wb') as file:
    pickle.dump(all_sp, file)

with open(f'./{ADV_SENS_ATTR}/adv_eo.pkl', 'wb') as file:
    pickle.dump(all_eo, file)

In [47]:
!zip -r /content/state_live_current.zip /content/state_live_current

  adding: content/state_live_current/ (stored 0%)
  adding: content/state_live_current/metrics_3_eval.pkl (deflated 27%)
  adding: content/state_live_current/adv_eo.pkl (deflated 42%)
  adding: content/state_live_current/adv_5_eval.txt (deflated 72%)
  adding: content/state_live_current/metrics_1_eval.pkl (deflated 29%)
  adding: content/state_live_current/adv_2_eval.txt (deflated 71%)
  adding: content/state_live_current/metrics_6_eval.pkl (deflated 27%)
  adding: content/state_live_current/adv_3_eval.txt (deflated 71%)
  adding: content/state_live_current/adv_6_eval.txt (deflated 71%)
  adding: content/state_live_current/adv_sp.pkl (deflated 47%)
  adding: content/state_live_current/metrics_2_eval.pkl (deflated 28%)
  adding: content/state_live_current/adv_was_dist.pkl (deflated 52%)
  adding: content/state_live_current/metrics_5_eval.pkl (deflated 29%)
  adding: content/state_live_current/adv_1_eval.txt (deflated 71%)
  adding: content/state_live_current/adv_4_eval.txt (deflated 71%

In [48]:
!zip -r /content/gender.zip /content/gender

  adding: content/gender/ (stored 0%)
  adding: content/gender/metrics_3_eval.pkl (deflated 26%)
  adding: content/gender/adv_eo.pkl (deflated 17%)
  adding: content/gender/adv_5_eval.txt (deflated 73%)
  adding: content/gender/metrics_1_eval.pkl (deflated 27%)
  adding: content/gender/adv_2_eval.txt (deflated 71%)
  adding: content/gender/metrics_6_eval.pkl (deflated 27%)
  adding: content/gender/adv_3_eval.txt (deflated 71%)
  adding: content/gender/adv_6_eval.txt (deflated 71%)
  adding: content/gender/adv_sp.pkl (deflated 41%)
  adding: content/gender/metrics_2_eval.pkl (deflated 28%)
  adding: content/gender/adv_was_dist.pkl (deflated 52%)
  adding: content/gender/metrics_5_eval.pkl (deflated 26%)
  adding: content/gender/adv_1_eval.txt (deflated 71%)
  adding: content/gender/adv_4_eval.txt (deflated 70%)
  adding: content/gender/metrics_4_eval.pkl (deflated 23%)


In [49]:
!zip -r /content/race.zip /content/race

  adding: content/race/ (stored 0%)
  adding: content/race/metrics_3_eval.pkl (deflated 31%)
  adding: content/race/adv_eo.pkl (deflated 48%)
  adding: content/race/adv_5_eval.txt (deflated 76%)
  adding: content/race/metrics_1_eval.pkl (deflated 30%)
  adding: content/race/adv_2_eval.txt (deflated 71%)
  adding: content/race/metrics_6_eval.pkl (deflated 31%)
  adding: content/race/adv_3_eval.txt (deflated 70%)
  adding: content/race/adv_6_eval.txt (deflated 71%)
  adding: content/race/adv_sp.pkl (deflated 46%)
  adding: content/race/metrics_2_eval.pkl (deflated 30%)
  adding: content/race/adv_was_dist.pkl (deflated 58%)
  adding: content/race/metrics_5_eval.pkl (deflated 30%)
  adding: content/race/adv_1_eval.txt (deflated 71%)
  adding: content/race/adv_4_eval.txt (deflated 70%)
  adding: content/race/metrics_4_eval.pkl (deflated 22%)
