In [75]:
import pickle
import time

import numpy as np
import pandas as pd
import lightgbm as lgb

# metrics
from fairlearn.metrics import demographic_parity_difference
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
from sklearn.metrics import f1_score, accuracy_score, mean_squared_error

In [4]:
%load_ext autoreload
%autoreload 2
from main.utils.preprocessing import preprocess_census
from main.estimators.evaluation import cv_early_stopping
from main.fairness.metrics import unfairness, get_all_predictions, calculate_metrics
from main.utils.dataloader import load_sunbelt_data

In [5]:
# State codes handed to load_sunbelt_data.
sunbelt_states = [
    'AL', 'AZ', 'FL', 'GA', 'LA',
    'MS', 'NM', 'SC', 'TX', 'CA',
]

# Columns declared categorical, both for preprocessing here and for
# LightGBM further down the notebook.
cat_features = ['OCCP', 'POBP', 'SCHL', 'RELP']

data_all = load_sunbelt_data(states=sunbelt_states)

# Preprocess with the helper's default split (no split_seed is passed here).
# The returned mapping is indexed later with 'X_train', 'y_train', 'X_test'
# and 'y_test'.
ret_dict = preprocess_census(
    data=data_all,
    target_feature='PINCP',
    sensitive_features=['SEX', 'RAC1P_black'],
    categorical_features=cat_features,
    continuous_features=['WKHP', 'AGEP'],
    objective='classification',
)

In [None]:
# Repeated-split experiment: for every split seed, train the LightGBM-based
# pipeline ("ours") and fairlearn's ExponentiatedGradient reduction ("bench"),
# then record accuracy, F1, demographic-parity gaps and runtime for both.
#
# res_dict layout: res_dict[<'ours'|'bench'>][<seed>][<metric name>] -> float
res_dict = {'bench': {}, 'ours': {}}

# Values that are identical for every repetition.
sunbelt_states = ['AL', 'AZ', 'FL', 'GA', 'LA',
                  'MS', 'NM', 'SC', 'TX', 'CA']
cat_features = ['OCCP', 'POBP', 'SCHL', 'RELP']
split_seeds = [42, 1029, 3948, 103, 56, 93983838, 828, 1928838, 900, 10]

# Probability cut-off used everywhere a score is turned into a 0/1 label.
decision_threshold = 0.48

# Key under which get_all_predictions / calculate_metrics store their results.
group_key = ('SEX_2', 'RAC1P_black')

for seed_ in split_seeds:

    # NOTE(review): the raw data is reloaded for every seed so that
    # preprocess_census always receives a fresh frame. Hoist this call out of
    # the loop once it is confirmed that preprocess_census does not mutate
    # its input.
    data_all = load_sunbelt_data(states=sunbelt_states)

    ret_dict = preprocess_census(data=data_all,
                                 target_feature='PINCP',
                                 sensitive_features=['SEX', 'RAC1P_black'],
                                 categorical_features=cat_features,
                                 continuous_features=['WKHP', 'AGEP'],
                                 objective='classification',
                                 split_seed=seed_)

    X_train, y_train = ret_dict['X_train'], ret_dict['y_train']
    X_test, y_test = ret_dict['X_test'], ret_dict['y_test']

    # Fresh dict per repetition so no helper can leak changes across seeds.
    params = {
        "objective": "binary",
        "metric": "auc",
        "min_data_in_leaf": 50,
        "learning_rate": 0.05,
        "feature_fraction": 0.9,
        "verbose": -1
    }

    cv_results = cv_early_stopping(params=params,
                                   nfolds=5,
                                   max_rounds=1000,
                                   early_stopping_rounds=20,
                                   X_train=X_train,
                                   y_train=y_train,
                                   categorical_feats=cat_features,
                                   objective='classification')

    # The CV metric is AUC (higher is better), hence argmax.
    best_res = np.argmax(cv_results['metric'])
    best_iter = cv_results['iterations'][best_res]

    # ---- Ours: retrain on the whole training set, then predict ----------
    # Only the final fit and the prediction step are timed; the CV search
    # above is excluded.
    start_ours = time.perf_counter()

    data_train_all = lgb.Dataset(data=X_train,
                                 label=y_train,
                                 categorical_feature=cat_features)

    best_estimator = lgb.train(params=params,
                               train_set=data_train_all,
                               num_boost_round=best_iter)

    output_check = get_all_predictions(best_estimator,
                                       list(group_key),
                                       ret_dict)

    time_ours = time.perf_counter() - start_ours

    ret_metrics = calculate_metrics(output_check,
                                    y_test,
                                    objective='classification',
                                    threshold=decision_threshold)

    # NOTE(review): 'level_0' / 'level_1' presumably denote successive
    # stages of the fairness correction -- confirm against
    # main.fairness.metrics.
    accuracy_ours_0 = ret_metrics[group_key]['level_0']['accuracy']
    f1_ours_0 = ret_metrics[group_key]['level_0']['f1_score']

    accuracy_ours_1 = ret_metrics[group_key]['level_1']['accuracy']
    f1_ours_1 = ret_metrics[group_key]['level_1']['f1_score']

    # Binarise the scores with the same cut-off handed to calculate_metrics.
    preds_level_0 = output_check[group_key]['level_0']['prediction']
    preds_level_0 = np.where(preds_level_0 > decision_threshold, 1, 0)

    preds_level_1 = output_check[group_key]['level_1']['prediction']
    preds_level_1 = np.where(preds_level_1 > decision_threshold, 1, 0)

    # Level 0 is scored on SEX_2 only; level 1 sums the gaps of both
    # sensitive attributes.
    fairness_ours_0 = demographic_parity_difference(
        y_test,
        preds_level_0,
        sensitive_features=X_test.SEX_2)

    fairness_ours_1 = (
        demographic_parity_difference(
            y_test,
            preds_level_1,
            sensitive_features=X_test.RAC1P_black)
        + demographic_parity_difference(
            y_test,
            preds_level_1,
            sensitive_features=X_test.SEX_2))

    # ---- Benchmark: fairlearn ExponentiatedGradient ---------------------
    time_start_theirs = time.perf_counter()

    # specify separately the boosting iterations, as otherwise
    # the calculations would be prohibitively long...
    # min_data_in_leaf is passed as an int, matching `params` above.
    classifier_lgbm = lgb.LGBMClassifier(metric='auc',
                                         min_data_in_leaf=50,
                                         learning_rate=0.05,
                                         feature_fraction=0.9,
                                         num_iterations=best_iter)

    constraint = DemographicParity()
    mitigator = ExponentiatedGradient(classifier_lgbm,
                                      constraint,
                                      max_iter=5)

    # The reduction is constrained on SEX_2 only.
    mitigator.fit(X_train,
                  y_train,
                  sensitive_features=X_train.SEX_2)

    # ExponentiatedGradient.predict samples from a randomized classifier;
    # seeding it makes the benchmark numbers reproducible per split.
    y_pred_mitigated = mitigator.predict(X_test, random_state=seed_)

    time_theirs = time.perf_counter() - time_start_theirs

    fairness_theirs_0 = demographic_parity_difference(
        y_test,
        y_pred_mitigated,
        sensitive_features=X_test.SEX_2)

    # Same predictions as above, so the SEX_2 term is reused, not recomputed.
    fairness_theirs_1 = (
        demographic_parity_difference(
            y_test,
            y_pred_mitigated,
            sensitive_features=X_test.RAC1P_black)
        + fairness_theirs_0)

    accuracy_theirs = accuracy_score(y_test, y_pred_mitigated)
    f1_theirs = f1_score(y_test, y_pred_mitigated)

    # ---- Collect results -------------------------------------------------
    # The benchmark yields a single set of predictions, so its *_1 accuracy
    # and F1 entries repeat the level-0 values; only the unfairness differs.
    res_dict['bench'][seed_] = {
        'acc': accuracy_theirs,
        'f1': f1_theirs,
        'unfair': fairness_theirs_0,
        'acc_1': accuracy_theirs,
        'f1_1': f1_theirs,
        'unfair_1': fairness_theirs_1,
        'time': time_theirs,
    }

    res_dict['ours'][seed_] = {
        'acc': accuracy_ours_0,
        'f1': f1_ours_0,
        'unfair': fairness_ours_0,
        'acc_1': accuracy_ours_1,
        'f1_1': f1_ours_1,
        'unfair_1': fairness_ours_1,
        'time': time_ours,
    }

    # Checkpoint after every seed: each file holds all results gathered so
    # far, so an interrupted run loses at most the seed in progress.
    with open(f'data/results/output_bivariate_after_seed_{seed_}.pkl', 'wb') as con_:
        pickle.dump(res_dict, con_)



In [258]:
# Print one LaTeX table row per recorded metric:
#   & <ours mean> $\pm$ <ours std> & <bench mean> $\pm$ <bench std> \\
# Mean and standard deviation are taken across the split seeds.
# Frames are built once: rows are metric names, columns are seeds.
ours_df = pd.DataFrame(res_dict['ours'])
bench_df = pd.DataFrame(res_dict['bench'])

for metric_ in ours_df.index:
    # Look rows up by metric label so "ours" and "bench" cannot be paired
    # incorrectly if their row order ever differs.
    ours_m = ours_df.loc[metric_].mean()
    ours_s = ours_df.loc[metric_].std()

    theirs_m = bench_df.loc[metric_].mean()
    theirs_s = bench_df.loc[metric_].std()

    # Backslashes are escaped explicitly ('\\pm'), since '\p' is not a valid
    # escape sequence; the emitted text is unchanged.
    print(f' &{np.round(ours_m,3)} $\\pm$ {np.round(ours_s,3)}  & {np.round(theirs_m,3)}  $\\pm$ {np.round(theirs_s,3)} \\\\')



 &0.809 $\pm$ 0.001  & 0.808  $\pm$ 0.001 \\
 &0.737 $\pm$ 0.001  & 0.73  $\pm$ 0.002 \\
 &0.003 $\pm$ 0.002  & 0.021  $\pm$ 0.002 \\
 &0.804 $\pm$ 0.001  & 0.808  $\pm$ 0.001 \\
 &0.73 $\pm$ 0.001  & 0.73  $\pm$ 0.002 \\
 &0.009 $\pm$ 0.005  & 0.207  $\pm$ 0.005 \\
 &6.319 $\pm$ 0.422  & 100.893  $\pm$ 10.467 \\
