In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from autogluon.tabular import TabularPredictor
import fairness_metrics
from data_loaders import AdultDataset

label = "income"
protected_feature = "sex"

# Adult dataset


## Loading Data


In [2]:
adult_dataset = AdultDataset()
adult_df = adult_dataset.load_data()
print(adult_df.shape)
adult_df.head()

(48842, 13)


Unnamed: 0,age,workclass,education,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,Bachelors,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,Bachelors,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,HS-grad,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,11th,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,Bachelors,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [3]:
assert adult_df.isnull().sum().sum() == 0
assert len(adult_df["income"].unique()) == 2
assert len(adult_df["sex"].unique()) == 2

In [4]:
adult_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             48842 non-null  int64 
 1   workclass       48842 non-null  object
 2   education       48842 non-null  object
 3   marital-status  48842 non-null  object
 4   occupation      48842 non-null  object
 5   relationship    48842 non-null  object
 6   race            48842 non-null  object
 7   sex             48842 non-null  object
 8   capital-gain    48842 non-null  int64 
 9   capital-loss    48842 non-null  int64 
 10  hours-per-week  48842 non-null  int64 
 11  native-country  48842 non-null  object
 12  income          48842 non-null  object
dtypes: int64(4), object(9)
memory usage: 4.8+ MB


## Comparing the data

Here I'm making sure that the data is the same one that the paper is using.


In [5]:
mask_50k = adult_df["income"] == ">50K"
mask_male = adult_df["sex"] == "Male"
mask_female = adult_df["sex"] == "Female"
previleged_males = sum(mask_50k & mask_male)
previleged_females = sum(mask_50k & mask_female)

print(
    f"{'Males with >50K income: ':<26}",
    previleged_males,
    "-",
    f"{previleged_males / sum(mask_male):.1%} of all males",
)
print(
    "Females with >50K income: ",
    previleged_females,
    "-",
    f"{previleged_females / sum(mask_female):.1%} of all females",
)

Males with >50K income:    9918 - 30.4% of all males
Females with >50K income:  1769 - 10.9% of all females


In [6]:
train_df, test_df = adult_dataset.get_train_test_data()

assert len(train_df) + len(test_df) == len(adult_df)
assert len(train_df) > 0
assert len(test_df) > 0

In [16]:
models_to_train = {"CAT": {}, "GBM": {}, "RF": {}, "XGB": {}}


def train_predictor(data):
    return TabularPredictor(
        label="income", eval_metric="roc_auc", path="ag_models_original_adult_dataset"
    ).fit(
        train_data=data,
        presets="medium_quality",
        num_bag_folds=0,
        num_bag_sets=0,
        num_stack_levels=0,
        hyperparameters=models_to_train,
        verbosity=1,
    )

In [17]:
predictor = train_predictor(train_df)

AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])


In [9]:
print(f'\n{"Model":<15} {"ROC AUC":<8} | {"SPD":<5}')

list_protected = [1 if p == "Female" else 0 for p in test_df[protected_feature]]
for model in predictor.get_model_names():
    if "WeightedEnsemble" in model:
        continue

    predictions = predictor.predict_proba(test_df, model=model)[">50K"]
    model_dsp = np.empty(
        101,
    )

    for i, threshold in enumerate(np.arange(0.0, 1.0, 0.01)):
        model_dsp[i] = fairness_metrics.DSP(
            [1 if p >= threshold else 0 for p in predictions], list_protected
        )

    format_mean_std = f"{model_dsp.mean():.3f} ± {model_dsp.std():.3f}"

    print(
        f'{model:<15} {predictor.evaluate(test_df, model=model, silent=True)["roc_auc"]:<8.3f} | {format_mean_std}',
    )


Model           ROC AUC  | SPD  
LightGBM        0.925    | 0.187 ± 0.110
RandomForest    0.896    | 0.190 ± 0.092
CatBoost        0.926    | 0.188 ± 0.110
XGBoost         0.927    | 0.188 ± 0.110


### Inference

All the base values are very close to the paper.


## Benchmarking with the synthetic datasets

The paper does not specify how many rows were generated, so I generated 10.000 rows.


In [10]:
# Train dataframe that will be used to train the synthetic datasets
train_df.to_csv("../Datasets/output/adult-train.csv", index=False)

## Loading the synthetic datasets

This dataset was created using Mostly.ai and was augmented by inserting the probabilities for the positive target feature, as well as an interpolation for the unprivileged group to match the probabilities of the previlidges group.


In [14]:
synthetic_df = pd.read_csv("../Datasets/output/adult-probabilistic.csv")
synthetic_df.head()

Unnamed: 0,age,workclass,education,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income_synthetic,income_model_probability,bin,bin_centers,interpolated_probability
0,51,Private,HS-grad,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,50,United-States,1,0.869992,"(0.8649923, 0.8699926]",0.867492,
1,38,Private,HS-grad,Married-civ-spouse,Sales,Wife,White,Female,0,0,40,United-States,0,0.083617,"(0.0833419, 0.0866748]",0.085008,0.810349
2,43,Private,Bachelors,Married-civ-spouse,Sales,Husband,Asian-Pac-Islander,Male,0,0,98,South,0,0.143339,"(0.1400086, 0.1433407]",0.141675,
3,47,Private,HS-grad,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,45,Mexico,0,0.030005,"(0.02893321, 0.03000648]",0.02947,
4,41,Private,HS-grad,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,1,0.345514,"(0.3394803, 0.3460859]",0.342783,
