In [1]:
import aif360
import numpy as np
import pandas as pd

from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector

from aif360.metrics import utils
from aif360.datasets import BinaryLabelDataset
from aif360.datasets.multiclass_label_dataset import MulticlassLabelDataset

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
                import load_preproc_data_adult, load_preproc_data_compas


import sklearn 
from sklearn.linear_model import LogisticRegression
import imblearn
import matplotlib.pyplot as plt 
from sklearn.metrics import classification_report, confusion_matrix
import math 


In [2]:
# Load the German credit data from a CSV file expected in the working directory.
german_data = pd.read_csv('german_data.csv')
# Plain-text dump of the frame; pandas elides the middle rows/columns for large frames.
print(german_data)

     stat_check_acc  duration_month  credit_history  purpose  credit_amount  \
0                 1               6               5        4           1169   
1                 2              48               3        4           5951   
2                 4              12               5        7           2096   
3                 1              42               3        3           7882   
4                 1              24               4        1           4870   
..              ...             ...             ...      ...            ...   
995               4              12               3        3           1736   
996               1              30               3        2           3857   
997               4              12               3        4            804   
998               1              45               3        4           1845   
999               2              45               5        2           4576   

     Age Group  savings_bonds  employment_since  in

In [3]:
### set sensitive attribute equal to 'sex' or 'Age Group' ###
# This column name is passed as the protected attribute of every BinaryLabelDataset
# below and is the key of the privileged/unprivileged group dictionaries.

sen_att = 'Age Group'

In [4]:
# Split the data into train / validation / test sets.
# 20% of all rows are held out for testing; 25% of the remaining 80% (i.e. 20%
# of all rows) become the validation set. Both splits are stratified on the label.

from sklearn.model_selection import train_test_split

# Features are every column except the label; the label is kept as a one-column frame.
X = german_data.drop(columns=['approval'])
y = german_data[['approval']]

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    test_size=0.25,
    stratify=y_train,
    random_state=1,
)


In [5]:
# Create the SMOTE over-sampler that is applied to the training set in the next cell.
# random_state is pinned so the resampling can be repeated.
# NOTE(review): the name `os` would shadow the standard-library `os` module if
# that module is ever imported in this notebook.
from imblearn.over_sampling import SMOTE
os = SMOTE(random_state=4)

In [6]:
# Keep the feature column names of the training frame.
columns = X_train.columns
# Over-sample the training split only (validation and test stay untouched).
# `fit_resample` is the supported API; the older `fit_sample` alias has been
# removed from current imbalanced-learn releases and raises AttributeError there.
X_train_balanced, y_train_balanced = os.fit_resample(X_train, y_train)

In [7]:
### combine the sets together to make them compatible with the AIF360 library ###
# Column-wise concat puts the resampled features and the label column in one frame;
# join="inner" keeps only the row labels present in both inputs.
balanced_german_train_df = pd.concat([X_train_balanced, y_train_balanced], axis=1, join="inner")

In [8]:
# Validation features and label side by side in one frame (not resampled).
german_val_df = pd.concat([X_val, y_val], axis=1, join="inner")

In [9]:
# Test features and label side by side in one frame (not resampled).
german_test_df = pd.concat([X_test, y_test], axis=1, join="inner")

In [10]:
from aif360.algorithms.preprocessing import DisparateImpactRemover

# AIF360 view of the SMOTE-balanced training frame: 'approval' is the binary
# label (1 favorable, 0 unfavorable) and `sen_att` names the protected column.
dataset_orig_train = BinaryLabelDataset(
    df=balanced_german_train_df,
    label_names=['approval'],
    protected_attribute_names=[sen_att],
    favorable_label=1,
    unfavorable_label=0,
)

In [11]:
from aif360.algorithms.preprocessing import DisparateImpactRemover

# AIF360 view of the validation frame: 'approval' is the binary label
# (1 favorable, 0 unfavorable) and `sen_att` names the protected column.
dataset_orig_valid = BinaryLabelDataset(
    df=german_val_df,
    label_names=['approval'],
    protected_attribute_names=[sen_att],
    favorable_label=1,
    unfavorable_label=0,
)

In [12]:
from aif360.algorithms.preprocessing import DisparateImpactRemover

# AIF360 view of the held-out test frame: 'approval' is the binary label
# (1 favorable, 0 unfavorable) and `sen_att` names the protected column.
dataset_orig_test = BinaryLabelDataset(
    df=german_test_df,
    label_names=['approval'],
    protected_attribute_names=[sen_att],
    favorable_label=1,
    unfavorable_label=0,
)

In [13]:
from aif360.algorithms.preprocessing import DisparateImpactRemover

# AIF360 view of the complete, unsplit data frame: 'approval' is the binary
# label (1 favorable, 0 unfavorable) and `sen_att` names the protected column.
dataset_orig = BinaryLabelDataset(
    df=german_data,
    label_names=['approval'],
    protected_attribute_names=[sen_att],
    favorable_label=1,
    unfavorable_label=0,
)

In [14]:
# Group definitions shared by the fairness metrics and the post-processor:
# rows whose `sen_att` value is 0 are treated as privileged, value 1 as unprivileged.
# NOTE(review): confirm this matches how the sensitive column is encoded in german_data.csv.
privileged_groups = [{sen_att: 0}]
unprivileged_groups = [{sen_att: 1}]
# Cost constraint handed to CalibratedEqOddsPostprocessing
# (presumably "fnr" = false negative rate; see the AIF360 docs for accepted values).
cost_constraint = "fnr"
# Seed handed to CalibratedEqOddsPostprocessing.
randseed = 12345679 

In [15]:
### TRAINING THE LOGISTIC REGRESSION ###

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve


def _hard_labels(pred_dataset, fav_probs, threshold):
    """Turn favorable-class probabilities into a hard label array.

    The result has the shape and dtype of ``pred_dataset.labels``. Rows whose
    probability is >= ``threshold`` receive the dataset's favorable label and
    all remaining rows receive its unfavorable label.
    """
    is_favorable = fav_probs >= threshold
    hard = np.zeros_like(pred_dataset.labels)
    hard[is_favorable] = pred_dataset.favorable_label
    hard[~is_favorable] = pred_dataset.unfavorable_label
    return hard


# Deep copies that will carry the classifier's scores and predicted labels,
# so the original datasets keep their true labels.
dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)
dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)

# Further copies of the validation and test datasets, kept as placeholders.
dataset_new_valid_pred = dataset_orig_valid.copy(deepcopy=True)
dataset_new_test_pred = dataset_orig_test.copy(deepcopy=True)

# Standardise the features (scaler fitted on the training data only) and fit
# the classifier.
# NOTE: X_train, y_train and X_test are rebound here, replacing the objects
# produced by the earlier train/test split.
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)
y_train = dataset_orig_train.labels.ravel()
lmod = LogisticRegression().fit(X_train, y_train)

# Column of predict_proba that corresponds to the favorable class.
fav_idx = np.flatnonzero(lmod.classes_ == dataset_orig_train.favorable_label)[0]

# Validation and test features go through the scaler fitted on the training data.
X_valid = scale_orig.transform(dataset_orig_valid.features)
X_test = scale_orig.transform(dataset_orig_test.features)

# Favorable-class probabilities for each split.
y_train_pred_prob = lmod.predict_proba(X_train)[:, fav_idx]
y_valid_pred_prob = lmod.predict_proba(X_valid)[:, fav_idx]
y_test_pred_prob = lmod.predict_proba(X_test)[:, fav_idx]

# Store the probabilities as column-vector scores on the prediction datasets.
class_thresh = 0.5
dataset_orig_train_pred.scores = y_train_pred_prob.reshape(-1, 1)
dataset_orig_valid_pred.scores = y_valid_pred_prob.reshape(-1, 1)
dataset_orig_test_pred.scores = y_test_pred_prob.reshape(-1, 1)

# Threshold the probabilities into hard labels and store them as well.
y_train_pred = _hard_labels(dataset_orig_train_pred, y_train_pred_prob, class_thresh)
dataset_orig_train_pred.labels = y_train_pred

y_valid_pred = _hard_labels(dataset_orig_valid_pred, y_valid_pred_prob, class_thresh)
dataset_orig_valid_pred.labels = y_valid_pred

y_test_pred = _hard_labels(dataset_orig_test_pred, y_test_pred_prob, class_thresh)
dataset_orig_test_pred.labels = y_test_pred

In [16]:
### fairness Metrics function ###
from collections import OrderedDict
from aif360.metrics import ClassificationMetric

def compute_metrics(dataset_true, dataset_pred,
                    unprivileged_groups, privileged_groups,
                    disp=True):
    """Compute the key accuracy and group-fairness metrics for a set of predictions.

    Parameters
    ----------
    dataset_true : dataset holding the ground-truth labels.
    dataset_pred : dataset holding the predicted labels for the same rows.
    unprivileged_groups, privileged_groups : group definitions forwarded
        unchanged to ``ClassificationMetric``.
    disp : bool, default True
        When true, print every metric as ``name = value`` with four decimals.

    Returns
    -------
    collections.OrderedDict
        Metric name -> value, in this order: "Balanced accuracy",
        "Statistical parity difference", "Disparate impact",
        "Average odds difference", "Equal opportunity difference".
    """
    classified_metric_pred = ClassificationMetric(dataset_true,
                                                  dataset_pred,
                                                  unprivileged_groups=unprivileged_groups,
                                                  privileged_groups=privileged_groups)
    metrics = OrderedDict()
    # Balanced accuracy is the mean of the true positive and true negative rates.
    metrics["Balanced accuracy"] = 0.5 * (classified_metric_pred.true_positive_rate()
                                          + classified_metric_pred.true_negative_rate())
    metrics["Statistical parity difference"] = classified_metric_pred.statistical_parity_difference()
    metrics["Disparate impact"] = classified_metric_pred.disparate_impact()
    metrics["Average odds difference"] = classified_metric_pred.average_odds_difference()
    metrics["Equal opportunity difference"] = classified_metric_pred.equal_opportunity_difference()

    if disp:
        for k in metrics:
            print("%s = %.4f" % (k, metrics[k]))

    # Hand the metrics back so callers that assign the result get the values
    # instead of None.
    return metrics

In [17]:
### Perform Calibrated Equality of Odds Pleiss et al., 2017 ###

from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import CalibratedEqOddsPostprocessing
from tqdm import tqdm

# Learn the calibrated equalized-odds post-processor on the validation split:
# dataset_orig_valid carries the true labels, dataset_orig_valid_pred the
# classifier's scores and predicted labels.
cpp = CalibratedEqOddsPostprocessing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
    cost_constraint=cost_constraint,
    seed=randseed,
).fit(dataset_orig_valid, dataset_orig_valid_pred)

In [18]:
### Transform test data using the post processing algorithm
# The fitted post-processor is applied to the classifier's test-set predictions;
# its result is what the evaluation cells below score.

dataset_transf_test_pred = cpp.predict(dataset_orig_test_pred)


In [19]:
 ### fairness metrics extraction ### 
    
# Score the post-processed test predictions against the true test labels;
# disp=True makes compute_metrics print every metric.
metric_test_aft = compute_metrics(
    dataset_true=dataset_orig_test,
    dataset_pred=dataset_transf_test_pred,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
    disp=True,
)

Balanced accuracy = 0.7083
Statistical parity difference = 0.3125
Disparate impact = 1.4545
Average odds difference = 0.3959
Equal opportunity difference = 0.1736


In [20]:
### extract test labels and prediction label for traditional model evaluation ###

# Flatten the true test labels and the post-processed predictions to 1-D.
# NOTE: y_test is rebound here, replacing the object from the train/test split.
y_test = np.ravel(dataset_orig_test.labels)
y_pred = np.ravel(dataset_transf_test_pred.labels)

### obtain performance metrics ###

# Confusion matrix and plain accuracy, followed by the per-class report.
matrix = confusion_matrix(y_test, y_pred)
accuracy_score = sklearn.metrics.accuracy_score(y_test, y_pred)
print(matrix, accuracy_score)
print(classification_report(y_test, y_pred))

[[ 34  26]
 [ 21 119]] 0.765
              precision    recall  f1-score   support

         0.0       0.62      0.57      0.59        60
         1.0       0.82      0.85      0.84       140

    accuracy                           0.77       200
   macro avg       0.72      0.71      0.71       200
weighted avg       0.76      0.77      0.76       200

