In [1]:
import aif360
import numpy as np
import pandas as pd

from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector

from aif360.metrics import utils
from aif360.datasets import BinaryLabelDataset
from aif360.datasets.multiclass_label_dataset import MulticlassLabelDataset
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing, EqOddsPostprocessing, RejectOptionClassification



import sklearn 
from sklearn.linear_model import LogisticRegression
import imblearn
import matplotlib.pyplot as plt 
from sklearn.metrics import classification_report, confusion_matrix
import math 

In [2]:
### set sensitive attribute equal to 'SEX' or 'Age Group' ###

sen_att = 'SEX'

In [3]:
taiwan_data = pd.read_csv('taiwan_data.csv')
print(taiwan_data)

       LIMIT_BAL  SEX  EDUCATION  MARRIAGE  Age Group  PAY_0  PAY_2  PAY_3  \
0          20000    1          2         1          0      2      2     -1   
1         120000    1          2         2          0     -1      2      0   
2          90000    1          2         2          0      0      0      0   
3          50000    1          2         1          0      0      0      0   
4          50000    0          2         1          1     -1      0     -1   
...          ...  ...        ...       ...        ...    ...    ...    ...   
29995     220000    0          3         1          0      0      0      0   
29996     150000    0          3         2          0     -1     -1     -1   
29997      30000    0          2         2          0      4      3      2   
29998      80000    0          3         1          0      1     -1      0   
29999      50000    0          2         1          0      0      0      0   

       PAY_4  PAY_5  ...  BILL_AMT4  BILL_AMT5  BILL_AMT6  PAY_

In [4]:
#split data in a test val and train set

from sklearn.model_selection import train_test_split
X = taiwan_data.loc[:, taiwan_data.columns != 'Creditworthiness']
y = taiwan_data.loc[:, taiwan_data.columns == 'Creditworthiness']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify = y, random_state=0)


In [5]:
#apply SMOTE to the trainingset
from imblearn.over_sampling import SMOTE
os = SMOTE(random_state=4)

In [6]:
columns = X_train.columns
X_train_balanced, y_train_balanced=os.fit_sample(X_train, y_train)

In [7]:
X_train_balanced

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,Age Group,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
0,420000,1,2,2,0,0,0,0,0,0,...,278407,284777,301438,308571,10000,10200,11000,21500,12000,15000
1,230000,1,5,1,0,-1,-1,-1,-1,-2,...,8100,-2900,-2900,-2900,0,8100,0,0,0,110256
2,30000,1,2,1,0,2,2,2,2,2,...,17272,18131,17711,19007,3141,0,1434,0,1581,1705
3,30000,1,2,1,1,2,2,2,2,3,...,23057,24506,24711,24178,1300,1082,2131,900,0,2200
4,390000,1,1,1,0,-1,0,-1,-1,-1,...,15216,16659,35241,14216,42000,15224,16659,35241,14216,72944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37377,190000,0,2,2,0,2,2,2,2,2,...,157855,160419,163473,167473,6238,11506,6558,6500,6707,7000
37378,100000,0,1,1,0,0,1,0,0,0,...,6167,6601,7280,392,542,848,1697,2821,0,409
37379,20000,1,1,2,0,0,0,0,0,2,...,5963,6615,6277,2761,1307,1254,861,0,138,2101
37380,50000,0,2,2,0,0,0,0,0,0,...,36622,18559,14616,16070,1156,1633,765,2686,2867,3264


In [8]:
y_train_balanced

Unnamed: 0,Creditworthiness
0,1
1,1
2,0
3,0
4,1
...,...
37377,0
37378,0
37379,0
37380,0


In [9]:
balanced_taiwan_train_df = pd.concat([X_train_balanced, y_train_balanced], axis=1, join="inner")

In [10]:
taiwan_test_df = pd.concat([X_test, y_test], axis=1, join="inner")

In [11]:
from aif360.algorithms.preprocessing import DisparateImpactRemover
dataset_orig_train = aif360.datasets.BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=balanced_taiwan_train_df,
    label_names=['Creditworthiness'],
    protected_attribute_names=[sen_att])

In [12]:
from aif360.algorithms.preprocessing import DisparateImpactRemover
dataset_orig_test = aif360.datasets.BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=taiwan_test_df,
    label_names=['Creditworthiness'],
    protected_attribute_names=[sen_att])

In [13]:
from aif360.algorithms.preprocessing import DisparateImpactRemover
dataset_orig = aif360.datasets.BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=taiwan_data,
    label_names=['Creditworthiness'],
    protected_attribute_names=[sen_att])

In [14]:
privileged_groups = [{sen_att: 1}]
unprivileged_groups = [{sen_att: 0}]
cost_constraint = "fnr"
randseed = 12345679 

In [15]:
### logistic regression ### 

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve

# Placeholder for predicted and transformed datasets
dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
dataset_new_test_pred = dataset_orig_test.copy(deepcopy=True)

# Logistic regression classifier and predictions for training data
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)
y_train = dataset_orig_train.labels.ravel()
lmod = LogisticRegression()
lmod.fit(X_train, y_train)

fav_idx = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]
y_train_pred_prob = lmod.predict_proba(X_train)[:,fav_idx]

X_test = scale_orig.transform(dataset_orig_test.features)
y_test_pred_prob = lmod.predict_proba(X_test)[:,fav_idx]

class_thresh = 0.5
dataset_orig_train_pred.scores = y_train_pred_prob.reshape(-1,1)

dataset_orig_test_pred.scores = y_test_pred_prob.reshape(-1,1)

y_train_pred = np.zeros_like(dataset_orig_train_pred.labels)
y_train_pred[y_train_pred_prob >= class_thresh] = dataset_orig_train_pred.favorable_label
y_train_pred[~(y_train_pred_prob >= class_thresh)] = dataset_orig_train_pred.unfavorable_label
dataset_orig_train_pred.labels = y_train_pred
    
y_test_pred = np.zeros_like(dataset_orig_test_pred.labels)
y_test_pred[y_test_pred_prob >= class_thresh] = dataset_orig_test_pred.favorable_label
y_test_pred[~(y_test_pred_prob >= class_thresh)] = dataset_orig_test_pred.unfavorable_label
dataset_orig_test_pred.labels = y_test_pred

In [16]:
# Metrics function
from collections import OrderedDict
from aif360.metrics import ClassificationMetric

def compute_metrics(dataset_true, dataset_pred, 
                    unprivileged_groups, privileged_groups,
                    disp = True):
    """ Compute the key metrics """
    classified_metric_pred = ClassificationMetric(dataset_true,
                                                 dataset_pred, 
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
    metrics = OrderedDict()
    metrics["Balanced accuracy"] = 0.5*(classified_metric_pred.true_positive_rate()+
                                             classified_metric_pred.true_negative_rate())
    metrics["Statistical parity difference"] = classified_metric_pred.statistical_parity_difference()
    metrics["Disparate impact"] = classified_metric_pred.disparate_impact()
    metrics["Average odds difference"] = classified_metric_pred.average_odds_difference()
    metrics["Equal opportunity difference"] = classified_metric_pred.equal_opportunity_difference()
    
    if disp:
        for k in metrics:
            print("%s = %.4f" % (k, metrics[k]))

In [17]:
### Reject Option Classification  ####
ROC = RejectOptionClassification(privileged_groups = privileged_groups,
                             unprivileged_groups = unprivileged_groups)

ROC = ROC.fit(dataset_orig_test, dataset_orig_test_pred)
dataset_transf_test_pred = ROC.predict(dataset_orig_test_pred)


In [18]:
 metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred, 
                                      unprivileged_groups, privileged_groups,
                                      disp = True)


Balanced accuracy = 0.6542
Statistical parity difference = 0.0101
Disparate impact = 1.0144
Average odds difference = 0.0317
Equal opportunity difference = 0.0226


In [19]:
### extract test labels and prediction label for traditional model evaluation ###

y_test = dataset_orig_test.labels.ravel()
y_pred = dataset_transf_test_pred.labels.ravel()

### obtain performance metrics###

matrix = sklearn.metrics.confusion_matrix(y_test, y_pred)
accuracy_score = sklearn.metrics.accuracy_score(y_test, y_pred)
print(matrix, accuracy_score)
print(classification_report(y_test, y_pred))

[[ 709  618]
 [1056 3617]] 0.721
              precision    recall  f1-score   support

         0.0       0.40      0.53      0.46      1327
         1.0       0.85      0.77      0.81      4673

    accuracy                           0.72      6000
   macro avg       0.63      0.65      0.64      6000
weighted avg       0.75      0.72      0.73      6000

