In [1]:
#Import lib
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from my_useful_functions import calculate_performance_statistical_parity,calculate_performance_equalized_odds,calculate_performance_equal_opportunity,calculate_performance_predictive_parity,calculate_performance_predictive_equality,calculate_performance_treatment_equality
from sklearn import preprocessing
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from compute_abroca import *
import numpy as np
import sklearn.metrics as metrics 
from scipy import interpolate
from scipy import integrate
from disparate_impact_remover import DisparateImpactRemover
from aif360.datasets.binary_label_dataset import BinaryLabelDataset
import matplotlib.pyplot as plt
import matplotlib
#matplotlib.use('TkAgg')
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

pip install 'aif360[LawSchoolGPA]'


In [2]:
#http://archive.ics.uci.edu/ml/datasets/credit+approval
#http://rstudio-pubs-static.s3.amazonaws.com/73039_9946de135c0a49daa7a0a9eda4a67a72.html
#https://www.kaggle.com/code/chandanabhatt/prediction-of-credit-approval
# Credit approval dataset
# $ Male          : num  1 1 0 0 0 0 1 0 0 0 ...
# $ Age           : chr  "58.67" "24.50" "27.83" "20.17" ...
# $ Debt          : num  4.46 0.5 1.54 5.62 4 ...
# $ Married       : chr  "u" "u" "u" "u" ...
# $ BankCustomer  : chr  "g" "g" "g" "g" ...
# $ EducationLevel: chr  "q" "q" "w" "w" ...
# $ Ethnicity     : chr  "h" "h" "v" "v" ...
# $ YearsEmployed : num  3.04 1.5 3.75 1.71 2.5 ...
# $ PriorDefault  : num  1 1 1 1 1 1 1 1 1 0 ...
# $ Employed      : num  1 0 1 0 0 0 0 0 0 0 ...
# $ CreditScore   : num  6 0 5 0 0 0 0 0 0 0 ...
# $ DriversLicense: chr  "f" "f" "t" "f" ...
# $ Citizen       : chr  "g" "g" "g" "s" ...
# $ ZipCode       : chr  "00043" "00280" "00100" "00120" ...
# $ Income        : num  560 824 3 0 0 ...
# $ Approved      : chr  "+" "+" "+" "+" ...

def load_credit_approval():
    """Load and encode the credit-approval data set for the fairness experiment.

    Reads ``data/credit-approval.data``, drops the rows whose ``Male`` value is
    the missing marker ``'?'``, relabels ``Male`` (``'a'`` -> ``"Female"``,
    anything else -> ``"Male"``), binarises ``Approved`` (``'+'`` -> 1,
    anything else -> 0) and label-encodes every column that is still of
    ``object`` dtype.

    Returns:
        tuple: ``(X, y, sa_index, p_Group, protected_attribute, filename,
        majority_group_name, minority_group_name)``. ``X`` holds every column
        except the last one, ``y`` is the ``Approved`` column, ``sa_index`` is
        the position of the protected attribute among the columns of ``X`` and
        ``p_Group`` is the encoded attribute value handed to the fairness
        metrics (presumably the protected group -- confirm against
        ``my_useful_functions``).
    """
    df = pd.read_csv('data/credit-approval.data',sep=",")
    protected_attribute = 'Male'
    # NOTE(review): every other loader in this notebook uses "Male"/"male" as
    # the majority group name; here the names are reversed. Confirm against the
    # raw 'a'/'b' coding that the ABROCA plot legend is labelled as intended.
    majority_group_name = "Female"
    minority_group_name = "Male"
    class_label = 'Approved'
    filename = "DIR_DT_2.credit_approval.abroca.png"

    print("Length:",len(df))
    print("Number of attribute:",len(df.columns))
    # Remove rows with a missing protected attribute. The explicit copy makes
    # the column assignments below act on an independent frame instead of on a
    # filtered slice (which triggers pandas' SettingWithCopyWarning).
    df = df[df['Male'] != '?'].copy()
    # Label sex
    df['Male']=["Female" if v == "a" else "Male" for v in df['Male']]
    # Label class
    df['Approved']=[1 if v == "+" else 0 for v in df['Approved']]

    print("Length (cleaned):",len(df))
    print("Class imbalance: \n",df[class_label].value_counts())

    # Encode every remaining string column as integers. LabelEncoder numbers
    # the classes in sorted order, so "Female" -> 0 and "Male" -> 1.
    le = preprocessing.LabelEncoder()
    for i in df.columns:
        if df[i].dtypes == 'object':
            df[i] = le.fit_transform(df[i])
    # Features are all columns but the last; this relies on the class label
    # being the last column of the file.
    length = len(df.columns)
    X = df.iloc[:,:length-1]
    y = df[class_label]

    # Position of the protected attribute inside the feature matrix
    feature = X.keys().tolist()
    sa_index = feature.index(protected_attribute)
    p_Group = 0

    return X, y, sa_index, p_Group, protected_attribute, filename,majority_group_name,minority_group_name

In [3]:
#Credit card client
#Gender (1 = male; 2 = female)
def load_credit_card():
    """Load and encode the credit-card-clients data set for the fairness experiment.

    Reads ``data/credit-card-clients.csv``, relabels ``SEX`` (2 -> ``"Female"``,
    anything else -> ``"Male"``) and label-encodes every column of ``object``
    dtype. No rows are removed, so the "cleaned" length printed below equals
    the raw length.

    Returns:
        tuple: ``(X, y, sa_index, p_Group, protected_attribute, filename,
        majority_group_name, minority_group_name)``. ``X`` holds every column
        except the last one, ``y`` is the ``default payment`` column,
        ``sa_index`` is the position of the protected attribute among the
        columns of ``X`` and ``p_Group`` is the encoded attribute value handed
        to the fairness metrics (presumably the protected group -- confirm
        against ``my_useful_functions``).
    """
    df = pd.read_csv('data/credit-card-clients.csv')
    protected_attribute = 'SEX'
    majority_group_name = "Male"
    minority_group_name = "Female"
    class_label = 'default payment'
    filename = "DIR_DT_2.credit_card.abroca.png"

    print("Length:",len(df))
    print("Number of attribute:",len(df.columns))

    # Label sex
    df['SEX']=["Female" if v == 2 else "Male" for v in df['SEX']]

    print("Length (cleaned):",len(df))
    print("Class imbalance: \n",df[class_label].value_counts())

    # Encode every string column as integers. LabelEncoder numbers the classes
    # in sorted order, so "Female" -> 0 and "Male" -> 1.
    le = preprocessing.LabelEncoder()
    for i in df.columns:
        if df[i].dtypes == 'object':
            df[i] = le.fit_transform(df[i])
    # Features are all columns but the last; this relies on the class label
    # being the last column of the file. The train/test split is done by
    # run_experiment, so no split is performed here.
    length = len(df.columns)
    X = df.iloc[:,:length-1]
    y = df[class_label]

    # Position of the protected attribute inside the feature matrix
    feature = X.keys().tolist()
    sa_index = feature.index(protected_attribute)
    p_Group = 0

    return X, y,sa_index, p_Group, protected_attribute, filename,majority_group_name,minority_group_name

In [4]:
#German credit
def load_german_credit():
    """Load and encode the German credit data set for the fairness experiment.

    Reads ``data/german_data_credit.csv`` and label-encodes every column of
    ``object`` dtype. No rows are removed, so both printed lengths are equal.

    Returns:
        tuple: ``(X, y, sa_index, p_Group, protected_attribute, filename,
        majority_group_name, minority_group_name)`` with ``X`` being every
        column except the last one and ``y`` the ``class-label`` column.
    """
    protected_attribute = 'sex'
    class_label = 'class-label'
    majority_group_name = "male"
    minority_group_name = "female"
    filename = "DIR_DT_2.german_credit.abroca.png"

    data = pd.read_csv('data/german_data_credit.csv')

    print("Length:", len(data))
    print("Number of attribute:", len(data.columns))

    print("Length (cleaned):", len(data))
    print("Class imbalance: \n", data[class_label].value_counts())

    # Integer-encode the string-valued columns with a single shared encoder.
    encoder = preprocessing.LabelEncoder()
    text_columns = [col for col in data.columns if data[col].dtypes == 'object']
    for col in text_columns:
        data[col] = encoder.fit_transform(data[col])

    # Everything except the last column is a feature; the target is taken by name.
    X = data.iloc[:, :len(data.columns) - 1]
    y = data[class_label]

    # Column position of the protected attribute within the features.
    sa_index = X.keys().tolist().index(protected_attribute)
    p_Group = 0

    return (X, y, sa_index, p_Group, protected_attribute, filename,
            majority_group_name, minority_group_name)

In [5]:
def load_PAKDD2010():
    """Load and encode the PAKDD data set for the fairness experiment.

    Reads ``data/PAKDD.csv``, drops ``ID_CLIENT``, removes every row that
    still contains a missing value, then drops three area/zip code columns,
    relabels ``SEX`` (``'F'`` -> ``"Female"``, anything else -> ``"Male"``)
    and label-encodes every column of ``object`` dtype.

    Returns:
        tuple: ``(X, y, sa_index, p_Group, protected_attribute, filename,
        majority_group_name, minority_group_name)`` with ``X`` being every
        column except the last one and ``y`` the ``TARGET_LABEL_BAD`` column.
    """
    protected_attribute = 'SEX'
    class_label = 'TARGET_LABEL_BAD'
    majority_group_name = "Male"
    minority_group_name = "Female"
    filename = "DIR_DT_2.PAKDD.abroca.png"

    raw = pd.read_csv('data/PAKDD.csv')

    print("Length:", len(raw))
    print("Number of attribute:", len(raw.columns))

    # Order matters: rows are filtered for missing values while the three
    # code columns are still present, and only afterwards are those dropped.
    df = (
        raw
        .drop(columns=['ID_CLIENT'])
        .dropna()
        .drop(columns=['RESIDENCIAL_PHONE_AREA_CODE', 'RESIDENCIAL_ZIP_3', 'PROFESSIONAL_ZIP_3'])
    )

    # Readable labels for the protected attribute.
    df['SEX'] = ["Female" if code == "F" else "Male" for code in df['SEX']]

    print("Length (cleaned):", len(df))
    print("Class imbalance: \n", df[class_label].value_counts())

    # Integer-encode the string-valued columns with a single shared encoder.
    encoder = preprocessing.LabelEncoder()
    for column in list(df.columns):
        if df[column].dtypes == 'object':
            df[column] = encoder.fit_transform(df[column])

    # Everything except the last column is a feature; the target is taken by name.
    n_columns = len(df.columns)
    X = df.iloc[:, :n_columns - 1]
    y = df[class_label]

    # Column position of the protected attribute within the features.
    sa_index = X.keys().tolist().index(protected_attribute)
    p_Group = 0

    return (X, y, sa_index, p_Group, protected_attribute, filename,
            majority_group_name, minority_group_name)

In [6]:
#Credit scoring data
#https://www.kaggle.com/code/islombekdavronov/credit-scoring
#FinTech companies in Central Asia.
def load_credit_scoring():
    """Load and encode the credit-scoring data set for the fairness experiment.

    Reads ``data/credit_scoring.csv``, replaces every ``'-'`` cell with 0,
    casts ``Score_point`` to float, relabels ``Sex`` (2 -> ``"Female"``,
    anything else -> ``"Male"``) and label-encodes every column of ``object``
    dtype.

    Returns:
        tuple: ``(X, y, sa_index, p_Group, protected_attribute, filename,
        majority_group_name, minority_group_name)`` with ``X`` being every
        column except the first and the last one and ``y`` the ``label``
        column.
    """
    protected_attribute = 'Sex'
    class_label = 'label'
    majority_group_name = "Male"
    minority_group_name = "Female"
    filename = "DIR_DT_2.credit_scoring.abroca.png"

    raw = pd.read_csv('data/credit_scoring.csv')

    print("Length:", len(raw))
    print("Number of attribute:", len(raw.columns))

    # '-' cells become 0 so that Score_point can be cast to a float column.
    df = raw.replace({'-': 0})
    df['Score_point'] = df['Score_point'].astype(float)

    # Readable labels for the protected attribute.
    df['Sex'] = ["Female" if code == 2 else "Male" for code in df['Sex']]

    print("Length (cleaned):", len(df))
    print("Class imbalance: \n", df[class_label].value_counts())

    # Integer-encode the string-valued columns with a single shared encoder.
    encoder = preprocessing.LabelEncoder()
    for column in list(df.columns):
        if df[column].dtypes == 'object':
            df[column] = encoder.fit_transform(df[column])

    # The first column and the last column are excluded from the features;
    # the target is taken by name.
    n_columns = len(df.columns)
    X = df.iloc[:, 1:n_columns - 1]
    y = df[class_label]

    # Column position of the protected attribute within the features.
    sa_index = X.keys().tolist().index(protected_attribute)
    p_Group = 0

    return (X, y, sa_index, p_Group, protected_attribute, filename,
            majority_group_name, minority_group_name)

In [7]:
def run_experiment(X, y,sa_index, p_Group,protected_attribute,filename,majority_group_name,minority_group_name):
    """Train a decision tree on DisparateImpactRemover-transformed data and report fairness.

    The data is split 70/30 (``random_state=42``). ``DisparateImpactRemover``
    is fitted on the training part and applied to both parts, a
    ``DecisionTreeClassifier`` is trained on the transformed training
    features, and the test predictions are scored with the six
    ``calculate_performance_*`` helpers and with ``compute_abroca``. All
    results are printed; nothing is returned.

    Args:
        X: Feature DataFrame; must contain the ``protected_attribute`` column.
        y: Named label Series aligned with ``X``.
        sa_index: Column position of the protected attribute in ``X``.
        p_Group: Protected-attribute value forwarded to the fairness helpers.
        protected_attribute: Name of the protected attribute column.
        filename: Forwarded to ``compute_abroca`` as ``file_name``.
        majority_group_name: Forwarded to ``compute_abroca``.
        minority_group_name: Forwarded to ``compute_abroca``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Wrap both splits in the AIF360 container expected by the remover.
    datasetTrain = BinaryLabelDataset(df=pd.concat([X_train, y_train.to_frame()], axis=1), label_names=[y_train.name], protected_attribute_names=[protected_attribute])
    datasetTest = BinaryLabelDataset(df=pd.concat([X_test, y_test.to_frame()], axis=1), label_names=[y_test.name], protected_attribute_names=[protected_attribute])

    # Apply DisparateImpactRemover: fitted on the training split only, then
    # applied to the test split.
    DisparateImpact = DisparateImpactRemover(sensitive_attribute=protected_attribute)
    dataset_train_transf = DisparateImpact.fit_transform(datasetTrain)
    dataset_test_transf = DisparateImpact.transform(datasetTest)

    # Convert the transformed datasets back to pandas objects for scikit-learn.
    X_train_transf = pd.DataFrame(dataset_train_transf.features, columns = X_train.columns)
    y_train_transf = pd.Series(dataset_train_transf.labels.ravel(), name = y_train.name).astype(int)
    X_test_transf = pd.DataFrame(dataset_test_transf.features, columns = X_train.columns)

    # Run DT model
    DT = tree.DecisionTreeClassifier(random_state=0)
    DT.fit(X_train_transf, y_train_transf)
    y_predicts = DT.predict(X_test_transf)
    y_pred_probs = DT.predict_proba(X_test_transf)

    # Print measures. The helpers receive the *untransformed* test features,
    # so sa_index/p_Group refer to the original protected-attribute values.
    print("Statistical parity:")
    print(calculate_performance_statistical_parity(X_test.values, y_test.values, y_predicts, sa_index, p_Group))

    print("Equal opportunity")
    print(calculate_performance_equal_opportunity(X_test.values, y_test.values, y_predicts,  sa_index, p_Group))

    print("Equalized odds")
    print(calculate_performance_equalized_odds(X_test.values, y_test.values, y_predicts, y_pred_probs, sa_index, p_Group))

    print("Predictive parity")
    print(calculate_performance_predictive_parity(X_test.values, y_test.values, y_predicts,  sa_index, p_Group))

    print("Predictive equality")
    print(calculate_performance_predictive_equality(X_test.values, y_test.values, y_predicts,  sa_index, p_Group))

    print("Treatment equality")
    print(calculate_performance_treatment_equality(X_test.values, y_test.values, y_predicts,  sa_index, p_Group))

    # Build the ABROCA input on a copy so the test split itself is not
    # modified; the probabilities computed above are reused (column 1 is the
    # probability of the second class in DT.classes_).
    df_test = X_test.copy()
    df_test['pred_proba'] = y_pred_probs[:, 1]
    df_test['true_label'] = y_test

    print(len(df_test))
    # Compute Abroca; rows whose protected attribute equals 1 are passed as
    # the majority group.
    abroca = compute_abroca(df_test, pred_col = 'pred_proba' , label_col = 'true_label', protected_attr_col = protected_attribute,
                            majority_protected_attr_val = 1, n_grid = 10000,
                            plot_slices = True, majority_group_name=majority_group_name ,minority_group_name=minority_group_name,file_name = filename)
    print("ABROCA:",abroca)
    # Clear the current figure so the next experiment starts with empty axes.
    plt.clf()

In [8]:
#Main function
def run_eval(dataset):
    """Load the named data set and run the fairness experiment on it.

    Args:
        dataset: One of ``'credit-approval'``, ``'credit-card'``,
            ``'german-credit'``, ``'PAKDD'`` or ``'credit-scoring'``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names
            (previously such a call silently did nothing).
    """
    # Pick the loader first; the experiment call itself is identical for
    # every data set.
    if dataset == 'credit-approval':
        loader = load_credit_approval
    elif dataset == 'credit-card':
        loader = load_credit_card
    elif dataset == 'german-credit':
        loader = load_german_credit
    elif dataset == 'PAKDD':
        loader = load_PAKDD2010
    elif dataset == 'credit-scoring':
        loader = load_credit_scoring
    else:
        raise ValueError(
            "Unknown dataset %r; expected one of: credit-approval, credit-card, "
            "german-credit, PAKDD, credit-scoring" % (dataset,)
        )

    X, y,sa_index, p_Group,protected_attribute,filename,majority_group_name,minority_group_name = loader()
    run_experiment(X, y,sa_index, p_Group,protected_attribute,filename,majority_group_name,minority_group_name)
    

In [9]:
# Run the DisparateImpactRemover + decision-tree evaluation on the credit-approval data.
run_eval('credit-approval')

Length: 690
Number of attribute: 16
Length (cleaned): 678
Class imbalance: 
 0    374
1    304
Name: Approved, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.5421052631578946, 'accuracy': 0.5196078431372549, 'f1-score': 0.5739130434782609, 'fairness': 0.013724405091311564}
Equal opportunity
{'balanced_accuracy': 0.5421052631578946, 'accuracy': 0.5196078431372549, 'f1-score': 0.5739130434782609, 'fairness': 0.11177884615384615, 'TPR_protected': 0.6538461538461539, 'TPR_non_protected': 0.765625, 'TNR_protected': 0.3076923076923077, 'TNR_non_protected': 0.37333333333333335}
Equalized odds
{'balanced_accuracy': 0.5421052631578946, 'accuracy': 0.5196078431372549, 'f1-score': 0.5739130434782609, 'fairness': 0.1774198717948718, 'TPR_protected': 0.6538461538461539, 'TPR_non_protected': 0.765625, 'TNR_protected': 0.3076923076923077, 'TNR_non_protected': 0.37333333333333335}
Predictive parity
{'balanced_accuracy': 0.5421052631578946, 'accuracy': 0.5196078431372549, 'f1-score': 0.573913

In [10]:
# Run the DisparateImpactRemover + decision-tree evaluation on the credit-card data.
run_eval('credit-card')

Length: 30000
Number of attribute: 24
Length (cleaned): 30000
Class imbalance: 
 0    23364
1     6636
Name: default payment, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.6099518784786642, 'accuracy': 0.7186666666666667, 'f1-score': 0.3925143953934741, 'fairness': 0.02906791763165098}
Equal opportunity
{'balanced_accuracy': 0.6099518784786642, 'accuracy': 0.7186666666666667, 'f1-score': 0.3925143953934741, 'fairness': 0.0034494653328733804, 'TPR_protected': 0.4188340807174888, 'TPR_non_protected': 0.4153846153846154, 'TNR_protected': 0.8146501287151884, 'TNR_non_protected': 0.7838814600650524}
Equalized odds
{'balanced_accuracy': 0.6099518784786642, 'accuracy': 0.7186666666666667, 'f1-score': 0.3925143953934741, 'fairness': 0.034218133983009325, 'TPR_protected': 0.4188340807174888, 'TPR_non_protected': 0.4153846153846154, 'TNR_protected': 0.8146501287151884, 'TNR_non_protected': 0.7838814600650524}
Predictive parity
{'balanced_accuracy': 0.6099518784786642, 'accuracy': 0.71

In [11]:
# Run the DisparateImpactRemover + decision-tree evaluation on the credit-scoring data.
run_eval('credit-scoring')

Length: 8755
Number of attribute: 18
Length (cleaned): 8755
Class imbalance: 
 1    8059
0     696
Name: label, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.9712970415666622, 'accuracy': 0.9908641035401599, 'f1-score': 0.9949958298582152, 'fairness': 0.03612294904004221}
Equal opportunity
{'balanced_accuracy': 0.9712970415666622, 'accuracy': 0.9908641035401599, 'f1-score': 0.9949958298582152, 'fairness': 0.0009891802869655963, 'TPR_protected': 0.9946200403496974, 'TPR_non_protected': 0.995609220636663, 'TNR_protected': 0.9693251533742331, 'TNR_non_protected': 0.8939393939393939}
Equalized odds
{'balanced_accuracy': 0.9712970415666622, 'accuracy': 0.9908641035401599, 'f1-score': 0.9949958298582152, 'fairness': 0.07637493972180476, 'TPR_protected': 0.9946200403496974, 'TPR_non_protected': 0.995609220636663, 'TNR_protected': 0.9693251533742331, 'TNR_non_protected': 0.8939393939393939}
Predictive parity
{'balanced_accuracy': 0.9712970415666622, 'accuracy': 0.9908641035401599, '

In [12]:
# Run the DisparateImpactRemover + decision-tree evaluation on the German credit data.
run_eval('german-credit')

Length: 1000
Number of attribute: 22
Length (cleaned): 1000
Class imbalance: 
 1    700
0    300
Name: class-label, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.6221410168778589, 'accuracy': 0.6766666666666666, 'f1-score': 0.7662650602409637, 'fairness': -0.07354202401372212}
Equal opportunity
{'balanced_accuracy': 0.6221410168778589, 'accuracy': 0.6766666666666666, 'f1-score': 0.7662650602409637, 'fairness': 0.09717514124293791, 'TPR_protected': 0.8305084745762712, 'TPR_non_protected': 0.7333333333333333, 'TNR_protected': 0.4482758620689655, 'TNR_non_protected': 0.5}
Equalized odds
{'balanced_accuracy': 0.6221410168778589, 'accuracy': 0.6766666666666666, 'f1-score': 0.7662650602409637, 'fairness': 0.1488992791739724, 'TPR_protected': 0.8305084745762712, 'TPR_non_protected': 0.7333333333333333, 'TNR_protected': 0.4482758620689655, 'TNR_non_protected': 0.5}
Predictive parity
{'balanced_accuracy': 0.6221410168778589, 'accuracy': 0.6766666666666666, 'f1-score': 0.7662650602409

In [13]:
# Run the DisparateImpactRemover + decision-tree evaluation on the PAKDD data.
run_eval('PAKDD')

Length: 50000
Number of attribute: 47
Length (cleaned): 38896
Class imbalance: 
 0    28747
1    10149
Name: TARGET_LABEL_BAD, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.5174079993600874, 'accuracy': 0.6189047904704773, 'f1-score': 0.2953573126287435, 'fairness': 0.025291884812548404}
Equal opportunity
{'balanced_accuracy': 0.5174079993600874, 'accuracy': 0.6189047904704773, 'f1-score': 0.2953573126287435, 'fairness': 0.011603000626932147, 'TPR_protected': 0.3065049614112459, 'TPR_non_protected': 0.29490196078431374, 'TNR_protected': 0.7474595408355288, 'TNR_non_protected': 0.709736680955297}
Equalized odds
{'balanced_accuracy': 0.5174079993600874, 'accuracy': 0.6189047904704773, 'f1-score': 0.2953573126287435, 'fairness': 0.04932586050716398, 'TPR_protected': 0.3065049614112459, 'TPR_non_protected': 0.29490196078431374, 'TNR_protected': 0.7474595408355288, 'TNR_non_protected': 0.709736680955297}
Predictive parity
{'balanced_accuracy': 0.5174079993600874, 'accuracy': 0.61