In [15]:
#Import lib
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from my_useful_functions import calculate_performance_statistical_parity,calculate_performance_equalized_odds,calculate_performance_equal_opportunity,calculate_performance_predictive_parity,calculate_performance_predictive_equality,calculate_performance_treatment_equality
from sklearn import preprocessing
from exponentiated_gradient_reduction import ExponentiatedGradientReduction
#Estimator
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from eq_odds_postprocessing import EqOddsPostprocessing
from aif360.datasets.binary_label_dataset import BinaryLabelDataset
import numpy as np
import sklearn.metrics as metrics 
from scipy import interpolate
from scipy import integrate
import matplotlib.pyplot as plt
import matplotlib
#matplotlib.use('TkAgg')
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [16]:
#http://archive.ics.uci.edu/ml/datasets/credit+approval
#http://rstudio-pubs-static.s3.amazonaws.com/73039_9946de135c0a49daa7a0a9eda4a67a72.html
#https://www.kaggle.com/code/chandanabhatt/prediction-of-credit-approval
# Credit approval dataset
# $ Male          : num  1 1 0 0 0 0 1 0 0 0 ...
# $ Age           : chr  "58.67" "24.50" "27.83" "20.17" ...
# $ Debt          : num  4.46 0.5 1.54 5.62 4 ...
# $ Married       : chr  "u" "u" "u" "u" ...
# $ BankCustomer  : chr  "g" "g" "g" "g" ...
# $ EducationLevel: chr  "q" "q" "w" "w" ...
# $ Ethnicity     : chr  "h" "h" "v" "v" ...
# $ YearsEmployed : num  3.04 1.5 3.75 1.71 2.5 ...
# $ PriorDefault  : num  1 1 1 1 1 1 1 1 1 0 ...
# $ Employed      : num  1 0 1 0 0 0 0 0 0 0 ...
# $ CreditScore   : num  6 0 5 0 0 0 0 0 0 0 ...
# $ DriversLicense: chr  "f" "f" "t" "f" ...
# $ Citizen       : chr  "g" "g" "g" "s" ...
# $ ZipCode       : chr  "00043" "00280" "00100" "00120" ...
# $ Income        : num  560 824 3 0 0 ...
# $ Approved      : chr  "+" "+" "+" "+" ...

def load_credit_approval():
    """Load and prepare the UCI credit-approval data for the fairness experiment.

    Steps:
      1. Read ``data/credit-approval.data``.
      2. Drop the rows whose ``Male`` value is the missing marker ``'?'``.
      3. Recode ``Male`` (``'a'`` -> ``"Female"``, anything else -> ``"Male"``)
         and ``Approved`` (``'+'`` -> 1, anything else -> 0).
      4. Label-encode every column that is still object-typed.
      5. Use all columns except the last one as features and split 70/30
         with ``random_state=42``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, sa_index, p_Group,
        protected_attribute, filename, majority_group_name,
        minority_group_name)``.  ``sa_index`` is the position of the
        protected attribute among the feature columns; ``p_Group`` is the
        constant ``0``.
    """
    protected_attribute = 'Male'
    majority_group_name = "Female"
    minority_group_name = "Male"
    class_label = 'Approved'
    filename = "EOP_DT.credit_approval.abroca.png"

    df = pd.read_csv('data/credit-approval.data', sep=",")
    print("Length:", len(df))
    print("Number of attribute:", len(df.columns))

    # Remove rows with a missing protected attribute.  The explicit copy makes
    # the filtered result an independent frame, so the column assignments
    # below never write into a slice of the raw frame (the pattern pandas
    # reports as SettingWithCopyWarning).
    df = df[df[protected_attribute] != '?'].copy()

    # Recode the protected attribute and the class label in a vectorised way.
    df[protected_attribute] = np.where(df[protected_attribute] == "a", "Female", "Male")
    df[class_label] = (df[class_label] == "+").astype('int64')

    print("Length (cleaned):", len(df))
    print("Class imbalance: \n", df[class_label].value_counts())

    # Label-encode the remaining object-typed columns; the encoder is refit
    # for each column by fit_transform.
    # NOTE(review): only 'Male' is filtered for '?'; any '?' left in other
    # columns would be encoded as an ordinary category - confirm this is intended.
    le = preprocessing.LabelEncoder()
    for column in df.columns:
        if df[column].dtypes == 'object':
            df[column] = le.fit_transform(df[column])

    # Features are every column except the last one.
    # NOTE(review): this assumes the class label is the last column of the file.
    X = df.iloc[:, :-1]
    y = df[class_label]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Position of the protected attribute among the feature columns.
    sa_index = X.columns.tolist().index(protected_attribute)
    p_Group = 0

    return X_train, X_test, y_train, y_test,sa_index, p_Group, protected_attribute, filename,majority_group_name,minority_group_name

In [17]:
#Credit card client
#Gender (1 = male; 2 = female)
def load_credit_card():
    """Prepare the credit-card-clients data (``data/credit-card-clients.csv``).

    ``SEX`` is recoded (``2`` -> ``"Female"``, anything else -> ``"Male"``),
    object-typed columns are label-encoded, every column except the last one
    is used as a feature, and the data is split 70/30 with ``random_state=42``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, sa_index, p_Group,
        protected_attribute, filename, majority_group_name,
        minority_group_name)``.
    """
    # Data-set specific settings.
    sensitive_column = 'SEX'
    target_column = 'default payment'
    plot_file = "EOP_DT.credit_card.abroca.png"
    majority_label, minority_label = "Male", "Female"

    frame = pd.read_csv('data/credit-card-clients.csv')
    print("Length:", len(frame))
    print("Number of attribute:", len(frame.columns))

    # Replace the numeric gender code with a readable label.
    frame['SEX'] = frame['SEX'].map(lambda code: "Female" if code == 2 else "Male")

    print("Length (cleaned):", len(frame))
    print("Class imbalance: \n", frame[target_column].value_counts())

    # Label-encode whatever is still stored as object.
    encoder = preprocessing.LabelEncoder()
    text_columns = [name for name in frame.columns if frame[name].dtypes == 'object']
    for name in text_columns:
        frame[name] = encoder.fit_transform(frame[name])

    # All columns but the last are features.
    features = frame.iloc[:, :-1]
    target = frame[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42
    )

    # Column position of the protected attribute inside the feature matrix.
    sa_index = list(features.columns).index(sensitive_column)
    p_Group = 0

    return (
        X_train, X_test, y_train, y_test,
        sa_index, p_Group, sensitive_column, plot_file,
        majority_label, minority_label,
    )

In [18]:
#German credit
def load_german_credit():
    """Prepare the German credit data (``data/german_data_credit.csv``).

    No rows are removed and no column is recoded by hand.  Object-typed
    columns are label-encoded, every column except the last one is used as a
    feature, and the data is split 70/30 with ``random_state=42``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, sa_index, p_Group,
        protected_attribute, filename, majority_group_name,
        minority_group_name)``.
    """
    # Data-set specific settings.
    sensitive_column = 'sex'
    target_column = 'class-label'
    plot_file = "EOP_DT.german_credit.abroca.png"
    majority_label, minority_label = "male", "female"

    frame = pd.read_csv('data/german_data_credit.csv')
    print("Length:", len(frame))
    print("Number of attribute:", len(frame.columns))

    # Nothing is cleaned for this data set, so the length is unchanged.
    print("Length (cleaned):", len(frame))
    print("Class imbalance: \n", frame[target_column].value_counts())

    # Label-encode whatever is stored as object.
    encoder = preprocessing.LabelEncoder()
    text_columns = [name for name in frame.columns if frame[name].dtypes == 'object']
    for name in text_columns:
        frame[name] = encoder.fit_transform(frame[name])

    # All columns but the last are features.
    features = frame.iloc[:, :-1]
    target = frame[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42
    )

    # Column position of the protected attribute inside the feature matrix.
    sa_index = list(features.columns).index(sensitive_column)
    p_Group = 0

    return (
        X_train, X_test, y_train, y_test,
        sa_index, p_Group, sensitive_column, plot_file,
        majority_label, minority_label,
    )

In [19]:
def load_PAKDD2010():
    """Prepare the PAKDD data (``data/PAKDD.csv``).

    The ``ID_CLIENT`` column is dropped, then rows containing any missing
    value are removed, then three area/zip-code columns are dropped.  ``SEX``
    is recoded (``"F"`` -> ``"Female"``, anything else -> ``"Male"``),
    object-typed columns are label-encoded, every column except the last one
    is used as a feature, and the data is split 70/30 with ``random_state=42``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, sa_index, p_Group,
        protected_attribute, filename, majority_group_name,
        minority_group_name)``.
    """
    # Data-set specific settings.
    sensitive_column = 'SEX'
    target_column = 'TARGET_LABEL_BAD'
    plot_file = "EOP_DT.PAKDD.abroca.png"
    majority_label, minority_label = "Male", "Female"

    raw = pd.read_csv('data/PAKDD.csv')
    print("Length:", len(raw))
    print("Number of attribute:", len(raw.columns))

    # Order matters: rows are filtered on missing values *before* the three
    # code columns are removed, so gaps in those columns still drop rows.
    frame = (
        raw
        .drop(columns=['ID_CLIENT'])
        .dropna()
        .drop(columns=['RESIDENCIAL_PHONE_AREA_CODE', 'RESIDENCIAL_ZIP_3', 'PROFESSIONAL_ZIP_3'])
    )

    # Replace the gender code with a readable label.
    frame['SEX'] = frame['SEX'].map(lambda code: "Female" if code == "F" else "Male")

    print("Length (cleaned):", len(frame))
    print("Class imbalance: \n", frame[target_column].value_counts())

    # Label-encode whatever is still stored as object.
    encoder = preprocessing.LabelEncoder()
    text_columns = [name for name in frame.columns if frame[name].dtypes == 'object']
    for name in text_columns:
        frame[name] = encoder.fit_transform(frame[name])

    # All columns but the last are features.
    features = frame.iloc[:, :-1]
    target = frame[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42
    )

    # Column position of the protected attribute inside the feature matrix.
    sa_index = list(features.columns).index(sensitive_column)
    p_Group = 0

    return (
        X_train, X_test, y_train, y_test,
        sa_index, p_Group, sensitive_column, plot_file,
        majority_label, minority_label,
    )

In [20]:
#Credit scoring data
#https://www.kaggle.com/code/islombekdavronov/credit-scoring
#FinTech companies in Central Asia.
def load_credit_scoring():
    """Prepare the credit-scoring data (``data/credit_scoring.csv``).

    Every ``'-'`` cell is replaced by ``0`` and ``Score_point`` is cast to
    float.  ``Sex`` is recoded (``2`` -> ``"Female"``, anything else ->
    ``"Male"``), object-typed columns are label-encoded, the first and the
    last column are left out of the features, and the data is split 70/30
    with ``random_state=42``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, sa_index, p_Group,
        protected_attribute, filename, majority_group_name,
        minority_group_name)``.
    """
    # Data-set specific settings.
    sensitive_column = 'Sex'
    target_column = 'label'
    plot_file = "EOP_DT.credit_scoring.abroca.png"
    majority_label, minority_label = "Male", "Female"

    raw = pd.read_csv('data/credit_scoring.csv')
    print("Length:", len(raw))
    print("Number of attribute:", len(raw.columns))

    # '-' placeholders become 0 so that Score_point can be cast to float.
    frame = raw.replace({'-': 0})
    frame['Score_point'] = frame['Score_point'].astype(float)

    # Replace the numeric gender code with a readable label.
    frame['Sex'] = frame['Sex'].map(lambda code: "Female" if code == 2 else "Male")

    print("Length (cleaned):", len(frame))
    print("Class imbalance: \n", frame[target_column].value_counts())

    # Label-encode whatever is still stored as object.
    encoder = preprocessing.LabelEncoder()
    text_columns = [name for name in frame.columns if frame[name].dtypes == 'object']
    for name in text_columns:
        frame[name] = encoder.fit_transform(frame[name])

    # Features skip the first column as well as the last one.
    features = frame.iloc[:, 1:-1]
    target = frame[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42
    )

    # Column position of the protected attribute inside the feature matrix.
    sa_index = list(features.columns).index(sensitive_column)
    p_Group = 0

    return (
        X_train, X_test, y_train, y_test,
        sa_index, p_Group, sensitive_column, plot_file,
        majority_label, minority_label,
    )

In [21]:
def run_experiment(X_train, X_test, y_train, y_test,sa_index, p_Group,protected_attribute,filename,majority_group_name,minority_group_name):
    """Train, post-process and report fairness metrics for one data set.

    Pipeline:
      1. Fit ``ExponentiatedGradientReduction`` with a ``GaussianNB`` base
         estimator and the ``"EqualizedOdds"`` constraint on the training split.
      2. Fit ``EqOddsPostprocessing`` on the true training labels versus the
         training predictions, then apply it to the test predictions.
      3. Print the dictionary returned by each ``calculate_performance_*``
         helper for the post-processed test predictions.

    Args:
        X_train, X_test: Feature frames; must contain ``protected_attribute``.
        y_train, y_test: Label series; their ``name`` is used as label column.
        sa_index: Forwarded to the metric helpers (the loaders pass the column
            position of the protected attribute in the features).
        p_Group: Forwarded to the metric helpers (the loaders pass ``0``).
        protected_attribute: Name of the protected-attribute column.
        filename, majority_group_name, minority_group_name: Currently unused;
            kept so existing callers keep working (they were only referenced
            by ABROCA plotting code that is disabled).

    Returns:
        None. All results are printed.
    """
    # NOTE(review): output file names and an older comment refer to a "DT"
    # model, but the base estimator here is Gaussian naive Bayes - confirm
    # which one is intended.
    base_estimator = GaussianNB()
    reduction = ExponentiatedGradientReduction(
        prot_attr=protected_attribute,
        estimator=base_estimator,
        constraints="EqualizedOdds",
    )
    reduction.fit(X_train, y_train)
    # Test predictions are requested before training predictions, as in the
    # original code, in case predict() consumes random state.
    y_test_predicts = reduction.predict(X_test)
    y_train_predicts = reduction.predict(X_train)

    # Feature frames with the *predicted* label attached, for AIF360 datasets.
    X_train_predicts = X_train.copy()
    X_test_predicts = X_test.copy()
    X_train_predicts[y_train.name] = y_train_predicts
    X_test_predicts[y_train.name] = y_test_predicts

    # Encoded protected value 1.0 is treated as privileged, 0.0 as unprivileged.
    privileged_groups = [{protected_attribute: 1.0}]
    unprivileged_groups = [{protected_attribute: 0.0}]
    eop = EqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=42)

    # True-label and predicted-label datasets for the training split, and the
    # predicted-label dataset for the test split.
    dataset_train_true = BinaryLabelDataset(
        df=pd.concat([X_train, y_train.to_frame()], axis=1),
        label_names=[y_train.name],
        protected_attribute_names=[protected_attribute],
    )
    dataset_train_predicts = BinaryLabelDataset(
        df=X_train_predicts,
        label_names=[y_train.name],
        protected_attribute_names=[protected_attribute],
    )
    dataset_test_predicts = BinaryLabelDataset(
        df=X_test_predicts,
        label_names=[y_test.name],
        protected_attribute_names=[protected_attribute],
    )

    # Learn the post-processing on the training split (the returned training
    # predictions are not needed), then transform the test predictions.
    eop.fit_predict(dataset_true=dataset_train_true, dataset_pred=dataset_train_predicts)
    dataset_predicts_transf = eop.predict(dataset_test_predicts)

    data_predicts = dataset_predicts_transf.convert_to_dataframe()[0]
    y_transf_predicts = data_predicts[y_test.name].astype(int)

    # Report every metric on the post-processed predictions.
    # NOTE(review): the equalized-odds report and the ABROCA computation are
    # not run; the disabled code fed them probability-style scores - confirm
    # whether they should be restored.
    reports = (
        ("Statistical parity:", calculate_performance_statistical_parity),
        ("Equal opportunity", calculate_performance_equal_opportunity),
        ("Predictive parity", calculate_performance_predictive_parity),
        ("Predictive equality", calculate_performance_predictive_equality),
        ("Treatment equality", calculate_performance_treatment_equality),
    )
    for title, metric in reports:
        print(title)
        print(metric(X_test.values, y_test.values, y_transf_predicts.values, sa_index, p_Group))
    

In [22]:
#Main function
def run_eval(dataset):
    """Load the named data set and run the fairness experiment on it.

    Args:
        dataset: One of ``'credit-approval'``, ``'credit-card'``,
            ``'german-credit'``, ``'PAKDD'`` or ``'credit-scoring'``.

    Raises:
        ValueError: If ``dataset`` is not one of the names above (previously
            an unknown name was silently ignored and nothing was run).
    """
    if dataset == 'credit-approval':
        loader = load_credit_approval
    elif dataset == 'credit-card':
        loader = load_credit_card
    elif dataset == 'german-credit':
        loader = load_german_credit
    elif dataset == 'PAKDD':
        loader = load_PAKDD2010
    elif dataset == 'credit-scoring':
        loader = load_credit_scoring
    else:
        raise ValueError(
            "Unknown dataset %r; expected one of: 'credit-approval', "
            "'credit-card', 'german-credit', 'PAKDD', 'credit-scoring'" % (dataset,)
        )

    # Every loader returns its values in run_experiment's positional order.
    run_experiment(*loader())
    

In [23]:
# Load the credit-approval data and print the fairness metrics for it.
run_eval('credit-approval')

Length: 690
Number of attribute: 16
Length (cleaned): 678
Class imbalance: 
 0    374
1    304
Name: Approved, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.7783625730994153, 'accuracy': 0.7941176470588235, 'f1-score': 0.7341772151898734, 'fairness': 0.03763143331488655}
Equal opportunity
{'balanced_accuracy': 0.7783625730994153, 'accuracy': 0.7941176470588235, 'f1-score': 0.7341772151898734, 'fairness': 0.09495192307692313, 'TPR_protected': 0.5769230769230769, 'TPR_non_protected': 0.671875, 'TNR_protected': 0.8717948717948718, 'TNR_non_protected': 0.9333333333333333}
Predictive parity
{'balanced_accuracy': 0.7783625730994153, 'accuracy': 0.7941176470588235, 'f1-score': 0.7341772151898734, 'fairness': 0.14583333333333337, 'TPR_protected': 0.5769230769230769, 'TPR_non_protected': 0.671875, 'TNR_protected': 0.8717948717948718, 'TNR_non_protected': 0.9333333333333333}
Predictive equality
{'balanced_accuracy': 0.7783625730994153, 'accuracy': 0.7941176470588235, 'f1-score': 0.734

In [24]:
# Load the credit-card-clients data and print the fairness metrics for it.
run_eval('credit-card')

Length: 30000
Number of attribute: 24
Length (cleaned): 30000
Class imbalance: 
 0    23364
1     6636
Name: default payment, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.5030307861781076, 'accuracy': 0.4791111111111111, 'f1-score': 0.31321418107237037, 'fairness': 0.006063677866650963}
Equal opportunity
{'balanced_accuracy': 0.5030307861781076, 'accuracy': 0.4791111111111111, 'f1-score': 0.31321418107237037, 'fairness': 0.014832700931355691, 'TPR_protected': 0.5390134529147982, 'TPR_non_protected': 0.5538461538461539, 'TNR_protected': 0.46197051252047744, 'TNR_non_protected': 0.45861944344054933}
Predictive parity
{'balanced_accuracy': 0.5030307861781076, 'accuracy': 0.4791111111111111, 'f1-score': 0.31321418107237037, 'fairness': 0.03080541621356156, 'TPR_protected': 0.5390134529147982, 'TPR_non_protected': 0.5538461538461539, 'TNR_protected': 0.46197051252047744, 'TNR_non_protected': 0.45861944344054933}
Predictive equality
{'balanced_accuracy': 0.5030307861781076, 'accu

In [28]:
# Load the credit-scoring data and print the fairness metrics for it.
# NOTE(review): this cell's execution count (28) is out of sequence with its
# neighbours (24 before, 26 after) - re-run with "Restart & Run All" before sharing.
run_eval('credit-scoring')

Length: 8755
Number of attribute: 18
Length (cleaned): 8755
Class imbalance: 
 1    8059
0     696
Name: label, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.9127265807386796, 'accuracy': 0.9596497906357061, 'f1-score': 0.9777123633305299, 'fairness': 0.03755094444961382}
Equal opportunity
{'balanced_accuracy': 0.9127265807386796, 'accuracy': 0.9596497906357061, 'f1-score': 0.9777123633305299, 'fairness': 0.010147956272325787, 'TPR_protected': 0.9657027572293208, 'TPR_non_protected': 0.9758507135016465, 'TNR_protected': 0.8650306748466258, 'TNR_non_protected': 0.8333333333333334}
Predictive parity
{'balanced_accuracy': 0.9127265807386796, 'accuracy': 0.9596497906357061, 'f1-score': 0.9777123633305299, 'fairness': 0.0028669410150891084, 'TPR_protected': 0.9657027572293208, 'TPR_non_protected': 0.9758507135016465, 'TNR_protected': 0.8650306748466258, 'TNR_non_protected': 0.8333333333333334}
Predictive equality
{'balanced_accuracy': 0.9127265807386796, 'accuracy': 0.95964979063

In [26]:
# Load the German credit data and print the fairness metrics for it.
run_eval('german-credit')

Length: 1000
Number of attribute: 22
Length (cleaned): 1000
Class imbalance: 
 1    700
0    300
Name: class-label, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.64056995635943, 'accuracy': 0.7066666666666667, 'f1-score': 0.7934272300469484, 'fairness': -0.11813893653516294}
Equal opportunity
{'balanced_accuracy': 0.64056995635943, 'accuracy': 0.7066666666666667, 'f1-score': 0.7934272300469484, 'fairness': 0.12497175141242944, 'TPR_protected': 0.8983050847457628, 'TPR_non_protected': 0.7733333333333333, 'TNR_protected': 0.3793103448275862, 'TNR_non_protected': 0.5161290322580645}
Predictive parity
{'balanced_accuracy': 0.64056995635943, 'accuracy': 0.7066666666666667, 'f1-score': 0.7934272300469484, 'fairness': 0.04804167470576881, 'TPR_protected': 0.8983050847457628, 'TPR_non_protected': 0.7733333333333333, 'TNR_protected': 0.3793103448275862, 'TNR_non_protected': 0.5161290322580645}
Predictive equality
{'balanced_accuracy': 0.64056995635943, 'accuracy': 0.7066666666666667,

In [27]:
# Load the PAKDD data and print the fairness metrics for it.
run_eval('PAKDD')

Length: 50000
Number of attribute: 47
Length (cleaned): 38896
Class imbalance: 
 0    28747
1    10149
Name: TARGET_LABEL_BAD, dtype: int64
Statistical parity:
{'balanced_accuracy': 0.5089792828300436, 'accuracy': 0.7259405261804782, 'f1-score': 0.08471665712650259, 'fairness': -0.0013000085763060151}
Equal opportunity
{'balanced_accuracy': 0.5089792828300436, 'accuracy': 0.7259405261804782, 'f1-score': 0.08471665712650259, 'fairness': 0.01480856951380332, 'TPR_protected': 0.05402425578831312, 'TPR_non_protected': 0.0392156862745098, 'TNR_protected': 0.9712081294693263, 'TNR_non_protected': 0.968156766687079}
Predictive parity
{'balanced_accuracy': 0.5089792828300436, 'accuracy': 0.7259405261804782, 'f1-score': 0.08471665712650259, 'fairness': 0.0657629223366275, 'TPR_protected': 0.05402425578831312, 'TPR_non_protected': 0.0392156862745098, 'TNR_protected': 0.9712081294693263, 'TNR_non_protected': 0.968156766687079}
Predictive equality
{'balanced_accuracy': 0.5089792828300436, 'accurac