# Fairness Analysis

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import xgboost as xgb
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.metrics import classification_report
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

In [26]:
# Load the pre-split feature matrices and target vectors (comma-separated
# files) directly from the project's GitHub repository.
df_cr_X_train = pd.read_csv(
    'https://raw.githubusercontent.com/tracysnowf/Deloitte-CapstoneProject/main/data/X_train.csv',
    sep=',',
)

df_cr_y_train = pd.read_csv(
    'https://raw.githubusercontent.com/tracysnowf/Deloitte-CapstoneProject/main/data/y_train.csv',
    sep=',',
)

df_cr_X_test = pd.read_csv(
    'https://raw.githubusercontent.com/tracysnowf/Deloitte-CapstoneProject/main/data/X_test.csv',
    sep=',',
)

df_cr_y_test = pd.read_csv(
    'https://raw.githubusercontent.com/tracysnowf/Deloitte-CapstoneProject/main/data/y_test.csv',
    sep=',',
)

In [27]:
# Summarise the training features: column names, non-null counts and dtypes.
df_cr_X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8361 entries, 0 to 8360
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   is_not_hispanic           8361 non-null   object 
 1   derived_race              8361 non-null   object 
 2   is_male                   8361 non-null   object 
 3   preapproval_requested     8361 non-null   object 
 4   loan_type                 8361 non-null   object 
 5   loan_purpose              8361 non-null   object 
 6   loan_amount               8361 non-null   float64
 7   loan_to_value_ratio       8361 non-null   float64
 8   interest_only_payment     8361 non-null   int64  
 9   balloon_payment           8361 non-null   int64  
 10  income                    8361 non-null   float64
 11  debt_to_income_ratio      8361 non-null   object 
 12  applicant_age             8361 non-null   object 
 13  log_loan_to_value_ratio   8361 non-null   float64
 14  mid_debt

In [28]:
# Summarise the training target: a single integer column, action_taken.
df_cr_y_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8361 entries, 0 to 8360
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   action_taken  8361 non-null   int64
dtypes: int64(1)
memory usage: 65.4 KB


In [29]:
# Get the sensitive attributes.
# The feature file names the ethnicity and sex columns 'is_not_hispanic' and
# 'is_male' (see the column listing printed by df_cr_X_train.info()), so those
# are the keys to select; 'ethnicity' and 'gender' do not exist in the frame.
X_train_sensitive = df_cr_X_train[['is_not_hispanic', 'derived_race', 'is_male', 'income', 'applicant_age']]
X_train_sensitive.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8361 entries, 0 to 8360
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   is_not_hispanic  8361 non-null   object 
 1   derived_race     8361 non-null   object 
 2   is_male          8361 non-null   object 
 3   income           8361 non-null   float64
 4   applicant_age    8361 non-null   object 
dtypes: float64(1), object(4)
memory usage: 326.7+ KB


In [30]:
# Same sensitive attributes for the test split. The columns are called
# 'is_not_hispanic' and 'is_male' in the data, not 'ethnicity' and 'gender'.
X_test_sensitive = df_cr_X_test[['is_not_hispanic', 'derived_race', 'is_male', 'income', 'applicant_age']]
X_test_sensitive.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2787 entries, 0 to 2786
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   is_not_hispanic  2787 non-null   object 
 1   derived_race     2787 non-null   object 
 2   is_male          2787 non-null   object 
 3   income           2787 non-null   float64
 4   applicant_age    2787 non-null   object 
dtypes: float64(1), object(4)
memory usage: 109.0+ KB


In [31]:
# One-hot encoding (for categorical features with more than two categories, i.e. not binary)

def OneHotEncoding(df, enc, categories):
    """Replace categorical columns of ``df`` with their one-hot encoded columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns listed in ``categories``.
    enc : fitted encoder
        Object exposing ``transform`` and either ``get_feature_names_out``
        (current scikit-learn) or ``get_feature_names`` (older releases).
    categories : list of str
        Names of the columns to encode.

    Returns
    -------
    pandas.DataFrame
        ``df`` with its index reset to 0..n-1, the original ``categories``
        columns dropped and the encoded indicator columns appended.
    """
    encoded = enc.transform(df[categories])
    # Sparse results need densifying; a dense ndarray has no ``toarray``.
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()

    # ``get_feature_names`` was removed from scikit-learn; prefer its
    # replacement and fall back only for older installations.
    if hasattr(enc, "get_feature_names_out"):
        column_names = enc.get_feature_names_out(categories)
    else:
        column_names = enc.get_feature_names(categories)

    transformed = pd.DataFrame(encoded, columns=column_names)
    # The encoded frame has a fresh 0..n-1 index, so reset ``df`` to match
    # before concatenating column-wise.
    return pd.concat([df.reset_index(drop=True), transformed], axis=1).drop(categories, axis=1)

# Learn the category levels from the training split only, then encode both
# splits with that same fitted encoder.
categories = ['derived_race', 'applicant_age']
enc_ohe = OneHotEncoder().fit(X_train_sensitive[categories])

X_train_sensitive, X_test_sensitive = (
    OneHotEncoding(split, enc_ohe, categories)
    for split in (X_train_sensitive, X_test_sensitive)
)

In [32]:
# Preview the training frame after one-hot encoding race and age.
X_train_sensitive.head()

Unnamed: 0,is_not_hispanic,is_male,income,derived_race_Asian,derived_race_Black or African American,derived_race_Minority Races,derived_race_White,applicant_age_25-34,applicant_age_35-44,applicant_age_45-54,applicant_age_55-64,applicant_age_65-74,applicant_age_<25,applicant_age_>74
0,Not Hispanic or Latino,Female,76.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,Not Hispanic or Latino,Female,102.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,Not Hispanic or Latino,Female,115.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Not Hispanic or Latino,Female,26.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,Not Hispanic or Latino,Female,94.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# Ordinal encoding for the two binary categorical columns: each of the two
# string labels is mapped to 0.0 or 1.0. The encoder is fitted on the training
# split only and then applied to both splits.
# The columns are named 'is_not_hispanic' and 'is_male' in the data; the
# keys 'ethnicity' and 'gender' are not present in the frame.
categories = ['is_not_hispanic', 'is_male']
enc_oe = OrdinalEncoder()
enc_oe.fit(X_train_sensitive[categories])

X_train_sensitive[categories] = enc_oe.transform(X_train_sensitive[categories])
X_test_sensitive[categories] = enc_oe.transform(X_test_sensitive[categories])

In [34]:
# Preview the training frame after ordinal-encoding the binary columns.
X_train_sensitive.head()

Unnamed: 0,is_not_hispanic,is_male,income,derived_race_Asian,derived_race_Black or African American,derived_race_Minority Races,derived_race_White,applicant_age_25-34,applicant_age_35-44,applicant_age_45-54,applicant_age_55-64,applicant_age_65-74,applicant_age_<25,applicant_age_>74
0,1.0,0.0,76.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,102.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,115.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,26.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,1.0,0.0,94.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
# Standardization (for numeric variables).
# The scaler learns the mean and standard deviation from the training column
# only; those same statistics are then used to rescale the test column, so no
# information from the test split leaks into the preprocessing.
categories = ['income']
scaler = StandardScaler()
X_train_sensitive[categories] = scaler.fit_transform(X_train_sensitive[categories])
X_test_sensitive[categories] = scaler.transform(X_test_sensitive[categories])

In [36]:
# Preview the training frame after standardizing income.
X_train_sensitive.head()

Unnamed: 0,is_not_hispanic,is_male,income,derived_race_Asian,derived_race_Black or African American,derived_race_Minority Races,derived_race_White,applicant_age_25-34,applicant_age_35-44,applicant_age_45-54,applicant_age_55-64,applicant_age_65-74,applicant_age_<25,applicant_age_>74
0,1.0,0.0,-0.278194,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,-0.194786,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,-0.153082,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,-0.438594,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,1.0,0.0,-0.22045,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
# Replace the '<' and '>' characters in the one-hot age column names with
# words, identically on both splits.
# NOTE(review): presumably needed because XGBoost rejects feature names
# containing '<' - confirm against the installed version.
_age_column_renames = {
    'applicant_age_<25': 'applicant_age_less_25',
    'applicant_age_>74': 'applicant_age_greater_74',
}
X_train_sensitive.rename(columns=_age_column_renames, inplace=True)
X_test_sensitive.rename(columns=_age_column_renames, inplace=True)

In [38]:
# Train model.
# The target is passed as the 1-D 'action_taken' column rather than the whole
# one-column DataFrame: a column-vector y makes scikit-learn emit a
# DataConversionWarning during fitting.
model = xgb.XGBClassifier(objective="binary:logistic")
model.fit(X_train_sensitive, df_cr_y_train['action_taken'])

# Get predictions and store them next to the true labels so that subgroups
# can be sliced from a single frame.
y_pred = model.predict(X_test_sensitive)
df_cr_y_test['y_pred'] = y_pred

  return f(*args, **kwargs)




In [39]:
# Evaluate on the held-out test set: confusion matrix (rows are the true
# class, columns the predicted class), overall accuracy, and the per-class
# precision / recall / F1 report.
print(confusion_matrix(df_cr_y_test['action_taken'], y_pred))
print(accuracy_score(df_cr_y_test['action_taken'], y_pred))
print(classification_report(df_cr_y_test['action_taken'], y_pred))

[[ 149  516]
 [ 132 1990]]
0.767491926803014
              precision    recall  f1-score   support

           0       0.53      0.22      0.32       665
           1       0.79      0.94      0.86      2122

    accuracy                           0.77      2787
   macro avg       0.66      0.58      0.59      2787
weighted avg       0.73      0.77      0.73      2787



In [40]:
def fairness_metrics(df):
    """Calculate fairness metrics for a subgroup of the population.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of the subgroup. Must contain the true label in ``action_taken``
        and the predicted label in ``y_pred``. Label ``1`` is the positive
        class; any other value is counted as negative.

    Returns
    -------
    numpy.ndarray
        ``[ACC, TPR, FPR, FNR, PPP]``: accuracy, true positive rate, false
        positive rate, false negative rate and the share of rows predicted
        positive. A rate whose denominator is zero (for example FPR when the
        subgroup has no actual negatives, or every rate for an empty
        subgroup) is returned as NaN.
    """
    actual_pos = np.asarray(df['action_taken']) == 1
    predicted_pos = np.asarray(df['y_pred']) == 1

    # Count the four confusion-matrix cells explicitly so that a subgroup
    # containing only one class still yields all four counts.
    TP = int(np.sum(actual_pos & predicted_pos))
    FP = int(np.sum(~actual_pos & predicted_pos))
    FN = int(np.sum(actual_pos & ~predicted_pos))
    TN = int(np.sum(~actual_pos & ~predicted_pos))

    N = TP + FP + FN + TN

    def _rate(numerator, denominator):
        # An undefined rate is reported as NaN instead of dividing by zero.
        return numerator / denominator if denominator else np.nan

    # Overall accuracy
    ACC = _rate(TP + TN, N)

    # True positive rate
    TPR = _rate(TP, TP + FN)

    # False positive rate
    FPR = _rate(FP, FP + TN)

    # False negative rate
    FNR = _rate(FN, TP + FN)

    # Percentage predicted as positive
    PPP = _rate(TP + FP, N)

    return np.array([ACC, TPR, FPR, FNR, PPP])
    

In [41]:
# Fairness metrics by ethnicity: rows flagged is_not_hispanic == 1 versus
# rows flagged is_not_hispanic == 0.
fm_ethnicity_1 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['is_not_hispanic'].eq(1)])
fm_ethnicity_0 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['is_not_hispanic'].eq(0)])

# Element-wise ratio of each metric, group 0 over group 1.
fm_ethnicity = fm_ethnicity_0/fm_ethnicity_1

print(fm_ethnicity_1, fm_ethnicity_0, fm_ethnicity, sep='\n')

[0.77043545 0.94197099 0.78352181 0.05802901 0.90450726]
[0.72189349 0.8699187  0.67391304 0.1300813  0.81656805]
[0.93699413 0.92350902 0.86010758 2.24165966 0.90277667]


In [42]:
# Column headers, in the same order as the values returned by fairness_metrics.
col_names = [
    'Accuracy',
    'True Positive',
    'False Positive',
    'False Negative',
    '% Predicted as Positive',
]

# Display table: first row is is_not_hispanic == 1, second row is is_not_hispanic == 0.
print(
    tabulate(
        tabular_data=[fm_ethnicity_1, fm_ethnicity_0],
        headers=col_names,
        tablefmt="fancy_grid",
    )
)

╒════════════╤═════════════════╤══════════════════╤══════════════════╤═══════════════════════════╕
│   Accuracy │   True Positive │   False Positive │   False Negative │   % Predicted as Positive │
╞════════════╪═════════════════╪══════════════════╪══════════════════╪═══════════════════════════╡
│   0.770435 │        0.941971 │         0.783522 │         0.058029 │                  0.904507 │
├────────────┼─────────────────┼──────────────────┼──────────────────┼───────────────────────────┤
│   0.721893 │        0.869919 │         0.673913 │         0.130081 │                  0.816568 │
╘════════════╧═════════════════╧══════════════════╧══════════════════╧═══════════════════════════╛


In [43]:
# Fairness metrics by sex: rows flagged is_male == 1 versus rows flagged
# is_male == 0.
fm_sex_1 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['is_male'].eq(1)])
fm_sex_0 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['is_male'].eq(0)])

# Element-wise ratio of each metric, group 0 over group 1.
fm_sex = fm_sex_0/fm_sex_1

print(fm_sex_1, fm_sex_0, fm_sex, sep='\n')

[0.77492669 0.92722117 0.75163399 0.07277883 0.88782991]
[0.76036543 0.94830827 0.79665738 0.05169173 0.91004919]
[0.9812095  1.02274225 1.05990069 0.71025779 1.02502651]


In [44]:
# Fairness metrics by race (derived_race).
# Race is not a binary column, so two races cannot be compared directly with
# this approach. Instead each one-hot indicator gives an "is / is not"
# comparison; here: Asian (derived_race_Asian == 1) versus everyone else.
fm_asian_1 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['derived_race_Asian'].eq(1)])
fm_asian_0 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['derived_race_Asian'].eq(0)])

# Element-wise ratio of each metric, group 0 over group 1.
fm_asian = fm_asian_0/fm_asian_1

print(fm_asian_1, fm_asian_0, fm_asian, sep='\n')

[0.85380117 0.97241379 0.80769231 0.02758621 0.94736842]
[0.76185015 0.93525544 0.77464789 0.06474456 0.89602446]
[0.89230395 0.96178751 0.95908786 2.34699039 0.9458036 ]


In [45]:
# Same "is / is not" comparison for the Minority Races indicator
# (derived_race_Minority Races == 1 versus == 0).
fm_minority_1 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['derived_race_Minority Races'].eq(1)])
fm_minority_0 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['derived_race_Minority Races'].eq(0)])

# Element-wise ratio of each metric, group 0 over group 1.
fm_minority = fm_minority_0/fm_minority_1

print(fm_minority_1, fm_minority_0, fm_minority, sep='\n')

[0.75       0.87096774 0.53846154 0.12903226 0.77272727]
[0.76777251 0.93878527 0.78067485 0.06121473 0.90120306]
[1.02369668 1.07786457 1.44982472 0.47441416 1.16626279]


In [46]:
# Income is continuous rather than categorical, so no group-based fairness metrics are computed for it here (it would first have to be binned into groups).

In [47]:
# Fairness metrics by age (applicant_age).
# Compare applicants in the 35-44 band (applicant_age_35-44 == 1) with
# applicants in every other band.
fm_35_44_1 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['applicant_age_35-44'].eq(1)])
fm_35_44_0 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['applicant_age_35-44'].eq(0)])

# Element-wise ratio of each metric, group 0 over group 1.
fm_35_44 = fm_35_44_0/fm_35_44_1

print(fm_35_44_1, fm_35_44_0, fm_35_44, sep='\n')

[0.81315789 0.96758509 0.85314685 0.03241491 0.94605263]
[0.75037    0.9255814  0.75478927 0.0744186  0.88159842]
[0.92278512 0.95658915 0.88471202 2.29581395 0.93187038]


In [48]:
# Fairness metrics by age (applicant_age), second comparison.
# Compare applicants in the 65-74 band (applicant_age_65-74 == 1) with
# applicants in every other band.
fm_65_74_1 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['applicant_age_65-74'].eq(1)])
fm_65_74_0 = fairness_metrics(df_cr_y_test.loc[X_test_sensitive['applicant_age_65-74'].eq(0)])

# Element-wise ratio of each metric, group 0 over group 1.
fm_65_74 = fm_65_74_0/fm_65_74_1

print(fm_65_74_1, fm_65_74_0, fm_65_74, sep='\n')

[0.64081633 0.81756757 0.62886598 0.18243243 0.74285714]
[0.77970102 0.94680851 0.80105634 0.05319149 0.91424076]
[1.2167309  1.15807983 1.2738109  0.29156816 1.23070871]
