# Appendix

*Maciej Ołdakowski*

In [119]:
import dalex as dx

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

from sklearn.svm import SVC
import xgboost

import imblearn
from imblearn.under_sampling import RandomUnderSampler

from dalex.fairness import resample, reweight, roc_pivot

from copy import copy

import pandas as pd
import numpy as np

In [56]:
df = pd.read_csv('german_credit_risk.csv')

# change object to categorical in each column
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].astype('category')

# drop not needed rows
df = df.drop(['Unnamed: 0'], axis=1)

df.head()

Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Class
0,67,male,2,own,,little,1169,6,radio/TV,1
1,22,female,2,own,little,moderate,5951,48,radio/TV,2
2,49,male,1,own,little,,2096,12,education,1
3,45,male,2,free,little,little,7882,42,furniture/equipment,1
4,53,male,2,free,little,little,4870,24,car,2


In [57]:
# columns that are categorical
cat_cols = df.select_dtypes(include=['category']).columns

X = df.drop(columns=['Class'])
y = df.Class

# in y change 1 to 1 and 2 to 0
y = y.replace(2, 0) # 2 is bad credit, 1 is good credit

In [58]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

## Model 1 (Xgboost)

In [59]:
model = xgboost.XGBClassifier(
    n_estimators=50, 
    max_depth=4,
    eval_metric="logloss",
    grow_policy="lossguide",
    booster="gbtree",
    enable_categorical=True,
    tree_method="hist"
)

model.fit(X_train, y_train)

In [60]:
exp = dx.Explainer(model, X_test, y_test)

Preparation of a new explainer is initiated

  -> data              : 330 rows 9 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 330 values
  -> model_class       : xgboost.sklearn.XGBClassifier (default)
  -> label             : Not specified, model's class short name will be used. (default)
  -> predict function  : <function yhat_proba_default at 0x75067478fa60> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.0194, mean = 0.723, max = 0.998
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -0.995, mean = -0.0288, max = 0.955
  -> model_info        : package xgboost

A new explainer has been created!


In [61]:
exp.model_performance()

Unnamed: 0,recall,precision,f1,accuracy,auc
XGBClassifier,0.886463,0.792969,0.837113,0.760606,0.750659


In [62]:
# Statistical parity - STP, Equal opportunity - TPR, Predictive parity - PPV
protected_variable = X_test.Sex
privileged_group = "male"

fobject = exp.model_fairness(
    protected=protected_variable,
    privileged=privileged_group
)

fobject.fairness_check()

Bias detected in 1 metric: FPR

Conclusion: your model cannot be called fair because 1 criterion exceeded acceptable limits set by epsilon.
It does not mean that your model is unfair but it cannot be automatically approved based on these metrics.

Ratios of metrics, based on 'male'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
            TPR       ACC       PPV  FPR       STP
female  1.00678  1.058824  1.036943  0.7  0.916981


## Model 2 (SVC)

In [88]:
X_oneshotencoded =  pd.get_dummies(X)

X_train, X_test, y_train, y_test = train_test_split(X_oneshotencoded, y, test_size=0.33, random_state=42)

In [None]:
rfc = SVC(kernel='linear', random_state=42)
rfc.fit(X_train, y_train)

In [89]:
exp_2 = dx.Explainer(rfc, X_test, y_test)

Preparation of a new explainer is initiated

  -> data              : 330 rows 24 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 330 values
  -> model_class       : sklearn.svm._classes.SVC (default)
  -> label             : Not specified, model's class short name will be used. (default)
  -> predict function  : <function yhat_default at 0x75067478f9c0> will be used (default)
  -> predict function  : Accepts pandas.DataFrame and numpy.ndarray.
  -> predicted values  : min = 0.0, mean = 0.809, max = 1.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -1.0, mean = -0.115, max = 1.0
  -> model_info        : package sklearn

A new explainer has been created!


In [90]:
exp_2.model_performance()

Unnamed: 0,recall,precision,f1,accuracy,auc
SVC,0.882096,0.756554,0.814516,0.721212,0.619266


In [94]:
# Statistical parity - STP, Equal opportunity - TPR, Predictive parity - PPV
protected_variable = X_test.Sex_male.apply(lambda x: "male" if x else "female")
privileged_group = "male"

fobject_2 = exp_2.model_fairness(
    protected=protected_variable,
    privileged=privileged_group
)
fobject_2.fairness_check()

Bias detected in 1 metric: FPR

Conclusion: your model cannot be called fair because 1 criterion exceeded acceptable limits set by epsilon.
It does not mean that your model is unfair but it cannot be automatically approved based on these metrics.

Ratios of metrics, based on 'male'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
             TPR      ACC     PPV       FPR       STP
female  0.893289  1.01532  1.0563  0.592693  0.800931


## Model 3 (XGBoost)

In [107]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# undersample = RandomUnderSampler(sampling_strategy='majority', random_state=42)
# X_train, y_train = undersample.fit_resample(X_train, y_train)

In [118]:
# resample
indices_resample = resample(
    X_train.Sex, 
    y_train, 
    type='preferential',
    probs=model.predict_proba(X_train)[:, 1], 
    verbose=False
)

In [120]:
model_resample = copy(model)

model_resample.fit(X_train.iloc[indices_resample, :], y_train.iloc[indices_resample])

In [141]:
exp_3 = dx.Explainer(model_resample, X_test, y_test, label='XGBClassifier_resample')

Preparation of a new explainer is initiated

  -> data              : 330 rows 9 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 330 values
  -> model_class       : xgboost.sklearn.XGBClassifier (default)
  -> label             : XGBClassifier_resample
  -> predict function  : <function yhat_proba_default at 0x75067478fa60> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.007, mean = 0.575, max = 0.996
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -0.996, mean = 0.119, max = 0.986
  -> model_info        : package xgboost

A new explainer has been created!


In [142]:
exp_3.model_performance()

Unnamed: 0,recall,precision,f1,accuracy,auc
XGBClassifier_resample,0.733624,0.835821,0.781395,0.715152,0.762333


In [143]:
# Statistical parity - STP, Equal opportunity - TPR, Predictive parity - PPV
protected_variable = X_test.Sex
privileged_group = "male"

fobject_3 = exp_3.model_fairness(
    protected=protected_variable,
    privileged=privileged_group
)

fobject_3.fairness_check()

No bias was detected!

Conclusion: your model is fair in terms of checked fairness criteria.

Ratios of metrics, based on 'male'. Parameter 'epsilon' was set to 0.8 and therefore metrics should be within (0.8, 1.25)
             TPR       ACC       PPV      FPR       STP
female  1.062413  1.028209  0.968009  1.07837  1.036484


## Summary

In [146]:
fobject.plot([fobject_2,fobject_3],show=False)

In [148]:
pd.concat([
    exp.model_performance().result, 
    exp_2.model_performance().result,
    exp_3.model_performance().result,
], axis=0)

Unnamed: 0,recall,precision,f1,accuracy,auc
XGBClassifier,0.886463,0.792969,0.837113,0.760606,0.750659
SVC,0.882096,0.756554,0.814516,0.721212,0.619266
XGBClassifier_resample,0.733624,0.835821,0.781395,0.715152,0.762333


In [164]:
results = pd.concat([
    fobject.result[["STP","TPR","PPV"]].iloc[0], 
    fobject_2.result[["STP","TPR","PPV"]].iloc[0],
    fobject_3.result[["STP","TPR","PPV"]].iloc[0],
], axis=1)

results.columns = ["XGBClassifier", "SVC", "XGBClassifier_resample"]

results

Unnamed: 0,XGBClassifier,SVC,XGBClassifier_resample
STP,0.916981,0.800931,1.036484
TPR,1.00678,0.893289,1.062413
PPV,1.036943,1.0563,0.968009
