# German Credit Classification

This notebook is used as part of my thesis, comparing different XAI methods and libraries.
<br/>
The purpose of the created models is to classify the risk of credit applicants to decide if the credit should be denied.
<br/>
The result is then explained with the help of [Counterfactual Instances](https://docs.seldon.io/projects/alibi/en/stable/methods/CF.html).
<br/>

Dataset: https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)

## 1 Set up Environment and Dataset <a class="anchor" id="ch1"></a>

### 1.1 Load Libraries and Set Up Parameters <a class="anchor" id="ch1.1"></a>

In [1]:
# random seed for reproduction
seedNum = 23

In [2]:
# import dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import urllib.request
import seaborn as sns
import catboost
import shap
import lime
import graphviz
import tensorflow as tf

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.model_selection import cross_val_score, GridSearchCV, cross_validate
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, auc
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.inspection import partial_dependence, plot_partial_dependence

from catboost import CatBoostClassifier
from alibi.explainers import AnchorTabular, CounterFactualProto, CounterFactual
from alibi.utils.mapping import ohe_to_ord, ord_to_ohe
from datetime import datetime

# required installs:
# pip install shap
# pip install lime
# pip install alibi
# conda install python-graphviz AND install from https://graphviz.org/download/

In [3]:
# timer for the script processing
startTimeScript = datetime.now()

# set up n_jobs
n_jobs = 6

# set flag for splitting the dataset
splitDataset = True
splitPercentage = 0.20

# set number of folds for cross validation
n_folds = 10

# set various default modeling parameters
scoring = 'accuracy'

In [4]:
# list of column names
col_names = ['existing checking', 'credit duration (months)', 'credit history', 'credit purpose', 'credit amount',
             'existing savings', 'employment since', 'installment rate', 'sex and marital status', 'other debtors',
             'residence since (years)', 'property', 'age', 'other installment plans', 'housing',
             'existing credits', 'job', 'people liable', 'telephone', 'foreign worker', 'target']

# reordered list of column names, with all categorical variables at the front
new_order = ['existing checking', 'credit history', 'credit purpose', 'existing savings', 'employment since',
             'sex and marital status', 'other debtors', 'property', 'other installment plans', 'housing', 'job',
             'telephone', 'foreign worker', 'credit duration (months)', 'credit amount', 'installment rate',
             'residence since (years)','age','existing credits', 'people liable', 'target']

# dictionary of original categorical variable values and their encoded value
replace_with = {"A11" : 0, "A12" : 1, "A13" : 2, "A14" : 3,
                "A30" : 0, "A31" : 1, "A32" : 2, "A33" : 3, "A34" : 4,
                "A40" : 0, "A41" : 1, "A42" : 2, "A43" : 3, "A44" : 4, "A45" : 5,
                "A46" : 6, "A48" : 7, "A49" : 8, "A410" : 9,
                "A61" : 0, "A62" : 1, "A63" : 2, "A64" : 3, "A65" : 4,
                "A71" : 0, "A72" : 1, "A73" : 2, "A74" : 3, "A75" : 4,
                "A91" : 0, "A92" : 1, "A93" : 2, "A94" : 3,
                "A101" : 0, "A102" : 1, "A103" : 2,
                "A121" : 0, "A122" : 1, "A123" : 2, "A124" : 3,
                "A141" : 0, "A142" : 1, "A143" : 2,
                "A151" : 0, "A152" : 1, "A153" : 2,
                "A171" : 0, "A172" : 1, "A173" : 2, "A174" : 3,
                "A191" : 0, "A192" : 1,"A201" : 0, "A202" : 1
               }

# dictionary of categorical variable values
category_map={0: ["0 or less", "0 to 200", "more than 200", "no checking account"],
              1: ["no credits taken / all paid back duly", "all credits at this bank paid back duly",
                  "all paid back duly until now", "delay in paying off in the past",
                  "critical account/ other credits existing (not at this bank)"],
              2: ["car (new)", "car (used)", "furniture/equipment", "radio/television",
                  "domestic appliances", "repairs", "education", "retraining", "business", "others"],
              3: ["less than 100", "100 to 500", "500 to 1000", "more than 1000", "unknown/ no savings account"],
              4: ["unemployed", "less than 1 year", "1 to 4 years", "4 to 7 years", "more than 7 years"],
              5: ["male : divorced/separated", "female : divorced/separated/married", "male : single",
                  "male : married/widowed"],
              6: ["none", "co-applicant", "guarantor"],
              7: ["real estate", "life insurance", "car or other", "unknown / no property"], 
              8: ["bank", "stores", "none"], 
              9: ["rent", "own", "for free"], 
              10: ["unemployed/ unskilled - non-resident", "unskilled - resident", "skilled employee / official",
                   "management/self-employed/highly qualified/officer"], 
              11: ["no", "yes"], 
              12: ["yes", "no"], 
             }


In [5]:
#import dataset
dataset_path = 'http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data'
Xy_original = pd.read_csv(dataset_path, names=col_names, sep=' ', header=None)
Xy_original = Xy_original[new_order]
Xy_original.replace(replace_with, inplace=True)
Xy_original.replace({"target":{1:0, 2:1}},inplace=True)

In [6]:
print(Xy_original.groupby("target").size())

target
0    700
1    300
dtype: int64


### 1.2 Preprocessing <a class="anchor" id="ch1.3"></a>

In [7]:
# Use variable totCol to hold the number of columns in the dataframe
totCol = len(Xy_original.columns)
totAttr = totCol-1

X_original = Xy_original.iloc[:,0:totAttr]
y_original = Xy_original.iloc[:,totAttr]

print("Xy_original.shape: {} X_original.shape: {} y_original.shape: {}".format(Xy_original.shape, X_original.shape, y_original.shape))

Xy_original.shape: (1000, 21) X_original.shape: (1000, 20) y_original.shape: (1000,)


In [8]:
# create dictionary with the number of categories for each variable in the dataset
cat_vars_ord = {}
n_categories = len(list(category_map.keys()))
for i in range(n_categories):
    cat_vars_ord[i] = len(np.unique(X_original.to_numpy()[:, i]))
print(cat_vars_ord)

{0: 4, 1: 5, 2: 10, 3: 5, 4: 5, 5: 4, 6: 3, 7: 4, 8: 3, 9: 3, 10: 4, 11: 2, 12: 2}


In [9]:
# create dictionary containing the first column index for each one-hot encoded categorical variable
cat_vars_ohe = ord_to_ohe(X_original.to_numpy(), cat_vars_ord)[1]
print(cat_vars_ohe)

{0: 4, 4: 5, 9: 10, 19: 5, 24: 5, 29: 4, 33: 3, 36: 4, 40: 3, 43: 3, 46: 4, 50: 2, 52: 2}


In [10]:
X_num = X_original.to_numpy()[:, -7:].astype(np.float32, copy=False)
scaler = MinMaxScaler(feature_range=(-1,1))
X_num_scaled= scaler.fit_transform(X_num)

In [11]:
X_cat = X_original.to_numpy()[:, :13].copy()
ohe = OneHotEncoder(categories='auto')
ohe.fit(X_cat)
X_cat_ohe = ohe.transform(X_cat)

In [12]:
X_enc = np.c_[X_cat_ohe.todense(), X_num_scaled].astype(np.float32, copy=False)

X_enc = pd.DataFrame(X_enc)

In [13]:
# Split the data further into training and test datasets
X_train_df, X_test_df, y_train_df, y_test_df = train_test_split(X_enc, y_original, test_size=splitPercentage, 
                                                                stratify=y_original, random_state=seedNum)

print("X_train.shape: {} y_train_df.shape: {}".format(X_train_df.shape, y_train_df.shape))
print("X_test_df.shape: {} y_test_df.shape: {}".format(X_test_df.shape, y_test_df.shape))

X_train.shape: (800, 61) y_train_df.shape: (800,)
X_test_df.shape: (200, 61) y_test_df.shape: (200,)


In [14]:
# Finalize the training and testing datasets for the modeling activities
X_train = X_train_df.to_numpy()
y_train = y_train_df.to_numpy()
X_test = X_test_df.to_numpy()
y_test = y_test_df.to_numpy()
print("X_train.shape: {} y_train.shape: {}".format(X_train.shape, y_train.shape))
print("X_test.shape: {} y_test.shape: {}".format(X_test.shape, y_test.shape))

X_train.shape: (800, 61) y_train.shape: (800,)
X_test.shape: (200, 61) y_test.shape: (200,)


## 2 Tree-based Modeling <a class="anchor" id="ch2"></a>

Random Forest:

In [15]:
startTimeModule = datetime.now()

tune_model = RandomForestClassifier(random_state=seedNum, n_jobs=n_jobs)

n_estimators = [100]
criterion = ["gini","entropy"]
max_features =[None, "sqrt", 0.2, 0.3, 0.4, 0.5]

paramGrid = dict(n_estimators=n_estimators, criterion=criterion, max_features=max_features)

kfold = KFold(n_splits=n_folds)
grid = GridSearchCV(estimator=tune_model, param_grid=paramGrid, scoring=scoring, cv=kfold, refit="Accuracy")
grid_result = grid.fit(X_train, y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
print ('Computing time:',(datetime.now() - startTimeModule))

clf_rf_be = grid_result.best_estimator_
clf_rf = clf_rf_be.fit(X_train, y_train)

Best: 0.760000 using {'criterion': 'gini', 'max_features': 0.4, 'n_estimators': 100}
Computing time: 0:00:12.219097


Gradient Boosting:

In [16]:
clf_cb_be = CatBoostClassifier(eval_metric='Accuracy', depth=6, verbose=False)
clf_cb = clf_cb_be.fit(X_train, y_train, verbose=False)

Evaluation:

In [17]:
predictions_rf = clf_rf.predict(X_test)
predictions_cb = clf_cb.predict(X_test)
cv_rf = cross_val_score(clf_rf_be, X_train, y_train, cv=kfold, scoring=scoring)
cv_cb = cross_val_score(clf_cb_be, X_train, y_train, cv=kfold, scoring=scoring)

print(clf_rf,"\nConfusion Matrix:")
print(confusion_matrix(y_test, predictions_rf))
print("\n\nClassification Report:\n\n",classification_report(y_test, predictions_rf))
print("Cross-Validation: %f (%f)" % (cv_rf.mean(), cv_rf.std()))
print("--------------------------------------------------------\n")

print(clf_cb,"\nConfusion Matrix:")
print(confusion_matrix(y_test, predictions_cb))
print("\n\nClassification Report:\n\n",classification_report(y_test, predictions_cb))
print("Cross-Validation: %f (%f)" % (cv_cb.mean(), cv_cb.std()))

RandomForestClassifier(max_features=0.4, n_jobs=6, random_state=23) 
Confusion Matrix:
[[127  13]
 [ 33  27]]


Classification Report:

               precision    recall  f1-score   support

           0       0.79      0.91      0.85       140
           1       0.68      0.45      0.54        60

    accuracy                           0.77       200
   macro avg       0.73      0.68      0.69       200
weighted avg       0.76      0.77      0.75       200

Cross-Validation: 0.760000 (0.024238)
--------------------------------------------------------

<catboost.core.CatBoostClassifier object at 0x0000020F8A816040> 
Confusion Matrix:
[[126  14]
 [ 27  33]]


Classification Report:

               precision    recall  f1-score   support

           0       0.82      0.90      0.86       140
           1       0.70      0.55      0.62        60

    accuracy                           0.80       200
   macro avg       0.76      0.73      0.74       200
weighted avg       0.79      0.80  

### 3 Counterfactuals <a class="anchor" id="ch3"></a>

In [18]:
clf=clf_cb
pred_idx = 3

probabilities = clf.predict_proba(X_test)
print("Probabilities: ", probabilities[pred_idx])
print("Correct class: ", y_test[pred_idx])

Probabilities:  [0.91281177 0.08718823]
Correct class:  0


In [19]:
target_names = ["good credit", "bad credit"]
feature_names = X_original.columns.values

In [20]:
x = X_test[pred_idx].reshape((1,) + X_test[0].shape)

In [21]:
predict_fn = lambda x: clf.predict_proba(x)

In [22]:
x = X_test[pred_idx].reshape((1,) + X_test[0].shape)

shape = x.shape
beta = .01
c_init = 1.
c_steps = 5
max_iterations = 500
rng = (-1., 1.)  # scale features between -1 and 1
rng_shape = (1,) + X_original.shape[1:]
feature_range = ((np.ones(rng_shape) * rng[0]).astype(np.float32),
                 (np.ones(rng_shape) * rng[1]).astype(np.float32))
use_kdtree = True
theta = 10.  # weight of prototype loss term



tf.compat.v1.disable_eager_execution()

cf = CounterFactualProto(predict_fn,
                         shape,
                         beta=beta,
                         theta=theta,
                         cat_vars=cat_vars_ohe,
                         ohe=True,
                         use_kdtree=use_kdtree,
                         max_iterations=max_iterations,
                         feature_range=feature_range,
                         c_init=c_init,
                         c_steps=c_steps,
                         eps=(0.05, 0.05)
                        )

cf.fit(X_train, d_type='abdm');




In [23]:
def describe_instance(X, explanation, eps=1e-2):
    print('Prediction by the model: {}  -- proba: {}'.format(target_names[explanation.orig_class],
                                                       explanation.orig_proba[0]))
    print('Counterfactual instance: {}  -- proba: {}'.format(target_names[explanation.cf['class']],
                                                             explanation.cf['proba'][0]))
    print('\nCounterfactual perturbations...')
    
    print('\nCategorical:')
    X_orig_ord = ohe_to_ord(X, cat_vars_ohe)[0]
    X_cf_ord = ohe_to_ord(explanation.cf['X'], cat_vars_ohe)[0]
    delta_cat = {}
    for i, (_, v) in enumerate(category_map.items()):
        cat_orig = v[int(X_orig_ord[0, i])]
        cat_cf = v[int(X_cf_ord[0, i])]
        if cat_orig != cat_cf:
            delta_cat[feature_names[i]] = [cat_orig, cat_cf]
    if delta_cat:
        for k, v in delta_cat.items():
            print('{}: {}  -->   {}'.format(k, v[0], v[1]))
    
    print('\nNumerical:')
    delta_num = X_cf_ord[0, -7:] - X_orig_ord[0, -7:]
    n_keys = len(list(cat_vars_ord.keys()))
    X_orig_num = scaler.inverse_transform(X_orig_ord[0,-7:].reshape(1,-1))
    X_cf_num = scaler.inverse_transform(X_cf_ord[0,-7:].reshape(1,-1))
    for i in range(delta_num.shape[0]):
        if np.abs(delta_num[i]) > eps:
            print('{}: {:.2f}  -->   {:.2f}'.format(feature_names[i+n_keys],
                                            #X_orig_ord[0,i+n_keys],
                                            X_orig_num[0,i],
                                            #X_cf_ord[0,i+n_keys]))
                                            X_cf_num[0,i]))      

In [24]:
def describe_instance2(X, explanation, eps=1e-2):
    print('Nearest counterfactual instance: {}'.format(target_names[explanation.cf['class']]))
    print('Probabilities: ',round(explanation.cf['proba'][0][0],2)," ",round(explanation.cf['proba'][0][1],2))
    
    print('\nSmallest feature value changes necessary:\n')
    
    #print('\nCategorical:')
    X_orig_ord = ohe_to_ord(X, cat_vars_ohe)[0]
    X_cf_ord = ohe_to_ord(explanation.cf['X'], cat_vars_ohe)[0]
    delta_cat = {}
    for i, (_, v) in enumerate(category_map.items()):
        cat_orig = v[int(X_orig_ord[0, i])]
        cat_cf = v[int(X_cf_ord[0, i])]
        if cat_orig != cat_cf:
            delta_cat[feature_names[i]] = [cat_orig, cat_cf]
    if delta_cat:
        for k, v in delta_cat.items():
            print('{}: {}  -->   {}'.format(k, v[0], v[1]))
    
    #print('\nNumerical:')
    delta_num = X_cf_ord[0, -6:] - X_orig_ord[0, -6:]
    n_keys = len(list(cat_vars_ord.keys()))
    X_orig_num = scaler.inverse_transform(X_orig_ord[0,-6:].reshape(1,-1))
    X_cf_num = scaler.inverse_transform(X_cf_ord[0,-6:].reshape(1,-1))
    for i in range(delta_num.shape[0]):
        if np.abs(delta_num[i]) > eps:
            print('{}: {:.2f}  -->   {:.2f}'.format(feature_names[i+n_keys],
                                            #X_orig_ord[0,i+n_keys],
                                            X_orig_num[0,i],
                                            #X_cf_ord[0,i+n_keys]))
                                            X_cf_num[0,i]))      

In [25]:
startTimeModule = datetime.now()
explanation = cf.explain(x)
print ('Computing time:',(datetime.now() - startTimeModule))

Computing time: 0:00:22.613746


In [26]:
describe_instance2(x, explanation)

Nearest counterfactual instance: bad credit
Probabilities:  0.31   0.69

Smallest feature value changes necessary:

existing checking: no checking account  -->   0 or less
employment since: more than 7 years  -->   1 to 4 years
property: life insurance  -->   car or other
other installment plans: none  -->   bank
housing: own  -->   rent


ValueError: operands could not be broadcast together with shapes (1,6) (7,) (1,6) 