In [None]:
# Undersampling
# The GBM model achieves 96.06% accuracy and 93.81% recall

In [None]:
# Sklearn + xgboost
from sklearn.metrics import accuracy_score, recall_score
from sklearn.inspection import plot_partial_dependence

In [None]:
import pandas as pd
import pickle
import numpy as np
import time

In [None]:
# explanation algorithms
import shap
import lime
import lime.lime_tabular
from ibreakdown import ClassificationExplainer

In [None]:
import xgboost as xgb

### We load the test data

In [None]:
# Read the held-out feature matrix and its labels from CSV files (paths are relative
# to the working directory).
x_test = pd.read_csv("cardFraud_X_test.csv")
y_test = pd.read_csv("cardFraud_Y_test.csv")
# Preview the first rows of the features.
x_test.head()

### We load the model

In [None]:
# Deserialize the previously trained, undersampled GBM classifier.
# The context manager closes the file handle as soon as the model is loaded.
# NOTE: pickle.load can execute arbitrary code; only load model files from a trusted source.
with open("models/gbm_undersampled.sav", 'rb') as model_file:
    model = pickle.load(model_file)
model

In [None]:
# Sanity check that this is the expected model: score its predictions on the test set.
pred = model.predict(x_test)
for label, metric in (("Accuracy score:", accuracy_score), ("Recall score:", recall_score)):
    print(label, metric(y_test, pred))

### Explanations

In [None]:
# Single-row slice of the test set to explain (described by the author as a fraud case).
# Alternative row the author also tried: positions 1016:1017.
sample = x_test.iloc[3399:3400]
sample

#### LIME

In [None]:
def predictions_lime(x):
    """Return the model's class probabilities for ``x`` cast to float.

    This is the prediction callable handed to LIME's ``explain_instance``.
    """
    return model.predict_proba(x).astype(float)


# The test-set feature matrix is passed to LIME as its reference data array.
X = x_test.values
# random_state pins LIME's random sampling, so a fresh Restart & Run All
# reproduces the same explanation instead of a different one on every run.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X,
    feature_names=x_test.columns,
    class_names=['Non-Fraud', 'Fraud'],
    kernel_width=5,
    random_state=42,
)

In [None]:
# Time the LIME explanation of the selected row, including rendering it in the notebook.
start_time = time.time()
observation = sample.values[0]
exp = explainer.explain_instance(
    observation,
    predictions_lime,
    num_features=len(x_test.columns),
)
exp.show_in_notebook(show_all=False)
elapsed = time.time() - start_time
print("LIME done in: " + str(elapsed) + " seconds.")

#### PDP

In [None]:
# Label each importance with its feature name and sort descending, so the ranking is
# readable (the bare attribute is an unlabeled array).
# Assumes the model's feature order matches x_test.columns — TODO confirm against the training code.
pd.Series(model.feature_importances_, index=x_test.columns, name="importance").sort_values(ascending=False)

In [None]:
# Per-feature scores from the underlying booster, using the 'gain' importance type.
booster = model.get_booster()
booster.get_score(importance_type='gain')

In [None]:
# Partial dependence of the model's prediction on the 'Amount' feature, timed.
# NOTE(review): plot_partial_dependence was deprecated in scikit-learn 1.0 and removed in 1.2;
# PartialDependenceDisplay.from_estimator is its replacement when upgrading.
start_time = time.time()
features = ['Amount']
# Stored under its own name so IPython's built-in display() function is not shadowed
# for the rest of the notebook.
pdp_display = plot_partial_dependence(
       model, x_test, features,
       n_jobs=3, grid_resolution=250)
print("PDP done in: " + str((time.time() - start_time)) + " seconds.")

#### ICE

In [None]:
# Individual conditional expectation curves plus the averaged partial dependence
# (kind="both") for four features.
features = ['V14', 'V17', 'V3', 'V7']
# Stored under its own name so IPython's built-in display() function is not shadowed
# for the rest of the notebook.
ice_display = plot_partial_dependence(
       model, x_test, features, kind="both",
       n_jobs=3, grid_resolution=250)

#### SHAP

In [None]:
# Load SHAP's JavaScript visualization code into the notebook; it is needed by
# SHAP's JavaScript-based plots such as force_plot.
shap.initjs()

In [None]:
# Row position of the observation to explain (described by the author as a fraud case).
# The position is kept in sample_num so it can be reused for lookups by row.
sample_num = 3399
sample = x_test.iloc[sample_num:sample_num + 1]
sample

In [None]:
# Switch the observation under study to row 1016 (described by the author as a fraud case).
# sample_num is updated together with sample so that later lookups by sample_num
# (e.g. into the SHAP value matrix) refer to this same row, not the previous one.
sample_num = 1016
sample = x_test[sample_num:sample_num+1]
# Predicted class probabilities for the selected row.
model.predict_proba(sample)

In [None]:
# Compute SHAP values with the tree-specific explainer and report how long it takes.
start_time = time.time()
explainer = shap.TreeExplainer(model)
# Only the first 10000 rows of the test set are explained.
shap_values = explainer.shap_values(x_test.iloc[:10000])
elapsed = time.time() - start_time
print("SHAP TreeExplainer done in: " + str(elapsed) + " seconds.")

In [None]:
# Force plot for the single observation at position sample_num. The feature row is taken
# from x_test at the same position as the SHAP row, so both always describe the same
# observation even if `sample` has been reassigned to a different row in the meantime.
shap.force_plot(explainer.expected_value, shap_values[sample_num,:], x_test.iloc[sample_num,:])

In [None]:
# Stacked force plot over every explained row. The matching feature rows are passed so the
# plot shows the real feature names and values instead of generic placeholder labels.
shap.force_plot(explainer.expected_value, shap_values, x_test.iloc[:len(shap_values)])

In [None]:
# Summary plot of the SHAP values. The feature rows must line up one-to-one with the rows
# of shap_values, so the features are sliced to exactly the rows that were explained
# (the original passed only the first 1000 rows against SHAP values for up to 10000).
shap.summary_plot(shap_values, x_test.iloc[:len(shap_values)])

In [None]:
# Dependence plot for 'Amount'. shap_values and the feature frame must have the same number
# of rows, so the features are sliced to exactly the rows that were explained
# (the original passed only the first 1000 rows against SHAP values for up to 10000).
shap.dependence_plot("Amount", shap_values, x_test.iloc[:len(shap_values)])

In [None]:
# Bar chart of mean absolute SHAP value per feature. The feature frame is sliced to the
# rows that were actually explained so it stays aligned with shap_values.
shap.summary_plot(shap_values, x_test.iloc[:len(shap_values)], plot_type="bar")

In [None]:
# Kernel SHAP

In [None]:
# Model-agnostic Kernel SHAP on the first 100 test rows (the same rows also serve as the
# background data set), timed.
# The results are kept under their own names so the TreeExplainer's `explainer` and
# `shap_values`, which the plotting cells read, are not overwritten.
start_time = time.time()
kernel_explainer = shap.KernelExplainer(model.predict_proba, x_test[0:100])
kernel_shap_values = kernel_explainer.shap_values(x_test[0:100])
print("SHAP KernelExplainer done in: " + str((time.time() - start_time)) + " seconds.")

#### iBreakDown

In [None]:
# Human-readable class labels handed to the explainer.
# NOTE(review): presumably ordered to match the model's class indices (0, 1) — confirm.
classes = ['Non-Fraud', 'Fraud']
# Build the iBreakDown explainer around the model and fit it on the test-set features.
explainer = ClassificationExplainer(model)
explainer.fit(x_test, x_test.columns, classes)

In [None]:
# Time the iBreakDown explanation of the currently selected row, then print it.
start_time = time.time()
observation = sample.values[0]
exp = explainer.explain(observation)
elapsed = time.time() - start_time
print("iBreakDown done in: " + str(elapsed) + " seconds.")
exp.print()