### Explaining Logistic Regression

In [None]:
# Sklearn 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score
from sklearn.inspection import plot_partial_dependence

In [None]:
import pandas as pd
import pickle
import numpy as np

In [None]:
# explanation algorithms
import shap
import lime
import lime.lime_tabular
from ibreakdown import ClassificationExplainer
from ceml.sklearn import generate_counterfactual

### We load the test data

In [None]:
# Read the held-out feature matrix and its labels from CSV, then preview the
# first rows of the features.
x_test = pd.read_csv("cardFraud_X_test.csv")
y_test = pd.read_csv("cardFraud_Y_test.csv")
x_test.head()

### We load the model

In [None]:
# Load the pickled classifier. The context manager guarantees the file handle
# is closed as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# model files that come from a trusted source.
with open("models/logisticRegression_oversampled_bruteForce.sav", 'rb') as model_file:
    model = pickle.load(model_file)
model

In [None]:
# Sanity check that this is the expected model: score its predictions on the
# test set with accuracy and recall.
pred = model.predict(x_test)
for label, metric in (
    ("Accuracy score:", accuracy_score),
    ("Recall score:", recall_score),
):
    print(label, metric(y_test, pred))

### Explanations

In [None]:
# The observation explained below is the test row at position 3399, a fraud
# case. It is kept as a one-row DataFrame so the column names stay attached.
sample = x_test.iloc[3399:3400]
sample

#### LIME

In [None]:
def predictions_lime(x):
    """Return the model's class probabilities for *x*, cast to float."""
    return model.predict_proba(x).astype(float)


# The test set, as a plain NumPy array, is the data the LIME explainer is
# built on; column names and class labels are supplied for readable output.
X = x_test.values
explainer = lime.lime_tabular.LimeTabularExplainer(
    X,
    feature_names=x_test.columns,
    class_names=['Non-Fraud', 'Fraud'],
    kernel_width=5,
)

In [None]:
# Explain the selected row with LIME, asking for every feature to be included,
# and render the explanation inline.
exp = explainer.explain_instance(
    sample.values[0],
    predictions_lime,
    num_features=x_test.shape[1],
)
exp.show_in_notebook(show_all=False)

#### PDP

In [None]:
# Coefficient of the features in the decision function.
# Displayed here as a reference point for the dependence plots that follow.
model.coef_

In [None]:
# Partial dependence of the model's output on four selected features.
# NOTE: plot_partial_dependence was deprecated in scikit-learn 1.0 and removed
# in 1.2; newer versions provide PartialDependenceDisplay.from_estimator.
features = ['V14', 'V17', 'V3', 'V7']
display = plot_partial_dependence(
    estimator=model,
    X=x_test,
    features=features,
    grid_resolution=250,
    n_jobs=3,
)

#### ICE

In [None]:
# Same four features as the PDP cell, but kind="both" overlays the individual
# (ICE) curves on top of the averaged partial-dependence curve.
# NOTE: plot_partial_dependence was deprecated in scikit-learn 1.0 and removed
# in 1.2; newer versions provide PartialDependenceDisplay.from_estimator.
features = ['V14', 'V17', 'V3', 'V7']
display = plot_partial_dependence(
    estimator=model,
    X=x_test,
    features=features,
    kind="both",
    grid_resolution=250,
    n_jobs=3,
)

#### SHAP

In [None]:
# load JS visualization code to notebook
# Must run before any SHAP plot is rendered in this notebook session.
shap.initjs()

In [None]:
# explain the model's predictions using SHAP
# Explain the model's predictions using SHAP.
# The explainer wraps predict_proba (one output per class) rather than predict
# (a single label): the plotting cells index explainer.expected_value[0] and
# shap_values[0][0, :], which only works when there is one entry per class.
# The first 10 test rows serve as background data; SHAP values are computed
# for the first 1000 test rows.
explainer = shap.KernelExplainer(model.predict_proba, x_test[0:10])
shap_values = explainer.shap_values(x_test[0:1000])

In [None]:
# Force plot built from the first entry of the explainer's expected_value and
# the first entry of shap_values.
# NOTE(review): the [0] indexing presumes one entry per model output (class 0
# would be 'Non-Fraud' given the class order used elsewhere) — confirm against
# the cell that builds the explainer.
shap.force_plot(explainer.expected_value[0], shap_values[0])

In [None]:
# Force plot for a single observation: row 0 of the explained slice.
# The feature values shown must belong to the same row as the SHAP values, so
# they are taken from x_test row 0 (shap_values was computed on x_test[0:1000],
# which does not contain the row-3399 `sample`).
# To explain `sample` instead, compute explainer.shap_values(sample) and plot
# those values together with sample.values[0].
shap.force_plot(explainer.expected_value[0], shap_values[0][0,:], x_test.iloc[0, :])

In [None]:
# Summary plot of the first shap_values entry over the same 1000 test rows the
# SHAP values were computed for.
shap.summary_plot(shap_values[0], features=x_test.iloc[:1000])

In [None]:
# Dependence plot for the "Amount" feature over the same 1000 explained rows.
shap.dependence_plot(
    "Amount",
    shap_values[0],
    x_test.iloc[:1000],
)

In [None]:
# Bar-chart variant of the summary plot over the same 1000 explained rows.
shap.summary_plot(
    shap_values[0],
    features=x_test.iloc[:1000],
    plot_type="bar",
)

#### iBreakDown

In [None]:
# Class labels in the order used throughout this notebook.
classes = ['Non-Fraud', 'Fraud']

# Rebind `explainer` to an iBreakDown explainer (it previously held the SHAP
# one) and fit it on the test set with its column names.
explainer = ClassificationExplainer(model)
explainer.fit(x_test, x_test.columns, classes)

In [None]:
# Explain the selected row (first and only row of `sample`) with iBreakDown
# and print the result. `exp` is rebound here; it previously held the LIME
# explanation.
exp = explainer.explain(sample.values[0])
exp.print()

#### Counterfactual Explanations

In [None]:
# Passed as `features_whitelist` to generate_counterfactual in the cells below.
features_whitelist = None   # We can use all features

In [None]:
# Rebind `sample` to the test row at position 1, as a flat NumPy vector; it
# replaces the one-row DataFrame slice used by the earlier sections.
# (The row at position 3399 could be used instead:
#  np.array(x_test)[3399:3400, :][0].)
sample = x_test.to_numpy(copy=True)[1]
sample

In [None]:
# Model prediction for the sample before searching for a counterfactual; the
# sample is wrapped in a list so predict receives a single-row batch.
model.predict([sample])

In [None]:
# Search for a counterfactual of `sample` whose predicted class is 1
# (presumably 'Fraud', given the class order used above) and print the raw
# result.
print("\nCompute counterfactual ....")
print(
    generate_counterfactual(
        model,
        sample,
        y_target=1,
        features_whitelist=features_whitelist,
    )
)

In [None]:
# Run the counterfactual search with the same arguments as the print cell and
# keep the result for inspection in the cells below.
# NOTE(review): this repeats the search done in the previous cell — the stored
# result could be printed instead of computing it twice.
counterfactual = generate_counterfactual(model,sample, y_target=1, features_whitelist=features_whitelist)

In [None]:
# Display the full counterfactual result.
counterfactual

In [None]:
# Display only the 'x_cf' entry of the result (the counterfactual point).
counterfactual['x_cf']

In [None]:
# Show 'x_cf' as a table labelled with the test-set column names; the row
# width is taken from the length of 'delta'.
pd.DataFrame(
    counterfactual['x_cf'].reshape(-1, len(counterfactual['delta'])),
    columns=x_test.columns,
)

In [None]:
# Show 'delta' as a table labelled with the test-set column names.
pd.DataFrame(
    counterfactual['delta'].reshape(-1, len(counterfactual['delta'])),
    columns=x_test.columns,
)

In [None]:
# Display the raw 'delta' entry of the result.
counterfactual['delta']

In [None]:
# Element-wise sum of the counterfactual point and its reported delta, computed
# in a single vectorised step (no per-element np.append, which re-allocates the
# whole array on every iteration). The result is a flat float array.
# NOTE(review): if ceml defines delta as (original - counterfactual), this sum
# reconstructs the original sample rather than the counterfactual — confirm
# which vector is meant to be fed back to the model.
new_sample = (
    np.asarray(counterfactual['x_cf'], dtype=float)
    + np.asarray(counterfactual['delta'], dtype=float)
).ravel()

In [None]:
# Model prediction for the reconstructed vector, passed as a single-row batch.
model.predict([new_sample])

In [None]:
# Show the reconstructed vector as a table labelled with the test-set column
# names.
pd.DataFrame(
    new_sample.reshape(-1, len(counterfactual['delta'])),
    columns=x_test.columns,
)