## TF Neural Network

In [None]:
# Data and Model libraries + others
import tensorflow as tf
from tensorflow import keras

import os
import tempfile

import numpy as np
import pandas as pd

import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import time

In [None]:
# Seed NumPy and TensorFlow together: Keras weight initialisation and dropout
# draw from TensorFlow's generator, which the NumPy seed alone does not control.
np.random.seed(777)
tf.random.set_seed(777)

In [None]:
# Explainability libraries

from sklearn.inspection import plot_partial_dependence
import shap
import lime
import lime.lime_tabular
from ibreakdown import ClassificationExplainer

### Load data

In [None]:
# Load the raw transactions; the 'Time' and 'Class' columns are used in the cells below.
data = pd.read_csv('data_creditCardFraud.csv')
data.head()

In [None]:
# Split the data into train, validation and test sets.
# 'Time' is dropped because it differs for every transaction and gives no insight.
X = data.drop(columns=['Time'])

# 20% of the rows become the test set; 20% of the remainder becomes validation.
train, test = train_test_split(X, random_state=777, test_size=0.2)
train, val = train_test_split(train, random_state=777, test_size=0.2)

In [None]:
# Pull the 'Class' target out of each split (pop removes the column in place),
# leaving only feature columns in the frames.
train_labels = np.array(train.pop('Class'))
val_labels = np.array(val.pop('Class'))
test_labels = np.array(test.pop('Class'))
bool_train_labels = train_labels != 0

# NumPy copies of the remaining feature columns for the neural network.
train_features, val_features, test_features = (
    np.array(frame) for frame in (train, val, test)
)

In [None]:
# Standardise with statistics fitted on the training split only, so the
# validation and test splits are never peeked at.
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
val_features = scaler.transform(val_features)
test_features = scaler.transform(test_features)

# Clip the standardised values to the range [-5, 5].
train_features, val_features, test_features = (
    np.clip(split, -5, 5)
    for split in (train_features, val_features, test_features)
)

# Report the shape of every label and feature array.
for _caption, _array in (
    ('Training labels shape:', train_labels),
    ('Validation labels shape:', val_labels),
    ('Test labels shape:', test_labels),
    ('Training features shape:', train_features),
    ('Validation features shape:', val_features),
    ('Test features shape:', test_features),
):
    print(_caption, _array.shape)

### Model Definition

In [None]:
# Metrics tracked by every model built with make_model(). The `name` given to
# each metric is what Keras reports it under (e.g. 'auc' is monitored as
# 'val_auc' by the early-stopping callback).
METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'),
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
]

def make_model(metrics=METRICS, output_bias=None):
    """Build and compile the binary fraud classifier.

    The network is Dense(16, relu) -> Dropout(0.5) -> Dense(1, sigmoid), with
    the input width taken from ``train_features``.

    Args:
        metrics: Keras metrics to track; defaults to the module-level METRICS.
        output_bias: Optional initial value for the output layer's bias. When
            given, it is wrapped in a Constant initializer.

    Returns:
        The compiled ``keras.Sequential`` model (Adam optimizer, binary
        cross-entropy loss).
    """
    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
    model = keras.Sequential([
        keras.layers.Dense(16, activation='relu', input_shape=(train_features.shape[-1],)),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(1, activation='sigmoid', bias_initializer=output_bias),
    ])

    # Use `learning_rate`: the `lr` keyword is a deprecated alias that newer
    # Keras releases no longer accept.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=metrics)

    return model

In [None]:
# Training budget shared by the fit calls below (BATCH_SIZE is rebound per section).
EPOCHS = 100
BATCH_SIZE = 2048

# Stop once validation AUC has not improved for 10 epochs (higher is better)
# and restore the weights from the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_auc', 
    verbose=1,
    patience=10,
    mode='max',
    restore_best_weights=True)

In [None]:
# Count rows per class over the full dataset and use log(pos/neg) as the
# starting bias for the output layer.
neg, pos = np.bincount(data['Class'])
initial_bias = np.log([pos/neg])
initial_bias

In [None]:
# Build the model with the output bias initialised from the class counts.
model = make_model(output_bias=initial_bias)
model.summary()

In [None]:
# Save the freshly initialised weights to a new temp directory; every model
# below loads them before training.
initial_weights = os.path.join(tempfile.mkdtemp(), 'initial_weights')
model.save_weights(initial_weights)

In [None]:
# Baseline: rebuild the model, load the saved initial weights and train on the
# original (non-resampled) training split with early stopping.
model = make_model()
model.load_weights(initial_weights)
baseline_history = model.fit(
    train_features,
    train_labels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[early_stopping],
    validation_data=(val_features, val_labels))

In [None]:
# Score the baseline model on the test split and print each metric by name.
baseline_results = model.evaluate(
    test_features,
    test_labels,
    batch_size=BATCH_SIZE,
    verbose=0,
)
for name, value in zip(model.metrics_names, baseline_results):
    print(name, ': ', value)
print()

### Undersampling

In [None]:
# Load the undersampled training set; features and labels are stored in separate CSVs.
und = pd.read_csv("cardFraud_X_train_undersampled.csv")
und_y = pd.read_csv("cardFraud_Y_train_undersampled.csv")
# Attach the labels as a 'Class' column (assumes und_y holds a single column
# aligned row-for-row with und — TODO confirm).
und['Class'] = und_y


# Row count and class balance of the resampled set.
print(len(und))
print(und['Class'].value_counts())
und.head()

In [None]:
# Keep 10% of the undersampled rows for validation.
train_und, val_und = train_test_split(und, random_state=777, test_size=0.10)

# Pull the 'Class' target out of each frame (pop removes the column in place).
train_labels_und = np.array(train_und.pop('Class'))
val_labels_und = np.array(val_und.pop('Class'))
bool_train_labels_und = train_labels_und != 0

# NumPy copies of the remaining feature columns for the neural network.
train_features_und, val_features_und = np.array(train_und), np.array(val_und)

In [None]:
# Standardise with statistics fitted on the undersampled training rows, then
# clip to [-5, 5].
# NOTE(review): this rebinds `scaler`, while `test_features` was transformed by
# the scaler fitted on the original training split — confirm that evaluating a
# model trained on these features against `test_features` is intended.
scaler = StandardScaler()
train_features_und = np.clip(scaler.fit_transform(train_features_und), -5, 5)
val_features_und = np.clip(scaler.transform(val_features_und), -5, 5)

# Report shapes; the test arrays are the ones from the original split.
for _caption, _array in (
    ('Training labels shape:', train_labels_und),
    ('Validation labels shape:', val_labels_und),
    ('Test labels shape:', test_labels),
    ('Training features shape:', train_features_und),
    ('Validation features shape:', val_features_und),
    ('Test features shape:', test_features),
):
    print(_caption, _array.shape)

In [None]:
BATCH_SIZE = 600 # we have far less data =(
# Fresh model started from the saved initial weights, trained on the undersampled set.
model_und = make_model()
model_und.load_weights(initial_weights)
# NOTE(review): this rebinds `baseline_history`, replacing the baseline model's
# history; the other sections use a suffixed name (e.g. `baseline_history_over`).
baseline_history = model_und.fit(
    train_features_und,
    train_labels_und,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    # Early stopping is disabled here, so all EPOCHS are run.
    #callbacks=[early_stopping],
    validation_data=(val_features_und, val_labels_und))

In [None]:
# Score the undersampling model on the test split.
baseline_results_und = model_und.evaluate(test_features, test_labels,
                                  batch_size=BATCH_SIZE, verbose=0)
# Metric names come from the model that produced the results, not the baseline model.
for name, value in zip(model_und.metrics_names, baseline_results_und):
    print(name, ': ', value)
print()

In [None]:
# Persist the trained undersampling model to an .h5 file.
# NOTE(review): the file name says "oversampledV2" although this is the
# undersampled model — looks like a copy/paste slip; confirm before renaming,
# since other code may load this exact path.
model_und.save('creditCardFraud_tfnn_oversampledV2_b600.h5')

### Oversampling

In [None]:
# Load the oversampled training set; features and labels are stored in separate CSVs.
over = pd.read_csv("cardFraud_X_train_oversampled.csv")
over_y = pd.read_csv("cardFraud_Y_train_oversampled.csv")
# Attach the labels as a 'Class' column (assumes over_y holds a single column
# aligned row-for-row with over — TODO confirm).
over['Class'] = over_y


# Row count and class balance of the resampled set.
print(len(over))
print(over['Class'].value_counts())
over.head()

In [None]:
# Keep 10% of the oversampled rows for validation.
train_over, val_over = train_test_split(over, random_state=777, test_size=0.10)

# Pull the 'Class' target out of each frame (pop removes the column in place).
train_labels_over = np.array(train_over.pop('Class'))
val_labels_over = np.array(val_over.pop('Class'))
bool_train_labels_over = train_labels_over != 0

# NumPy copies of the remaining feature columns for the neural network.
train_features_over, val_features_over = np.array(train_over), np.array(val_over)

In [None]:
# Standardise with statistics fitted on the oversampled training rows, then
# clip to [-5, 5].
# NOTE(review): this rebinds `scaler`, while `test_features` was transformed by
# the scaler fitted on the original training split — confirm that evaluating a
# model trained on these features against `test_features` is intended.
scaler = StandardScaler()
train_features_over = np.clip(scaler.fit_transform(train_features_over), -5, 5)
val_features_over = np.clip(scaler.transform(val_features_over), -5, 5)

# Report shapes; the test arrays are the ones from the original split.
for _caption, _array in (
    ('Training labels shape:', train_labels_over),
    ('Validation labels shape:', val_labels_over),
    ('Test labels shape:', test_labels),
    ('Training features shape:', train_features_over),
    ('Validation features shape:', val_features_over),
    ('Test features shape:', test_features),
):
    print(_caption, _array.shape)

In [None]:
# Batch size used for this section's fit and evaluate calls.
BATCH_SIZE = 250000

# Fresh model started from the saved initial weights, trained on the
# oversampled set with early stopping.
model_over = make_model()
model_over.load_weights(initial_weights)
baseline_history_over = model_over.fit(
    train_features_over,
    train_labels_over,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[early_stopping],
    validation_data=(val_features_over, val_labels_over))

In [None]:
# Score the oversampling model on the test split.
baseline_results_over = model_over.evaluate(test_features, test_labels,
                                  batch_size=BATCH_SIZE, verbose=0)
# Metric names come from the model that produced the results, not the baseline model.
for name, value in zip(model_over.metrics_names, baseline_results_over):
    print(name, ': ', value)
print()

In [None]:
# Persist the trained oversampling model to an .h5 file.
model_over.save('creditCardFraud_tfnn_oversampled_b250000.h5')

### Oversampling v2

In [None]:
# Load the second oversampled training set; features and labels are stored in separate CSVs.
over2 = pd.read_csv("cardFraud_X_train_oversampled_v2.csv")
over2_y = pd.read_csv("cardFraud_Y_train_oversampled_v2.csv")
# Attach the labels as a 'Class' column (assumes over2_y holds a single column
# aligned row-for-row with over2 — TODO confirm).
over2['Class'] = over2_y


# Row count and class balance of the resampled set.
print(len(over2))
print(over2['Class'].value_counts())
over2.head()

In [None]:
# Keep 10% of the v2 oversampled rows for validation.
train_over2, val_over2 = train_test_split(over2, random_state=777, test_size=0.10)

# Pull the 'Class' target out of each frame (pop removes the column in place).
train_labels_over2 = np.array(train_over2.pop('Class'))
val_labels_over2 = np.array(val_over2.pop('Class'))
bool_train_labels_over2 = train_labels_over2 != 0

# NumPy copies of the remaining feature columns for the neural network.
train_features_over2, val_features_over2 = np.array(train_over2), np.array(val_over2)

In [None]:
# Standardise with statistics fitted on the v2 oversampled training rows, then
# clip to [-5, 5].
# NOTE(review): this rebinds `scaler`, while `test_features` was transformed by
# the scaler fitted on the original training split — confirm that evaluating a
# model trained on these features against `test_features` is intended.
scaler = StandardScaler()
train_features_over2 = np.clip(scaler.fit_transform(train_features_over2), -5, 5)
val_features_over2 = np.clip(scaler.transform(val_features_over2), -5, 5)

# Report shapes; the test arrays are the ones from the original split.
for _caption, _array in (
    ('Training labels shape:', train_labels_over2),
    ('Validation labels shape:', val_labels_over2),
    ('Test labels shape:', test_labels),
    ('Training features shape:', train_features_over2),
    ('Validation features shape:', val_features_over2),
    ('Test features shape:', test_features),
):
    print(_caption, _array.shape)

In [None]:
BATCH_SIZE = 250000 # same batch size as the first oversampling run

# Fresh model started from the saved initial weights, trained on the v2
# oversampled set with early stopping.
model_over2 = make_model()
model_over2.load_weights(initial_weights)
baseline_history_over2 = model_over2.fit(
    train_features_over2,
    train_labels_over2,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[early_stopping],
    validation_data=(val_features_over2, val_labels_over2))

In [None]:
# Score the v2 oversampling model on the test split.
baseline_results_over2 = model_over2.evaluate(test_features, test_labels,
                                  batch_size=BATCH_SIZE, verbose=0)
# Metric names come from the model that produced the results, not the baseline model.
for name, value in zip(model_over2.metrics_names, baseline_results_over2):
    print(name, ': ', value)
print()

In [None]:
# Persist the trained v2 oversampling model to an .h5 file.
model_over2.save('creditCardFraud_tfnn_oversampledV2_b250000.h5')

### Oversampling SMOTE

In [None]:
# Load the synthetically oversampled training set; features and labels are stored in separate CSVs.
sin = pd.read_csv("cardFraud_X_train_oversampled_syntethic.csv")
sin_y = pd.read_csv("cardFraud_Y_train_oversampled_syntethic.csv")
# Attach the labels as a 'Class' column (assumes sin_y holds a single column
# aligned row-for-row with sin — TODO confirm).
sin['Class'] = sin_y


# Row count and class balance of the resampled set.
print(len(sin))
print(sin['Class'].value_counts())
sin.head()

In [None]:
# Keep 10% of the synthetically oversampled rows for validation.
train_sin, val_sin = train_test_split(sin, random_state=777, test_size=0.10)

# Pull the 'Class' target out of each frame (pop removes the column in place).
train_labels_sin = np.array(train_sin.pop('Class'))
val_labels_sin = np.array(val_sin.pop('Class'))
bool_train_labels_sin = train_labels_sin != 0

# NumPy copies of the remaining feature columns for the neural network.
train_features_sin, val_features_sin = np.array(train_sin), np.array(val_sin)

In [None]:
# Standardise with statistics fitted on the synthetically oversampled training
# rows, then clip to [-5, 5].
# NOTE(review): this rebinds `scaler`, while `test_features` was transformed by
# the scaler fitted on the original training split — confirm that evaluating a
# model trained on these features against `test_features` is intended.
scaler = StandardScaler()
train_features_sin = np.clip(scaler.fit_transform(train_features_sin), -5, 5)
val_features_sin = np.clip(scaler.transform(val_features_sin), -5, 5)

# Report shapes; the test arrays are the ones from the original split.
for _caption, _array in (
    ('Training labels shape:', train_labels_sin),
    ('Validation labels shape:', val_labels_sin),
    ('Test labels shape:', test_labels),
    ('Training features shape:', train_features_sin),
    ('Validation features shape:', val_features_sin),
    ('Test features shape:', test_features),
):
    print(_caption, _array.shape)

In [None]:
# Batch size used for this section's fit and evaluate calls.
BATCH_SIZE = 250000 

# Fresh model started from the saved initial weights, trained on the
# synthetically oversampled set with early stopping.
model_sin = make_model()
model_sin.load_weights(initial_weights)
baseline_history_sin = model_sin.fit(
    train_features_sin,
    train_labels_sin,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[early_stopping],
    validation_data=(val_features_sin, val_labels_sin))

In [None]:
# Score the synthetic-oversampling model on the test split.
baseline_results_sin = model_sin.evaluate(test_features, test_labels,
                                  batch_size=BATCH_SIZE, verbose=0)
# Metric names come from the model that produced the results, not the baseline model.
for name, value in zip(model_sin.metrics_names, baseline_results_sin):
    print(name, ': ', value)
print()

In [None]:
# Persist the trained synthetic-oversampling model to an .h5 file.
model_sin.save('creditCardFraud_tfnn_oversampledSin_b250000.h5')

## Explaining NN

In [None]:
# Single test row (index 3398), sliced so it stays 2-D; this is the instance
# explained by the LIME and iBreakDown cells below.
sample = test_features[3398:3399]
sample

## SHAP 

In [None]:
# Load SHAP's JavaScript visualisation code into the notebook (used by the force plots below).
shap.initjs()

### DeepExplainer

#### Undersampling

In [None]:
tmp = time.time()
background = train_features[0:100]
explainer = shap.DeepExplainer(model_und,background)
shap_values = explainer.shap_values(test_features[:100])
print("Deep Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
shap.force_plot(explainer.expected_value[0].numpy().tolist(), shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(explainer.expected_value[0].numpy().tolist(), shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling 

In [None]:
tmp = time.time()
background = train_features[0:1000]
explainer = shap.DeepExplainer(model_over,background)
shap_values = explainer.shap_values(test_features[:100])
print("Deep Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
shap.force_plot(explainer.expected_value[0].numpy().tolist(), shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(explainer.expected_value[0].numpy().tolist(), shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling v2

In [None]:
tmp = time.time()
background = train_features[0:1000]
explainer = shap.DeepExplainer(model_over2,background)
shap_values = explainer.shap_values(test_features[:100])
print("Deep Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
shap.force_plot(explainer.expected_value[0].numpy().tolist(), shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(explainer.expected_value[0].numpy().tolist(), shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling SMOTE

In [None]:
tmp = time.time()
background = train_features[0:1000]
explainer = shap.DeepExplainer(model_sin,background)
shap_values = explainer.shap_values(test_features[:100])
print("Deep Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
shap.force_plot(explainer.expected_value[0].numpy().tolist(), shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(explainer.expected_value[0].numpy().tolist(), shap_values[0][0,:], test_features[1016:1017][0])

### Gradient Explainer

#### Undersampling

In [None]:
tmp = time.time()
background = train_features[0:100]
gradient_explainer = shap.GradientExplainer(model_und,background)
shap_values, indexes = gradient_explainer.shap_values(test_features[:100], ranked_outputs=2)
print("Gradient Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
# plot the explanations
shap.force_plot(0, shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(0, shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling

In [None]:
tmp = time.time()
background = train_features[0:1000]
gradient_explainer = shap.GradientExplainer(model_over,background)
shap_values, indexes = gradient_explainer.shap_values(test_features[:100], ranked_outputs=2)
print("Gradient Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
# plot the explanations
shap.force_plot(0, shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(0, shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling v2

In [None]:
tmp = time.time()
background = train_features[0:1000]
gradient_explainer = shap.GradientExplainer(model_over2,background)
shap_values, indexes = gradient_explainer.shap_values(test_features[:100], ranked_outputs=2)
print("Gradient Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
# plot the explanations
shap.force_plot(0, shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(0, shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling SMOTE

In [None]:
tmp = time.time()
background = train_features[0:1000]
gradient_explainer = shap.GradientExplainer(model_sin,background)
shap_values, indexes = gradient_explainer.shap_values(test_features[:100], ranked_outputs=2)
print("Gradient Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
# plot the explanations
shap.force_plot(0, shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(0, shap_values[0][0,:], test_features[1016:1017][0])

### Kernel Explainer

#### Undersampling

In [None]:
tmp = time.time()
background = train_features[0:100]
kernel_explainer = shap.KernelExplainer(model_und.predict,background)
shap_values = kernel_explainer.shap_values(test_features[:100])
print("Kernel Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
# plot the explanations
shap.force_plot(kernel_explainer.expected_value[0], shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(kernel_explainer.expected_value[0], shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling

In [None]:
tmp = time.time()
background = train_features[0:100]
kernel_explainer = shap.KernelExplainer(model_over.predict,background)
shap_values = kernel_explainer.shap_values(test_features[:100])
print("Kernel Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
# plot the explanations
shap.force_plot(kernel_explainer.expected_value[0], shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(kernel_explainer.expected_value[0], shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling v2

In [None]:
tmp = time.time()
background = train_features[0:100]
kernel_explainer = shap.KernelExplainer(model_over2.predict,background)
shap_values = kernel_explainer.shap_values(test_features[:100])
print("Kernel Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
# plot the explanations
shap.force_plot(kernel_explainer.expected_value[0], shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(kernel_explainer.expected_value[0], shap_values[0][0,:], test_features[1016:1017][0])

#### Oversampling SMOTE

In [None]:
tmp = time.time()
background = train_features[0:100]
kernel_explainer = shap.KernelExplainer(model_sin.predict,background)
shap_values = kernel_explainer.shap_values(test_features[:100])
print("Kernel Explainer took: " + str(time.time() - tmp) + " seconds.")

In [None]:
# plot the explanations
shap.force_plot(kernel_explainer.expected_value[0], shap_values[0], test_features[1016:1017][0])

In [None]:
shap.force_plot(kernel_explainer.expected_value[0], shap_values[0][0,:], test_features[1016:1017][0])

## LIME 

### Undersampling

In [None]:
def predict_und(x):
    """Return two-column class probabilities from ``model_und`` for LIME.

    Args:
        x: Batch of feature rows accepted by ``model_und.predict``.

    Returns:
        Array with one row per input row: column 0 is ``1 - p`` (Non-Fraud)
        and column 1 is ``p`` (Fraud), where ``p`` is the first output value
        the model produces for that row.
    """
    fraud = np.asarray(model_und.predict(x))[:, 0]
    # Built in one vectorised step instead of concatenating row by row; an
    # empty batch yields an empty (0, 2) array rather than an error.
    return np.column_stack((1 - fraud, fraud))

In [None]:
# Prediction callable handed to LIME: two-column probabilities cast to float.
predictions_lime = lambda x: predict_und(x).astype(float)
X = train_features[0:100]
# LIME tabular explainer built from the test features, labelled with the
# feature column names.
explainer = lime.lime_tabular.LimeTabularExplainer(
    test_features,
    feature_names=train.columns,
    class_names=['Non-Fraud', 'Fraud'],
    kernel_width=5,
)

In [None]:
exp.as_pyplot_figure()

In [None]:
tmp = time.time()
exp = explainer.explain_instance(sample[0], predictions_lime,num_features=len(test.columns))
exp.show_in_notebook(show_all=False)
tmp2 = time.time()

In [None]:
print("LIME took: " + str(tmp2 - tmp) + " seconds.")

### Oversampling

In [None]:
def predict_over(x):
    """Return two-column class probabilities from ``model_over`` for LIME.

    Args:
        x: Batch of feature rows accepted by ``model_over.predict``.

    Returns:
        Array with one row per input row: column 0 is ``1 - p`` (Non-Fraud)
        and column 1 is ``p`` (Fraud), where ``p`` is the first output value
        the model produces for that row.
    """
    fraud = np.asarray(model_over.predict(x))[:, 0]
    # Built in one vectorised step instead of concatenating row by row; an
    # empty batch yields an empty (0, 2) array rather than an error.
    return np.column_stack((1 - fraud, fraud))

In [None]:
# Prediction callable handed to LIME: two-column probabilities cast to float.
predictions_lime = lambda x: predict_over(x).astype(float)
X = train_features[0:100]
# Feature names come from the feature frame (`train` after 'Class' was popped),
# so they line up one-to-one with the columns of `test_features`;
# `data.columns` also contains 'Time' and 'Class' and would shift every label.
explainer = lime.lime_tabular.LimeTabularExplainer(
    test_features,
    feature_names=train.columns,
    class_names=['Non-Fraud', 'Fraud'],
    kernel_width=5,
)

In [None]:
tmp = time.time()
exp = explainer.explain_instance(sample[0], predictions_lime,num_features=len(test.columns))
exp.show_in_notebook(show_all=False)
tmp2 = time.time()

In [None]:
print("LIME took: " + str(tmp2 - tmp) + " seconds.")

### Oversampling v2

In [None]:
def predict_over2(x):
    """Return two-column class probabilities from ``model_over2`` for LIME.

    Args:
        x: Batch of feature rows accepted by ``model_over2.predict``.

    Returns:
        Array with one row per input row: column 0 is ``1 - p`` (Non-Fraud)
        and column 1 is ``p`` (Fraud), where ``p`` is the first output value
        the model produces for that row.
    """
    fraud = np.asarray(model_over2.predict(x))[:, 0]
    # Built in one vectorised step instead of concatenating row by row; an
    # empty batch yields an empty (0, 2) array rather than an error.
    return np.column_stack((1 - fraud, fraud))

In [None]:
# Prediction callable handed to LIME: two-column probabilities cast to float.
predictions_lime = lambda x: predict_over2(x).astype(float)
X = train_features[0:100]
# Feature names come from the feature frame (`train` after 'Class' was popped),
# so they line up one-to-one with the columns of `test_features`;
# `data.columns` also contains 'Time' and 'Class' and would shift every label.
explainer = lime.lime_tabular.LimeTabularExplainer(
    test_features,
    feature_names=train.columns,
    class_names=['Non-Fraud', 'Fraud'],
    kernel_width=5,
)

In [None]:
tmp = time.time()
exp = explainer.explain_instance(sample[0], predictions_lime,num_features=len(test.columns))
exp.show_in_notebook(show_all=False)
tmp2 = time.time()

In [None]:
print("LIME took: " + str(tmp2 - tmp) + " seconds.")

### Oversampling SMOTE

In [None]:
def predict_sin(x):
    """Return two-column class probabilities from ``model_sin`` for LIME.

    Args:
        x: Batch of feature rows accepted by ``model_sin.predict``.

    Returns:
        Array with one row per input row: column 0 is ``1 - p`` (Non-Fraud)
        and column 1 is ``p`` (Fraud), where ``p`` is the first output value
        the model produces for that row.
    """
    fraud = np.asarray(model_sin.predict(x))[:, 0]
    # Built in one vectorised step instead of concatenating row by row; an
    # empty batch yields an empty (0, 2) array rather than an error.
    return np.column_stack((1 - fraud, fraud))

In [None]:
# Prediction callable handed to LIME: two-column probabilities cast to float.
predictions_lime = lambda x: predict_sin(x).astype(float)
X = train_features[0:100]
# LIME tabular explainer built from the test features, labelled with the
# feature column names.
explainer = lime.lime_tabular.LimeTabularExplainer(
    test_features,
    feature_names=train.columns,
    class_names=['Non-Fraud', 'Fraud'],
    kernel_width=5,
)

In [None]:
tmp = time.time()
exp = explainer.explain_instance(sample[0], predictions_lime,num_features=len(test.columns))
exp.show_in_notebook(show_all=False)
tmp2 = time.time()

In [None]:
print("LIME took: " + str(tmp2 - tmp) + " seconds.")

## iBreakDown

### Undersampling

In [None]:
# Small adapter so ibreakdown can work with the Keras model.
class predict_und_ibreakdown:
    """Expose ``model_und`` through a ``predict_proba`` function.

    The class itself (not an instance) is handed to ClassificationExplainer,
    so ``predict_proba`` is a static method: it can be called on the class or
    on an instance alike.
    """

    # Kept for backward compatibility: class attribute referencing the wrapped model.
    self = model_und

    @staticmethod
    def predict_proba(x):
        """Return one row per input row: ``[1 - p, p]`` = [Non-Fraud, Fraud].

        ``p`` is the first output value ``model_und`` produces for the row.
        """
        fraud = np.asarray(model_und.predict(x))[:, 0]
        # Vectorised; an empty batch yields an empty (0, 2) array.
        return np.column_stack((1 - fraud, fraud))

In [None]:
explainer = ClassificationExplainer(predict_und_ibreakdown)
classes = ['Non-Fraud', 'Fraud']

In [None]:
explainer.fit(train_features[:100], test.columns, classes)

In [None]:
model_und.predict(sample)

In [None]:
tmp = time.time()
exp = explainer.explain(sample)
print("iBreakDown took: " + str(time.time() - tmp) + " seconds.")
print(model_und.predict(sample))
exp.print()

### Oversampling

In [None]:
# Small adapter so ibreakdown can work with the Keras model.
class predict_over_ibreakdown:
    """Expose ``model_over`` through a ``predict_proba`` function.

    The class itself (not an instance) is handed to ClassificationExplainer,
    so ``predict_proba`` is a static method: it can be called on the class or
    on an instance alike.
    """

    # Kept for backward compatibility: class attribute referencing the wrapped model.
    self = model_over

    @staticmethod
    def predict_proba(x):
        """Return one row per input row: ``[1 - p, p]`` = [Non-Fraud, Fraud].

        ``p`` is the first output value ``model_over`` produces for the row.
        """
        fraud = np.asarray(model_over.predict(x))[:, 0]
        # Vectorised; an empty batch yields an empty (0, 2) array.
        return np.column_stack((1 - fraud, fraud))

In [None]:
explainer = ClassificationExplainer(predict_over_ibreakdown)
classes = ['Non-Fraud', 'Fraud']

In [None]:
explainer.fit(train_features[:100], test.columns, classes)

In [None]:
model_over.predict(sample)

In [None]:
tmp = time.time()
exp = explainer.explain(sample)
print("iBreakDown took: " + str(time.time() - tmp) + " seconds.")
print(model_over.predict(sample))
exp.print()

### Oversampling v2

In [None]:
# Small adapter so ibreakdown can work with the Keras model.
class predict_over2_ibreakdown:
    """Expose ``model_over2`` through a ``predict_proba`` function.

    The class itself (not an instance) is handed to ClassificationExplainer,
    so ``predict_proba`` is a static method: it can be called on the class or
    on an instance alike.
    """

    # Kept for backward compatibility: class attribute referencing the wrapped model.
    self = model_over2

    @staticmethod
    def predict_proba(x):
        """Return one row per input row: ``[1 - p, p]`` = [Non-Fraud, Fraud].

        ``p`` is the first output value ``model_over2`` produces for the row.
        """
        fraud = np.asarray(model_over2.predict(x))[:, 0]
        # Vectorised; an empty batch yields an empty (0, 2) array.
        return np.column_stack((1 - fraud, fraud))

In [None]:
explainer = ClassificationExplainer(predict_over2_ibreakdown)
classes = ['Non-Fraud', 'Fraud']

In [None]:
explainer.fit(train_features[:100], test.columns, classes)

In [None]:
model_over2.predict(sample)

In [None]:
tmp = time.time()
exp = explainer.explain(sample)
print("iBreakDown took: " + str(time.time() - tmp) + " seconds.")
print(model_over2.predict(sample))
exp.print()

### Oversampling SMOTE

In [None]:
# Small adapter so ibreakdown can work with the Keras model.
class predict_sin_ibreakdown:
    """Expose ``model_sin`` through a ``predict_proba`` function.

    The class itself (not an instance) is handed to ClassificationExplainer,
    so ``predict_proba`` is a static method: it can be called on the class or
    on an instance alike.
    """

    # Kept for backward compatibility: class attribute referencing the wrapped model.
    self = model_sin

    @staticmethod
    def predict_proba(x):
        """Return one row per input row: ``[1 - p, p]`` = [Non-Fraud, Fraud].

        ``p`` is the first output value ``model_sin`` produces for the row.
        """
        fraud = np.asarray(model_sin.predict(x))[:, 0]
        # Vectorised; an empty batch yields an empty (0, 2) array.
        return np.column_stack((1 - fraud, fraud))

In [None]:
explainer = ClassificationExplainer(predict_sin_ibreakdown)
classes = ['Non-Fraud', 'Fraud']

In [None]:
explainer.fit(train_features[:100], test.columns, classes)

In [None]:
model_sin.predict(sample)

In [None]:
tmp = time.time()
exp = explainer.explain(sample)
print("iBreakDown took: " + str(time.time() - tmp) + " seconds.")
print(model_sin.predict(sample))
exp.print()