In [2]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
import seaborn as sns
from pylab import rcParams
from sklearn.model_selection import train_test_split
from keras.models import Model, load_model
from keras.layers import Input, Dense
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import regularizers
from sklearn.preprocessing import StandardScaler
import pickle

from sklearn.metrics import (confusion_matrix, precision_recall_curve, auc,
                             roc_curve, recall_score, classification_report, f1_score,
                             precision_recall_fscore_support)

import adm_load as load
import adm_preproc as pre
import adm_datagen as dg


Using TensorFlow backend.


In [3]:
# Seed handed to the train/validation/test split below.
RANDOM_SEED = 498
# load all transaction data from csv
df = load.transactions()
# Project-local preprocessing; its exact transformations are defined in adm_preproc.
dfn = pre.preproc(df)
X_train, y_train, X_valid, y_valid, X_test, y_test = pre.train_valid_test(dfn, RANDOM_SEED)


# Identifier of the saved run; it selects both the model file and its history file.
model_id = '1f2b6926-476d-4e4b-842c-fe88b5e3a352'
model_path = 'checkpoints/autoencoder-' + model_id + '.h5'
history_path = 'checkpoints/autoencoder-history-' + model_id

autoencoder = load_model(model_path)
# Use a context manager so the file handle is closed once the history is read.
# NOTE(security): pickle.load can execute arbitrary code; only load history
# files that were produced by a trusted training run.
with open(history_path, 'rb') as history_file:
    history = pickle.load(history_file)

  if self.run_code(code, result):


In [None]:
%matplotlib inline

sns.set(style='whitegrid', palette='muted', font_scale=1.5)

rcParams['figure.figsize'] = 14, 8

RANDOM_SEED = 42
LABELS = ["Normal", "Fraud"]


In [None]:
# Per-epoch loss curves read from the unpickled training history.
plt.plot(history['loss'])
plt.plot(history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# The second curve is 'val_loss', i.e. the validation loss, so label it as
# such; this notebook also has a separate test split, and calling this curve
# 'test' would be misleading.
plt.legend(['train', 'validation'], loc='upper right');

In [None]:
# Run the loaded autoencoder over each split, in the order
# validation -> test -> train, and keep the reconstructions.
val_predictions, test_predictions, train_predictions = tuple(
    autoencoder.predict(split) for split in (X_valid, X_test, X_train)
)

In [None]:


def _reconstruction_error_frame(features, reconstructed, labels):
    """Return (per-row MSE, DataFrame) for one data split.

    The per-row MSE is the mean over axis 1 of the squared difference between
    ``features`` and ``reconstructed``. The DataFrame pairs that error
    (column 'reconstruction_error') with ``labels`` (column 'true_class').
    """
    per_row_mse = np.mean(np.power(features - reconstructed, 2), axis=1)
    error_frame = pd.DataFrame({'reconstruction_error': per_row_mse,
                                'true_class': labels})
    return per_row_mse, error_frame


# Same computation for every split: validation, then test, then train.
val_mse, val_error_df = _reconstruction_error_frame(X_valid, val_predictions, y_valid)

test_mse, test_error_df = _reconstruction_error_frame(X_test, test_predictions, y_test)

train_mse, train_error_df = _reconstruction_error_frame(X_train, train_predictions, y_train)

In [None]:
# Summary statistics of the validation error frame (reconstruction error and true class).
val_error_df.describe()


In [None]:
# Histogram of the reconstruction error for validation rows whose
# true_class is 0 (fraud rows excluded).

fig, ax = plt.subplots()
val_normal_error_df = val_error_df[val_error_df['true_class'] == 0]
_ = ax.hist(val_normal_error_df['reconstruction_error'].values, bins=10)

In [None]:

# Histogram of the reconstruction error for validation rows whose
# true_class is 1.
fig, ax = plt.subplots()
val_fraud_error_df = val_error_df[val_error_df['true_class'] == 1]
_ = ax.hist(val_fraud_error_df['reconstruction_error'].values, bins=10)

In [None]:
# Precision/recall trade-off on the validation split, with the reconstruction
# error used as the score for the positive class.
val_precision, val_recall, val_th = precision_recall_curve(
    val_error_df['true_class'],
    val_error_df['reconstruction_error'],
)
plt.plot(val_recall, val_precision, 'b', label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Recall vs Precision')
plt.show()

In [None]:
# precision_recall_curve returns one more precision value than thresholds:
# val_precision[i] belongs to val_th[i], and the final precision entry (fixed
# at 1) has no threshold. Drop the LAST element so every threshold is plotted
# against its own precision; slicing with [1:] would shift the curve by one.
plt.plot(val_th, val_precision[:-1], 'b', label='Threshold-Precision curve')
plt.title('Precision for different threshold values')
plt.xlabel('Threshold')
plt.ylabel('Precision')
plt.show()

In [None]:
# Error level drawn as the horizontal reference line in this figure.
# NOTE(review): a later cell rebinds `threshold` to 2 * 10**5, so the line
# shown here is not the cutoff used for the confusion matrices.
threshold = 5 * 10**5

fig, ax = plt.subplots()
groups = val_error_df.groupby('true_class')

# One marker-only series per true class, plotted against the row index.
for name, group in groups:
    class_label = "Fraud" if name == 1 else "Normal"
    ax.plot(group.index, group['reconstruction_error'],
            linestyle='', marker='o', ms=3.5, label=class_label)

# Span the threshold line across the x-range produced by the scatter above.
x_left, x_right = ax.get_xlim()
ax.hlines(threshold, x_left, x_right, colors="r", zorder=100, label='Threshold')
ax.legend()
plt.title("Reconstruction error for different classes")
plt.xlabel("Data point index")
plt.ylabel("Reconstruction error")
plt.show()


In [1]:
# Cutoff on the reconstruction error used to turn errors into class predictions.
threshold = 2 * 10**5

# Rows the autoencoder reconstructs poorly are the anomalies, and the
# precision/recall cell already treats a HIGHER reconstruction error as more
# likely fraud. Predict fraud (1) when the error exceeds the threshold.
val_y_pred = [1 if e > threshold else 0 for e in val_error_df.reconstruction_error.values]
val_conf_matrix = confusion_matrix(val_error_df.true_class, val_y_pred)


# Create the figure explicitly and keep its handle so that savefig writes
# THIS heatmap rather than whatever `fig` was left over from an earlier cell.
fig, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(val_conf_matrix, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt="d", ax=ax)
ax.set_title("Validation Confusion Matrix")
ax.set_ylabel('True class')
ax.set_xlabel('Predicted class')
# Save before showing: some backends release the figure once it is shown.
fig.savefig('val_confusion_matrix_')
plt.show()

NameError: name 'val_error_df' is not defined

In [None]:
# Uses the `threshold` currently bound in the notebook (set in an earlier cell).
# A higher reconstruction error means a more anomalous row, so fraud (1) is
# predicted when the error exceeds the threshold.
train_y_pred = [1 if e > threshold else 0 for e in train_error_df.reconstruction_error.values]
train_conf_matrix = confusion_matrix(train_error_df.true_class, train_y_pred)


# Keep an explicit figure handle: a bare `savefig` name is never imported in
# this notebook (only rcParams comes from pylab), so save through the figure.
fig, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(train_conf_matrix, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt="d", ax=ax)
ax.set_title("Training Confusion Matrix")
ax.set_ylabel('True class')
ax.set_xlabel('Predicted class')
# Save before showing: some backends release the figure once it is shown.
fig.savefig('train_confusion_matrix', transparent=True)
plt.show()

In [None]:
# Dimensions of the training split.
X_train.shape

In [None]:
# Dimensions of the test split.
X_test.shape

In [None]:
# Dimensions of the validation split.
X_valid.shape

In [None]:
# Dimensions of the test split (same check as an earlier cell).
X_test.shape

In [None]:
# Raw counts of the validation confusion matrix.
val_conf_matrix

In [None]:
# Number of validation samples in each TRUE class. confusion_matrix puts true
# classes on the rows, so the per-class totals are the row sums (axis=1).
# The builtin sum() would add the rows together and return column totals,
# i.e. counts per predicted class, instead.
n_class = val_conf_matrix.sum(axis=1)

In [None]:
# Transpose the validation confusion matrix and divide it element-wise by
# n_class, which broadcasts along the last axis of the transposed matrix.
val_conf_matrix.T / n_class