In [5]:
# Load in relevant modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import random
import torch
from torch.utils.data import DataLoader

from src.data import FraudDataset, prepare_train_valid_test
from src.models import MLPClassifier
from src.train import train

# Silence pandas' SettingWithCopyWarning for the rest of the notebook
pd.set_option('mode.chained_assignment', None)

In [3]:
# Read data/creditcard.csv into a DataFrame. The path is relative, so it
# resolves against the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='data/creditcard.csv')

In [4]:
# Keep only the third value returned by the split helper (the test set);
# the first two returned values are discarded. The split is requested with
# valid_prop=0 and test_prop=0.3 (presumably a 30% test share).
# NOTE(review): no seed is passed here — confirm prepare_train_valid_test
# reproduces the same split the saved models were trained against.
_, _, df_test = prepare_train_valid_test(
    df,
    valid_prop=0,
    test_prop=0.3,
)

In [12]:
# Load in models
# Logistic regression: unpickled from disk. The context manager closes the
# file handle as soon as loading finishes instead of leaving it open.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only load model files that come from a trusted source.
filename = 'logistic_regression.pickle'
with open('model_files/' + filename, 'rb') as model_file:
    lr_model = pickle.load(model_file)

# MLP: rebuild the network with the constructor arguments below, then restore
# the saved weights into it.
# map_location='cpu' lets the checkpoint load on a machine without a GPU; the
# prediction cell feeds this model CPU tensors.
mlp_model = MLPClassifier(n_input=30, layers=[30, 30, 30, 2], dropout=.5)
mlp_model.load_state_dict(torch.load('model_files/mlp.th', map_location='cpu'))
# Switch to evaluation mode (dropout off, BatchNorm uses running statistics).
# Kept as the last expression so the cell still displays the architecture.
mlp_model.eval()

MLPClassifier(
  (network): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=30, out_features=30, bias=True)
    (2): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Dropout(p=0.5, inplace=False)
    (5): Linear(in_features=30, out_features=30, bias=True)
    (6): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): Dropout(p=0.5, inplace=False)
    (9): Linear(in_features=30, out_features=30, bias=True)
    (10): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Dropout(p=0.5, inplace=False)
    (13): Linear(in_features=30, out_features=2, bias=True)
    (14): ReLU()
  )
)

In [22]:
# Generate predictions for both models on the test set.
# Every column of df_test except the last one is passed in as a feature.
test_features = df_test.iloc[:, :-1]
lr_preds = lr_model.predict(test_features)

# NOTE(review): this DataLoader is built but not consumed in this cell; the
# MLP below scores the whole test set in one forward pass.
test_data = DataLoader(FraudDataset(df_test), batch_size=1024, shuffle=False)

# Inference only, so skip autograd bookkeeping during the forward pass.
with torch.no_grad():
    mlp_outputs = mlp_model(torch.tensor(test_features.values).float())
    # Each row divided by its own sum across the two output columns
    class_preds = mlp_outputs / torch.sum(mlp_outputs, dim=1).reshape(-1, 1)

# NOTE(review): the class-1 score below is taken from the raw network output,
# not from the row-normalised `class_preds`, and is thresholded at 1 rather
# than at a probability cut-off — confirm this is intended.
class_1_probs = mlp_outputs[:, 1].numpy()
# Label 0 when the score is strictly below 1, otherwise 1 (plain Python ints)
mlp_preds = np.where(class_1_probs < 1, 0, 1).tolist()

In [32]:
# Collect the positions each model labels as 1, then keep only the positions
# labelled 1 by both. Those shared positions are the ensemble's positive
# predictions; every other position is treated as 0.
mlp_indices = []
for idx, mlp_label in enumerate(mlp_preds):
    if mlp_label == 1:
        mlp_indices.append(idx)

lr_indices = []
for idx, lr_label in enumerate(lr_preds):
    if lr_label == 1:
        lr_indices.append(idx)

# `idx` is left bound at notebook scope after these loops.
strong_positives = set(mlp_indices) & set(lr_indices)

In [62]:
# Evaluate performance on the test set
def _percent(numerator, denominator):
    """Return 100 * numerator / denominator rounded to 2 places (NaN if undefined)."""
    if denominator == 0:
        # e.g. precision when nothing was flagged, or recall with no fraud rows
        return float('nan')
    return np.round(100 * numerator / denominator, 2)


# 1 at every position that both models flagged, 0 elsewhere. The comprehension
# binds its own `idx`, so each position is tested individually instead of
# reading whatever value `idx` last held at notebook scope.
ensemble_preds = np.array([1 if idx in strong_positives else 0 for idx in range(len(mlp_preds))])

# Positional (0-based) row numbers of the Class == 1 and Class == 0 rows
labels = df_test['Class'].values
fraud_indices = np.flatnonzero(labels == 1)
not_fraud_indices = np.flatnonzero(labels == 0)

# Confusion-matrix counts for the ensemble
tp = len(strong_positives.intersection(fraud_indices.tolist()))
fn = len(fraud_indices) - tp
fp = len(strong_positives) - tp
# True negatives are the non-fraud rows that were NOT flagged: the negatives
# minus the false positives (not minus the false negatives).
tn = len(not_fraud_indices) - fp
print(f'accuracy: {_percent(tp + tn, tp + fp + tn + fn)}%')
print(f'precision: {_percent(tp, tp + fp)}%')
print(f'recall: {_percent(tp, tp + fn)}%')

accuracy: 99.96%
precision: 94.05%
recall: 73.15%
