# Imbalanced classification: credit card fraud detection

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib


# Local path to the credit card CSV that is loaded below; this is a
# machine-specific absolute path and must be adjusted on any other machine.
file = r'/Users/neeraj/Datasets/Kaggle/creditcard.csv'

## Read data and split into training and testing/validation sets
Also separate the inputs (X) from the outputs (Y).

In [None]:
# Load the whole CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=file)
# Last expression of the cell: shows per-column summary statistics.
df.describe()

In [None]:
# Hold out 25% of the rows for validation.
# - stratify on the label so the rare positive class keeps the same
#   proportion in both splits (a plain random split can leave the
#   validation set with noticeably more or fewer fraud rows);
# - fix the seed so the split, and every number computed from it, is
#   reproducible across runs.
train_df, test_df = train_test_split(
    df,
    test_size=0.25,
    stratify=df['Class'],
    random_state=42,
)

In [None]:
# Separate the target column from the feature columns.
# The label is dropped by name rather than by position, so the features are
# still correct if 'Class' is not the last column of the CSV.
train_Y, test_Y = train_df['Class'], test_df['Class']
train_X, test_X = train_df.drop(columns='Class'), test_df.drop(columns='Class')

## Get the weights for each class

In [None]:
# Count the training samples per label value (position 0 -> label 0,
# position 1 -> label 1); the unpacking expects exactly two label values.
not_fraud, fraud = np.bincount(train_Y)
print(not_fraud, fraud)
# Inverse-frequency weights: the rarer class gets the larger weight.
weights = tuple(1.0 / count for count in (not_fraud, fraud))
# Optional visual check of the feature distributions:
# train_X.hist(figsize=(25,25))

## Data normalisation

In [None]:
# Convert the pandas objects to float32 NumPy arrays. From this point on
# train_X / test_X / train_Y / test_Y are ndarrays, not DataFrames/Series.
# (An earlier pandas-based normalisation was left commented out here; the
# standardisation is done on the arrays in the next cell instead.)
train_X = train_X.to_numpy(dtype='float32')
train_Y = train_Y.to_numpy(dtype='float32')
test_X = test_X.to_numpy(dtype='float32')
test_Y = test_Y.to_numpy(dtype='float32')

In [None]:
# Standardise each feature column using statistics computed on the training
# set only; the validation set is shifted and scaled by the same values.
# The augmented assignments modify the arrays in place, so train_X and
# test_X themselves are updated through the loop variable.
m = train_X.mean(axis=0)
for split in (train_X, test_X):
    split -= m
# The std is taken from the already-centred training data.
s = train_X.std(axis=0)
for split in (train_X, test_X):
    split /= s

## Building a binary classifier with a shallow neural network

In [None]:
from tensorflow import keras

In [None]:
# Fully connected binary classifier:
#   three hidden Dense layers of 256 ReLU units, Dropout(0.3) after the
#   second and third, and a single sigmoid unit as the output.
# The input width is declared with an explicit keras.Input instead of
# passing `input_shape=` to the first Dense layer, which is the deprecated
# form and triggers a warning in recent Keras releases.
model = keras.Sequential(
    [
        keras.Input(shape=(train_X.shape[-1],)),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
)
model.summary()

In [None]:
# Metrics reported during training/validation: the four confusion-matrix
# counts plus F1.
metrics = [
    keras.metrics.FalseNegatives(name='FN'),
    keras.metrics.FalsePositives(name='FP'),
    keras.metrics.TrueNegatives(name='TN'),
    keras.metrics.TruePositives(name='TP'),
    # keras.metrics.Precision(name='Precision'),
    # keras.metrics.Recall(name='Recall'),
    # F1Score defaults to threshold=None, which takes the argmax over the
    # last axis; with a single sigmoid output that marks (almost) every
    # sample as positive. An explicit 0.5 cut-off matches the decision rule
    # used by the confusion-matrix metrics above.
    # NOTE(review): some Keras versions require 2-D labels of shape
    # (batch, 1) for F1Score - confirm against the installed version.
    keras.metrics.F1Score(threshold=0.5, name='F1Score'),
]

In [None]:
# Configure training: Adam with a learning rate of 0.01, binary
# cross-entropy loss, and the `metrics` list built earlier in the notebook.
adam = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=metrics)

In [None]:
# Optional per-epoch checkpointing (disabled):
# callbacks = [keras.callbacks.ModelCheckpoint('Fraud detection model at epoch{epoch}.h5')]

# Map each class label (0, 1) to its weight, in the order stored in `weights`.
class_weights = dict(enumerate(weights))
print(class_weights)

In [None]:
# Train for 20 epochs in batches of 1024 samples, weighting each sample's
# loss by its class weight. The held-out test split doubles as the
# validation set, so the val_* numbers are computed on test_X / test_Y.
model.fit(
    x=train_X,
    y=train_Y,
    validation_data=(test_X, test_Y),
    class_weight=class_weights,
    epochs=20,
    batch_size=1024,
    callbacks=None,  # no callbacks in use
    verbose=2,
)

In [None]:
# Sanity check: mean of the first feature column after standardisation.
# train_X was converted to a NumPy array earlier in the notebook, so it is
# indexed directly (ndarrays have no `.to_numpy()` method).
np.mean(train_X[:, 0])

In [None]:
# Plot a histogram per feature. train_X is a NumPy array at this point and
# ndarrays have no `.hist()`, so wrap it in a DataFrame for plotting
# (columns are labelled by their integer position).
pd.DataFrame(train_X).hist()