# Neural Network Implementation

Dataset: [Kaggle-Credit Card Fraud Dataset](https://paperswithcode.com/dataset/kaggle-credit-card-fraud-dataset)

_The features have already been transformed with PCA and preprocessed to anonymize them._

The original classes for the data are:

* Fraudulent Transaction: _+1_

* Non-fraudulent Transaction: _0_ (this notebook remaps it to _-1_ when the data is loaded)

In [33]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

In [34]:
# Load the raw transactions, then recode the label column: every non-positive
# class value becomes -1 and everything else becomes +1.
data: pd.DataFrame = pd.read_csv("../creditcard.csv")
non_fraud_rows = data["Class"] <= 0
data["Class"] = np.where(non_fraud_rows, -1, 1)

data.shape

(284807, 31)

Preview the data

In [35]:
# Display the first rows to sanity-check the columns and the recoded Class labels.
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,-1
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,-1
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,-1
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,-1
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,-1


### Split the Data for Training and Testing

In [36]:
# Split data into training/testing: each row lands in the training set with
# probability 0.8, so the split is roughly (not exactly) 80% / 20%.
# The mask is drawn from a dedicated, seeded generator so that the split -- and
# therefore every metric reported further down -- is reproducible when the
# notebook is restarted and re-run. The seed matches the random_state used for
# SMOTE and the classifier.
split_rng = np.random.default_rng(42)
training_data_mask: np.ndarray = split_rng.random(len(data)) < 0.8

train: pd.DataFrame = data[training_data_mask]
test: pd.DataFrame = data[~training_data_mask]

# Features are every column except the label; the label is the "Class" column.
X_train = train.drop("Class", axis=1).to_numpy()
y_train = train["Class"].to_numpy()
X_test = test.drop("Class", axis=1).to_numpy()
y_test = test["Class"].to_numpy()

### Scale the Features

In [37]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so no test-set statistics influence training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Model Evaluation Metrics

In [38]:
def evaluate(predictions, y_true=None) -> tuple[tuple[int, int, int, int], tuple[float, float, float]]:
    """Print and return confusion-matrix counts plus precision, recall and F1.

    Labels follow this notebook's encoding: fraud is ``1``, non-fraud is ``-1``.

    Args:
        predictions: Predicted labels, one per evaluated row (any array-like).
        y_true: Ground-truth labels in the same row order as ``predictions``.
            When omitted, the notebook-level ``test["Class"]`` column is used,
            which keeps the original single-argument call working.

    Returns:
        ``((TP, TN, FP, FN), (precision, recall, f1))``. Counts are ``int``;
        a metric whose denominator is zero is reported as ``nan``.

    Raises:
        ValueError: If ``predictions`` and the ground truth differ in shape.
    """
    if y_true is None:
        y_true = test["Class"]

    # Compare strictly by position. Converting to plain arrays prevents pandas
    # from aligning two Series on their (possibly different) indexes, which
    # would silently miscount or raise.
    predictions = np.asarray(predictions)
    y_true = np.asarray(y_true)
    if predictions.shape != y_true.shape:
        raise ValueError(
            f"predictions has shape {predictions.shape} but the ground truth "
            f"has shape {y_true.shape}"
        )

    TP = int(((predictions == 1) & (y_true == 1)).sum())
    TN = int(((predictions == -1) & (y_true == -1)).sum())
    FP = int(((predictions == 1) & (y_true == -1)).sum())
    FN = int(((predictions == -1) & (y_true == 1)).sum())

    # Undefined when nothing was predicted as fraud.
    precision: float = np.nan
    if (TP + FP) > 0:
        precision = TP / (TP + FP)

    # Undefined when the ground truth contains no fraud.
    recall: float = np.nan
    if (TP + FN) > 0:
        recall = TP / (TP + FN)

    # The comparison is False whenever either operand is nan, so F1 stays nan
    # unless both precision and recall are defined and not both zero.
    f1: float = np.nan
    if (precision + recall) > 0:
        f1 = 2 * (precision * recall) / (precision + recall)

    print(f"Actual Fraud (Class=1):      {(y_true == 1).sum()}")
    print(f"Actual Non-Fraud (Class=-1): {(y_true == -1).sum()}")
    print()
    print(f"TP: {TP}")
    print(f"TN: {TN}")
    print(f"FP: {FP}")
    print(f"FN: {FN}")
    print()
    print(f"PRECISION: {precision}")
    print(f"RECALL:    {recall}")
    print(f"F1-SCORE:  {f1}")
    print()

    return (TP, TN, FP, FN), (precision, recall, f1)

## Initialize and Train the Neural Network

In [45]:
# The dataset is very imbalanced, so the (already scaled) training split is
# oversampled with SMOTE before fitting. Only training rows are resampled.
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train_scaled, y_train)

# Network configuration: two hidden layers (60 then 30 units), ReLU
# activations, L-BFGS solver, fixed seed.
mlp_settings = {
    "hidden_layer_sizes": (60, 30),
    "activation": "relu",
    "solver": "lbfgs",
    "max_iter": 800,
    "random_state": 42,
}
clf = MLPClassifier(**mlp_settings)

clf.fit(X_train_balanced, y_train_balanced)

### Make Predictions

In [46]:
# Predict labels for the held-out test rows, using the test features that were
# transformed with the scaler fitted on the training split.
y_pred = clf.predict(X_test_scaled)

### Get Evaluation Metrics

In [47]:
# Print the confusion-matrix counts and precision/recall/F1 for the test-set
# predictions, and keep the returned values for later use.
(TP, TN, FP, FN), (precision, recall, f1) = evaluate(y_pred)

Actual Fraud (Class=1):      104
Actual Non-Fraud (Class=-1): 56781

TP: 87
TN: 56763
FP: 18
FN: 17

PRECISION: 0.8285714285714286
RECALL:    0.8365384615384616
F1-SCORE:  0.8325358851674641

