In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import sklearn as sk
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

## Feature engineering

In [2]:
def is_weekend(datetime: pd.Timestamp) -> int:
    return int(datetime.weekday() >= 5)

def is_night(datetime: pd.Timestamp) -> int:
    return int(datetime.hour < 6 or datetime.hour >= 22)

In [3]:
df = pd.read_csv("combined_data.csv")

df["TX_DATETIME"] = pd.to_datetime(df["TX_DATETIME"])

df["TX_IS_WEEKEND"] = df["TX_DATETIME"].apply(is_weekend)
df["TX_AT_NIGHT"] = df["TX_DATETIME"].apply(is_night)

df.drop(["TX_DATETIME"], axis=1, inplace=True)

## Neural network setup

In [4]:
device = t.device("cuda" if t.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [None]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        
        self.n_features = input_size
        self.layers = nn.ModuleList()
        self.activations = nn.ModuleList()
    
    def add_layer(self, layer):
        self.layers.append(layer)
    
    def add_activation(self, activation):
        self.activations.append(activation)
    
    def forward(self, X):
        for layer, activation in zip(self.layers, self.activations):
            X = activation(layer(X))
        return X

In [11]:
X = df.drop(["TX_FRAUD", "TX_FRAUD_SCENARIO"], axis=1).values
y = df["TX_FRAUD"].values

X_tensor = t.tensor(X, dtype=t.float32)
y_tensor = t.tensor(y, dtype=t.long)

dataset = t.utils.data.TensorDataset(X_tensor, y_tensor)
dataloader = t.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)

In [25]:
model = NeuralNetwork(X.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = t.optim.Adam(model.parameters(), lr=0.001)

model.add_layer(nn.Linear(X.shape[1], 16))
model.add_activation(nn.ReLU())
model.add_layer(nn.Linear(16, 8))
model.add_activation(nn.ReLU())

## Performance

In [24]:
epochs = 5

for epoch in range(epochs):
    running_loss = 0.0
    for features, labels in dataloader:
        optimizer.zero_grad()
        
        output = model(features)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    print(f"Epoch {epoch + 1} loss: {running_loss / len(dataloader)}")

all_labels = []
all_predictions = []

with t.no_grad():
    for features, labels in dataloader:
        output = model(features)
        _, predicted = t.max(output, 1)
        all_labels.extend(labels.numpy())
        all_predictions.extend(predicted.numpy())
        
print(f"Accuracy: {accuracy_score(all_labels, all_predictions, normalize=True)}")
print(f"Precision: {precision_score(all_labels, all_predictions, average=None)}")
print(f"Recall: {recall_score(all_labels, all_predictions, average=None)}")
print(f"F1: {f1_score(all_labels, all_predictions, average=None)}")

Epoch 1 loss: 363.1645779146053
Epoch 2 loss: 0.08619671653690257
Epoch 3 loss: 0.06728696891725848
Epoch 4 loss: 0.06921932897561246
Epoch 5 loss: 0.07582418370009147
Accuracy: 0.9916307281853656


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Precision: [0.99163073 0.        ]
Recall: [1. 0.]
F1: [0.99579778 0.        ]
