In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import sklearn as sk
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split

import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

## Feature engineering

In [2]:
def is_weekend(datetime: pd.Timestamp) -> int:
    return int(datetime.weekday() >= 5)

def is_night(datetime: pd.Timestamp) -> int:
    return int(datetime.hour < 6 or datetime.hour >= 22)

In [3]:
df = pd.read_csv("combined_data.csv")

df["TX_DATETIME"] = pd.to_datetime(df["TX_DATETIME"])

df["TX_IS_WEEKEND"] = df["TX_DATETIME"].apply(is_weekend)
df["TX_AT_NIGHT"] = df["TX_DATETIME"].apply(is_night)

df.drop(["TX_DATETIME"], axis=1, inplace=True)

## Neural network setup

In [4]:
device = t.device("cuda" if t.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [None]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        
        self.n_features = input_size
        self.layers = nn.ModuleList()
        self.activations = nn.ModuleList()
    
    def add_layer(self, layer):
        self.layers.append(layer)
    
    def add_activation(self, activation):
        self.activations.append(activation)
    
    def forward(self, X):
        for layer, activation in zip(self.layers, self.activations):
            X = activation(layer(X))
        return X

In [29]:
X = df.drop(["TX_FRAUD", "TX_FRAUD_SCENARIO"], axis=1).values
y = df["TX_FRAUD"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_tensor = t.tensor(X_train, dtype=t.float32)
X_test_tensor = t.tensor(X_test, dtype=t.float32)
y_train_tensor = t.tensor(y_train, dtype=t.long)
y_test_tensor = t.tensor(y_test, dtype=t.long)

train_dataset = t.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = t.utils.data.TensorDataset(X_test_tensor, y_test_tensor)

train_dataloader = t.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = t.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [34]:
model = NeuralNetwork(X.shape[1]).to(device)

class_counts = df["TX_FRAUD"].value_counts()
fraud_weight = class_counts[0] / class_counts[1]
class_weights = t.tensor([1.0, fraud_weight], dtype=t.float32).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights)

optimizer = t.optim.Adam(model.parameters(), lr=0.001)

model.add_layer(nn.Linear(X.shape[1], 16))
model.add_activation(nn.ReLU())
model.add_layer(nn.Linear(16, 2))
model.add_activation(nn.ReLU())

## Performance

In [35]:
epochs = 5

for epoch in range(epochs):
    running_loss = 0.0
    for features, labels in train_dataloader:
        optimizer.zero_grad()
        
        output = model(features)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    print(f"Epoch {epoch + 1} loss: {running_loss / len(train_dataloader)}")

all_labels = []
all_predictions = []

with t.no_grad():
    for features, labels in test_dataloader:
        output = model(features)
        _, predicted = t.max(output, 1)
        all_labels.extend(labels.numpy())
        all_predictions.extend(predicted.numpy())
        
print(f"Accuracy: {accuracy_score(all_labels, all_predictions, normalize=True)}")
print(f"Precision: {precision_score(all_labels, all_predictions, average='weighted')}")
print(f"Recall: {recall_score(all_labels, all_predictions, average='weighted')}")
print(f"F1: {f1_score(all_labels, all_predictions, average='weighted')}")

Epoch 1 loss: 0.7125053878940015
Epoch 2 loss: 0.6931472524124593
Epoch 3 loss: 0.693147252367607
Epoch 4 loss: 0.6931472521066481
Epoch 5 loss: 0.6931472521433454
Accuracy: 0.9918450764043086
Precision: 0.9837566555874688


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Recall: 0.9918450764043086
F1: 0.9877843083693564
