In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings as wr
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

In [29]:
# Import df and extract elements
fraud_df = pd.read_csv("synthetic_fraud_dataset.csv")
fraud_df.head()


Unnamed: 0,Transaction_ID,User_ID,Transaction_Amount,Transaction_Type,Timestamp,Account_Balance,Device_Type,Location,Merchant_Category,IP_Address_Flag,...,Daily_Transaction_Count,Avg_Transaction_Amount_7d,Failed_Transaction_Count_7d,Card_Type,Card_Age,Transaction_Distance,Authentication_Method,Risk_Score,Is_Weekend,Fraud_Label
0,TXN_33553,USER_1834,39.79,POS,2023-08-14 19:30:00,93213.17,Laptop,Sydney,Travel,0,...,7,437.63,3,Amex,65,883.17,Biometric,0.8494,0,0
1,TXN_9427,USER_7875,1.19,Bank Transfer,2023-06-07 04:01:00,75725.25,Mobile,New York,Clothing,0,...,13,478.76,4,Mastercard,186,2203.36,Password,0.0959,0,1
2,TXN_199,USER_2734,28.96,Online,2023-06-20 15:25:00,1588.96,Tablet,Mumbai,Restaurants,0,...,14,50.01,4,Visa,226,1909.29,Biometric,0.84,0,1
3,TXN_12447,USER_2617,254.32,ATM Withdrawal,2023-12-07 00:31:00,76807.2,Tablet,New York,Clothing,0,...,8,182.48,4,Visa,76,1311.86,OTP,0.7935,0,1
4,TXN_39489,USER_2014,31.28,POS,2023-11-11 23:44:00,92354.66,Mobile,Mumbai,Electronics,0,...,14,328.69,4,Mastercard,140,966.98,Password,0.3819,1,1


In [31]:
data = fraud_df.copy()

data = data.drop(columns=['Transaction_ID', 'User_ID', 'Timestamp'])

categorical_cols = data.select_dtypes(include=['object']).columns.tolist()
categorical_cols = [col for col in categorical_cols if col != 'Fraud_Label']

data = pd.get_dummies(data, columns=categorical_cols, drop_first=True)

X = data.drop(columns=['Fraud_Label']).values.astype(np.float32)
y = data['Fraud_Label'].values.astype(np.float32)

scaler = StandardScaler()
X = scaler.fit_transform(X)

X = X.reshape(X.shape[0], 1, X.shape[1])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
test_dataset  = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=64)

In [32]:
class FraudDetectionCNN(nn.Module):
    def __init__(self, num_features):
        """
        Args:
            num_features (int): Number of features per transaction (after preprocessing).
        """
        super(FraudDetectionCNN, self).__init__()
        
        self.conv_layers = nn.Sequential(
            # First conv layer: maintains the input length using padding.
            nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(16),
            # Second conv layer.
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            # Adaptive pooling to squeeze the feature dimension to a fixed size (1)
            nn.AdaptiveAvgPool1d(1)
        )
        
        # The fully connected part for binary classification.
        self.fc_layers = nn.Sequential(
            nn.Flatten(),  
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [35]:
model = FraudDetectionCNN(num_features=X.shape[2])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device).unsqueeze(1)
        
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}")

Epoch 1, Loss: 0.5102
Epoch 2, Loss: 0.3517
Epoch 3, Loss: 0.2649
Epoch 4, Loss: 0.1876
Epoch 5, Loss: 0.1387
Epoch 6, Loss: 0.1113
Epoch 7, Loss: 0.0950
Epoch 8, Loss: 0.0860
Epoch 9, Loss: 0.0799
Epoch 10, Loss: 0.0715
