In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random


In [2]:
# Set seed 41
random_seed = 41
torch.manual_seed(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/nomanahmed21/Credit-Card-Fraud-Detection/refs/heads/main/processed_data.csv')

In [4]:
df = df.drop('Unnamed: 0', axis=1)

In [5]:
df

Unnamed: 0,amount,transaction_hour,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,merchant_category_Clothing,merchant_category_Electronics,merchant_category_Food,merchant_category_Grocery,merchant_category_Travel,is_fraud
0,84.47,22,0,0,66,3,40,0.0,1.0,0.0,0.0,0.0,0
1,541.82,3,1,0,87,1,64,0.0,0.0,0.0,0.0,1.0,0
2,237.01,17,0,0,49,1,61,0.0,0.0,0.0,1.0,0.0,0
3,164.33,4,0,1,72,3,34,0.0,0.0,0.0,1.0,0.0,0
4,30.53,15,0,0,79,0,44,0.0,0.0,1.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,350.91,22,0,0,99,4,37,0.0,0.0,1.0,0.0,0.0,0
9996,410.04,5,0,0,70,3,25,1.0,0.0,0.0,0.0,0.0,0
9997,527.75,21,0,0,44,2,45,0.0,1.0,0.0,0.0,0.0,0
9998,91.20,2,0,0,38,0,37,0.0,1.0,0.0,0.0,0.0,0


In [6]:
class FraudDataset(Dataset):
  def __init__(self, df, transform=None):
    self.x = df.drop('is_fraud', axis=1).values
    self.y = df['is_fraud'].values
    self.transform = transform

  def __len__(self):
    return len(self.x)

  def __getitem__(self, idx):
    x = self.x[idx]
    y = self.y[idx]

    x = torch.tensor(x, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.long)

    if self.transform:
      x = self.transform(x)

    return x, y


In [7]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state = random_seed)

In [8]:
train_df.head()

Unnamed: 0,amount,transaction_hour,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,merchant_category_Clothing,merchant_category_Electronics,merchant_category_Food,merchant_category_Grocery,merchant_category_Travel,is_fraud
5627,75.45,13,0,0,71,2,30,0.0,1.0,0.0,0.0,0.0,0
6594,248.81,16,0,0,44,1,21,1.0,0.0,0.0,0.0,0.0,0
9004,178.98,19,0,1,74,4,22,1.0,0.0,0.0,0.0,0.0,0
5153,257.18,4,0,0,42,2,34,1.0,0.0,0.0,0.0,0.0,0
7616,69.56,4,0,0,92,2,44,0.0,0.0,0.0,0.0,1.0,0


In [9]:
fraud = train_df.loc[train_df['is_fraud'] == 1, 'is_fraud'].count()
normal = train_df.loc[train_df['is_fraud'] == 0, 'is_fraud'].count()

print(f'Fraud: {fraud}')
print(f'Normal: {normal}')

Fraud: 113
Normal: 7887


In [10]:
class_weights = torch.tensor(
    [1.0, normal / fraud],
    dtype=torch.float32
)
class_weights

tensor([ 1.0000, 69.7965])

In [11]:
test_df.head()

Unnamed: 0,amount,transaction_hour,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,merchant_category_Clothing,merchant_category_Electronics,merchant_category_Food,merchant_category_Grocery,merchant_category_Travel,is_fraud
9745,209.63,21,0,0,47,1,49,0.0,0.0,1.0,0.0,0.0,0
4492,150.83,2,0,0,85,2,33,0.0,0.0,0.0,0.0,1.0,0
3439,476.47,10,1,0,66,4,68,0.0,1.0,0.0,0.0,0.0,0
6077,141.39,0,0,0,49,5,34,0.0,0.0,1.0,0.0,0.0,0
5372,101.75,9,0,0,98,6,62,0.0,0.0,1.0,0.0,0.0,0


In [12]:
train_data = FraudDataset(train_df)
test_data = FraudDataset(test_df)

In [13]:
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [14]:
for X_train, y_train in train_loader:
  print(f'Shape of X: {X_train.shape}')
  print(f'Shape of y: {y_train.shape}')
  break

Shape of X: torch.Size([32, 12])
Shape of y: torch.Size([32])


In [15]:
# Class Model
class FraudModel(nn.Module):
  def __init__(self, in_features=12, h1=16, h2=8, out_features=2):
    super().__init__()
    self.fc1 = nn.Linear(in_features, h1)
    self.fc2 = nn.Linear(h1, h2)
    self.out = nn.Linear(h2, out_features)

  def forward(self, X):
    X = F.relu(self.fc1(X))
    X = F.relu(self.fc2(X))
    X = self.out(X)

    return X

In [16]:
model = FraudModel()

In [17]:
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr= 0.001)

In [18]:
epochs = 10
train_loss = []

for i in range(epochs):
  epoch_loss = 0
  for b, (X_train, y_train) in enumerate(train_loader):

    b+=1

    # Reshaping y_train so that torch size of label is same as input/features
    # y_train = y_train.unsqueeze(1).float()

    # Forward Pass
    prediction = model(X_train)
    loss = criterion(prediction, y_train)

    # Backward Loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    epoch_loss += loss.item()

    if b%100 ==0:
      print(f'Epoch: {i+1}, Batch: {b}, Loss: {loss.item()}')

  train_loss.append(epoch_loss/len(train_loader))
  print(f'Epoch: {i+1}, Loss: {epoch_loss/len(train_loader)}')

Epoch: 1, Batch: 100, Loss: 0.31613069772720337
Epoch: 1, Batch: 200, Loss: 0.3627563416957855
Epoch: 1, Loss: 0.6204721863269806
Epoch: 2, Batch: 100, Loss: 0.11934354156255722
Epoch: 2, Batch: 200, Loss: 0.1845618188381195
Epoch: 2, Loss: 0.47817367753386497
Epoch: 3, Batch: 100, Loss: 0.32684776186943054
Epoch: 3, Batch: 200, Loss: 0.4049142301082611
Epoch: 3, Loss: 0.41317319667339325
Epoch: 4, Batch: 100, Loss: 1.087848424911499
Epoch: 4, Batch: 200, Loss: 0.13109548389911652
Epoch: 4, Loss: 0.3428438658863306
Epoch: 5, Batch: 100, Loss: 0.6050821542739868
Epoch: 5, Batch: 200, Loss: 0.09565600007772446
Epoch: 5, Loss: 0.32587374103069305
Epoch: 6, Batch: 100, Loss: 0.0506843701004982
Epoch: 6, Batch: 200, Loss: 0.6483384966850281
Epoch: 6, Loss: 0.30445870259404184
Epoch: 7, Batch: 100, Loss: 0.17648519575595856
Epoch: 7, Batch: 200, Loss: 0.23246383666992188
Epoch: 7, Loss: 0.31877579383552074
Epoch: 8, Batch: 100, Loss: 0.14623866975307465
Epoch: 8, Batch: 200, Loss: 0.19347538

In [19]:
model.eval()

FraudModel(
  (fc1): Linear(in_features=12, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=8, bias=True)
  (out): Linear(in_features=8, out_features=2, bias=True)
)

In [20]:
TP = 0
TN = 0
FP = 0
FN = 0

In [21]:
with torch.no_grad():
  test_loss = 0
  for b, (X_test, y_test) in enumerate(test_loader):
    y_val = model(X_test)

    probs = torch.softmax(y_val, dim=1)
    fraud_probs = probs[:, 1]

    preds = (fraud_probs > 0.75).long()




    # Confussion Matrix
    TP += ((preds == 1) & (y_test == 1)).sum().item()
    TN += ((preds == 0) & (y_test == 0)).sum().item()
    FP += ((preds == 1) & (y_test == 0)).sum().item()
    FN += ((preds == 0) & (y_test == 1)).sum().item()

    loss = criterion(y_val, y_test)
    test_loss += loss.item()

    if b%10 ==0:
      print(f'Battch: {b}, loss: {loss.item()}')

  print(f'Test Loss: {test_loss/len(test_loader)}')

Battch: 0, loss: 0.22254988551139832
Battch: 10, loss: 0.17141029238700867
Battch: 20, loss: 0.5963832139968872
Battch: 30, loss: 0.12434149533510208
Battch: 40, loss: 0.26764464378356934
Battch: 50, loss: 0.8448571562767029
Battch: 60, loss: 0.09332764893770218
Test Loss: 0.25921772621453754


In [22]:
# Accuracy
accuracy = (TP + TN) / (TP+TN+FP+FN)
print(f"Accuracy : {(accuracy * 100):.2f}%")
# Percision
precision = TP / (TP + FP)
print(f"Precision : {(precision * 100):.2f}%")
# Recall
recall = TP / (TP + FN)
print(f"Recall : {(recall * 100):.2f}%")
# F1 Score
f1_score = 2 * (precision * recall) / (precision + recall)
print(f"F1 Score : {(f1_score * 100):.2f}%")


Accuracy : 96.40%
Precision : 30.23%
Recall : 68.42%
F1 Score : 41.94%


In [23]:
print(f'TP: {TP}, TN: {TN}, FP: {FP}, FN: {FN}')

TP: 26, TN: 1902, FP: 60, FN: 12
