In [102]:
#import libraries
import pandas as pd
import numpy as np
import random
import math
#sklearn for preprocessing and evaluation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score
#matplotlib and seaborn to graph
import matplotlib.pyplot as plt
import seaborn as sns
#pytorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [103]:
#check to make sure cuda working
print("CUDA available:", torch.cuda.is_available())
print("CUDA device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(0))

CUDA available: True
CUDA device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 2070 SUPER


In [104]:
#load in dataset
df = pd.read_csv('creditcard_fraud_detection.csv')
print(df.head())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [105]:
#check nas
df.isna().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [106]:
#calculate proportion of fraud in dataset
print(sum(df['Class'])/len(df))
#dataset is sparse, much more non fraud than fraud

0.001727485630620034


In [107]:
#preprocess and split for train test

#take log of amount
epsilon = 1e-7
df['Log_Amount'] = np.log(df['Amount'] + epsilon)

#X is everything but "Class", and untransformed amount and time columns
X = df.drop(['Class', 'Amount', 'Time'], axis = 1).values
#y is class (0 = no fraud, 1 = fraud)
y= df['Class'].values

scalar = StandardScaler()
#PCA features are already scaled, but time and amount aren't
X_scaled = scalar.fit_transform(X)

#train test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size= 0.2, random_state= 50, 
    #stratify y since dataset is sparse and we want to ensureki
    stratify= y
    )

#create validation set
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train
)

In [108]:
#wrap dataset in classes for pytorch
class CreditFraudDataset(Dataset):
    def __init__(self, X, y):
        #convert np arrays to tensors
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    #return len dataset
    def __len__(self):
        return len(self.X)

    #return item of X, y at specific index
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

batch_size = 2048

#wrap datasets
train_dataset = CreditFraudDataset(X_train, y_train)
val_dataset   = CreditFraudDataset(X_val, y_val)
test_dataset  = CreditFraudDataset(X_test, y_test)

In [109]:
#set up neural net
class FraudNet(nn.Module):
    def __init__(self, input_dim, dropout=0.3):  # dropout is now a hyperparameter
        super().__init__()
        self.net = nn.Sequential(

            #layer 1
            nn.Linear(input_dim, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(dropout),


            #layer 2
            nn.Linear(64,64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(dropout),

            #layer 3
            nn.Linear(64,32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(dropout),

            #only one output logit, for fraud or not
            nn.Linear(32, 1)
        )

    def forward(self, x):
        #x: (batch_size, input_dim)
        logits = self.net(x).squeeze(1)  #(batch_size,)
        return logits

input_dim = X_train.shape[1]
#init a temporary model just to inspect the architecture
tmp_model = FraudNet(input_dim)
print(tmp_model)

FraudNet(
  (net): Sequential(
    (0): Linear(in_features=29, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=64, out_features=64, bias=True)
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=64, out_features=32, bias=True)
    (9): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.3, inplace=False)
    (12): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [110]:
#set up device
#todo: set up cuda
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [111]:
#set up bce for model
#count number of each class
class_counts = np.bincount(y_train)
neg, pos = class_counts[0], class_counts[1]
#set weights
pos_weight = torch.tensor([neg / pos], dtype=torch.float32).to(device)

In [112]:
#bias initilization
base_rate = pos / (neg + pos)

epsilon = 1e-7
base_rate = min(max(base_rate, epsilon), 1 - epsilon)
bias_init = math.log(base_rate / (1.0 - base_rate))

print("Base fraud rate:", base_rate)
print("Initial output bias (logit):", bias_init)

Base fraud rate: 0.0017281485220215498
Initial output bias (logit): -6.358975018556549


In [113]:
#function wrapper to initialize training loop with different hyperparameters
def train_eval_one_config(lr, dropout, weight_decay=0.0):
    #take hyperparameters as arguments, return best score and best model
    
    #initialize loaders here, batch size is a hyperparameter so use that
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    #set up model
    model = FraudNet(input_dim, dropout=dropout).to(device)

    #bias initialization
    with torch.no_grad():
        model.net[-1].bias.fill_(bias_init)

    #set up bce for model
    #use BCE to weight fraud more in the model. Model treats missed fraud transactions more harshly in loss function
    #take learning rate and weight decay as arguments
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    best_val_pr_auc = -np.inf
    best_state_dict = None

    #set max epochs and patience
    num_epochs = 100
    
    #early stopping made performance worse, removing it for now
    patience   = 10           #stop if val PR-AUC doesn't improve for 5 epochs
    epochs_no_improve = 0   #epochs without model improving pr auc

    #run training loop for each epoch
    for epoch in range(num_epochs):

        #train
        model.train()
        running_loss = 0.0

        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            logits = model(X_batch)               #output scores as logits instead of probabilities for bce
            loss = criterion(logits, y_batch)     #calc loss
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * X_batch.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)

        #evaluate on validation set for each epoch
        model.eval()
        all_logits = []
        all_targets = []

        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch = X_batch.to(device)
                logits = model(X_batch)
                all_logits.append(logits.cpu())
                all_targets.append(y_batch)

        all_logits = torch.cat(all_logits)
        all_targets = torch.cat(all_targets)

        #converts logits to p
        probs = torch.sigmoid(all_logits).numpy()
        targets = all_targets.numpy()

        #calculate roc auc score
        #roc = roc_auc_score(targets, probs)
        
        #auc pr score (validation metric we care most about)
        pr_auc = average_precision_score(targets, probs)

        #track best validation PR-AUC for this config
        if pr_auc > best_val_pr_auc:
            best_val_pr_auc = pr_auc
            best_state_dict = model.state_dict()
            #if model is improvement, reset counter
            epochs_no_improve = 0
        else:
            #else increment counter
            epochs_no_improve += 1

        if epochs_no_improve >= patience:
            #if we hit patience threshold, break
            print(f"Early stopping at epoch {epoch+1}")
            break
        

        #print output for epoch (optional, you can comment this out if it's too verbose)
        print(f"  Epoch {epoch+1:02d} | Loss: {epoch_loss:.4f} | Val PR-AUC: {pr_auc:.4f}")

    return best_val_pr_auc, best_state_dict


In [114]:
#hyperparameter "ranges" for random search
#we'll *sample* from these instead of trying every combination
def sample_hparams():
    #learning rate: sample log-uniform between 1e-4 and 3e-3
    log_lr_min = math.log10(1e-4)
    log_lr_max = math.log10(3e-3)
    log_lr = random.uniform(log_lr_min, log_lr_max)
    lr = 10 ** log_lr

    #dropout: uniform between 0.1 and 0.5
    dropout = random.uniform(0.1, 0.6)

    #weight decay: small set of options (including no weight decay)
    weight_decay = random.choice([0.0, 1e-5, 1e-4])

    return {
        "lr": lr,
        "dropout": dropout,
        "weight_decay": weight_decay
    }

In [115]:
#number of random configs to try
num_trials = 30
best_hparams = None
best_val_score = -np.inf
best_model_state = None

for trial in range(1, num_trials + 1):
    print(f"\n=== Trial {trial}/{num_trials} ===")

    #sample a random hyperparameter configuration
    hparams = sample_hparams()
    lr           = hparams["lr"]
    dropout      = hparams["dropout"]
    weight_decay = hparams["weight_decay"]

    print(f"Sampled hyperparameters:")
    print(f"  lr          = {lr:.6f}")
    print(f"  dropout     = {dropout:.3f}")
    print(f"  weight_decay= {weight_decay}")

    #train and evaluate this config on the validation set
    val_pr_auc, state_dict = train_eval_one_config(
        lr=lr,
        dropout=dropout,
        weight_decay=weight_decay
    )

    print(f"Best val PR-AUC for this trial: {val_pr_auc:.4f}")

    #keep track of overall best config
    if val_pr_auc > best_val_score:
        best_val_score = val_pr_auc
        best_hparams = hparams
        best_model_state = state_dict

print("Best hyperparameters found:")
print(best_hparams)
print("Best validation PR-AUC:", best_val_score)


=== Trial 1/30 ===
Sampled hyperparameters:
  lr          = 0.002623
  dropout     = 0.329
  weight_decay= 0.0001
  Epoch 01 | Loss: 1.1820 | Val PR-AUC: 0.6921
  Epoch 02 | Loss: 0.5822 | Val PR-AUC: 0.6656
  Epoch 03 | Loss: 0.4229 | Val PR-AUC: 0.6968
  Epoch 04 | Loss: 0.3137 | Val PR-AUC: 0.7077
  Epoch 05 | Loss: 0.2712 | Val PR-AUC: 0.7088
  Epoch 06 | Loss: 0.2955 | Val PR-AUC: 0.7087
  Epoch 07 | Loss: 0.2615 | Val PR-AUC: 0.7130
  Epoch 08 | Loss: 0.2783 | Val PR-AUC: 0.7009
  Epoch 09 | Loss: 0.2960 | Val PR-AUC: 0.7032
  Epoch 10 | Loss: 0.2389 | Val PR-AUC: 0.7142
  Epoch 11 | Loss: 0.2217 | Val PR-AUC: 0.7184
  Epoch 12 | Loss: 0.2222 | Val PR-AUC: 0.7066
  Epoch 13 | Loss: 0.2342 | Val PR-AUC: 0.6964
  Epoch 14 | Loss: 0.2049 | Val PR-AUC: 0.7016
  Epoch 15 | Loss: 0.2120 | Val PR-AUC: 0.7131
  Epoch 16 | Loss: 0.1984 | Val PR-AUC: 0.7035
  Epoch 17 | Loss: 0.2150 | Val PR-AUC: 0.6993
  Epoch 18 | Loss: 0.2471 | Val PR-AUC: 0.7009
  Epoch 19 | Loss: 0.1796 | Val PR-AUC:

In [116]:
#rebuild the best model and load its weights
best_model = FraudNet(input_dim, dropout=best_hparams["dropout"]).to(device)
best_model.load_state_dict(best_model_state)
best_model.eval()

#use the best batch_size for the test loader
test_loader  = DataLoader(test_dataset, batch_size=2048, shuffle=False)

all_logits = []
all_targets = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        logits = best_model(X_batch)
        all_logits.append(logits.cpu())
        all_targets.append(y_batch)

all_logits = torch.cat(all_logits)
all_targets = torch.cat(all_targets)

#converts logits to p
probs = torch.sigmoid(all_logits).numpy()
targets = all_targets.numpy()

#calculate roc auc score on the test set
#roc = roc_auc_score(targets, probs)
#auc pr score on the test set
pr_auc = average_precision_score(targets, probs) 

print(f"Test PR-AUC with best hyperparameters: {pr_auc:.4f}")

Test PR-AUC with best hyperparameters: 0.7972


In [117]:
#threshold for confusion matrix
threshold = 0.5
#make predictions
preds = (probs >= threshold).astype(int)

#print confusion matrix
print("Confusion matrix:")
print(confusion_matrix(targets, preds))

print("\nClassification report:")
print(classification_report(targets, preds, digits=4))


Confusion matrix:
[[56442   422]
 [   10    88]]

Classification report:
              precision    recall  f1-score   support

         0.0     0.9998    0.9926    0.9962     56864
         1.0     0.1725    0.8980    0.2895        98

    accuracy                         0.9924     56962
   macro avg     0.5862    0.9453    0.6428     56962
weighted avg     0.9984    0.9924    0.9950     56962

