In [14]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import torch
from torch import tensor, nn, optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy, Precision, Recall

---
## Load data

In [2]:
data = pd.read_csv('../data/creditcard.csv')
data.drop('Time', axis=1, inplace=True)

In [3]:
train_df, temp_df = train_test_split(data, test_size=0.8, stratify=data['Class'], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.2, stratify=temp_df['Class'], random_state=42)

---
## Create custom dataset class `FraudDataset`

In [4]:
class FraudDataset(Dataset):
    def __init__(self, DataFrame):
        super().__init__()
        df = DataFrame
        self.data = df.to_numpy().astype(np.float32)

    def __len__(self):
        return self.data.shape[0]
    
    def __input_size__(self):
        return self.data.shape[1] -1
    
    def __getitem__(self, idx):
        X = tensor(self.data[idx, :-1], dtype=torch.float32)
        y = tensor(self.data[idx, -1], dtype=torch.float32)
        return X, y

# Instatiate FraudDataset objects for each dataset 
train_set = FraudDataset(train_df)
val_set = FraudDataset(val_df)
test_set = FraudDataset(test_df)

---
## Create DataLoaders

In [5]:
train_loader = DataLoader(train_set,batch_size=32,shuffle=True)
val_loader = DataLoader(val_set,batch_size=32,shuffle=True)
test_loader = DataLoader(test_set,batch_size=32,shuffle=True)

---
## Create NN architectue class `FraudNN`

In [6]:
class FraudNN(nn.Module):
    def __init__(self, input_size):
        super(FraudNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.dropout(x, p=0.2)
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.sigmoid(self.fc3(x))
        return x

input_size = train_set.__input_size__()
nnet = FraudNN(input_size=input_size)

---
## Define training and validation loop

In [20]:
# 1. Define Loss Function
criterion = nn.BCELoss()

# 2. Define Optimizer
optimizer = optim.Adam(nnet.parameters(), lr=0.001)

# 3. Define number of epochs and initialize variable for best validation loss metric
epochs = 10
best_val_loss = [None, float('inf')]

# 4. Define training and validation loops
for epoch in range(epochs):
    # Set model in train mode
    nnet.train()
    train_loss = 0
    
    for X, y in train_loader:
        optimizer.zero_grad()
        outputs = nnet(X)
        loss = criterion(outputs, y.view(-1, 1))
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)

    # 5. Set model in eval mode 
    nnet.eval()
    val_loss = 0

    with torch.no_grad():
        for X, y in val_loader:
            val_outputs = nnet(X)
            loss = criterion(val_outputs, y.view(-1, 1))
            val_loss += loss.item()
    val_loss /= len(val_loader)

    # Save the best model metric
    if val_loss < best_val_loss[1]:
        best_val_loss[0] = epoch
        best_val_loss[1] = val_loss

    print(f'Epoch [{epoch+1}/{epochs}], Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

print()
string = f'Best Validation Loss: Epoch: {best_val_loss[0]+1}, Validation Loss: {best_val_loss[1]:.4f}'
print('-'*len(string))
print(string)
print('-'*len(string))

Epoch [1/10], Loss: 0.0033, Validation Loss: 0.0157
Epoch [2/10], Loss: 0.0043, Validation Loss: 0.0206
Epoch [3/10], Loss: 0.0061, Validation Loss: 0.0161
Epoch [4/10], Loss: 0.0027, Validation Loss: 0.0190
Epoch [5/10], Loss: 0.0034, Validation Loss: 0.0238
Epoch [6/10], Loss: 0.0060, Validation Loss: 0.0174
Epoch [7/10], Loss: 0.0023, Validation Loss: 0.0158
Epoch [8/10], Loss: 0.0027, Validation Loss: 0.0157
Epoch [9/10], Loss: 0.0019, Validation Loss: 0.0184
Epoch [10/10], Loss: 0.0023, Validation Loss: 0.0160

-------------------------------------------------------
Best Validation Loss: Epoch: 1, Validation Loss: 0.0157
-------------------------------------------------------


---
## Define test loop

In [32]:
acc = Accuracy(task='binary')
precision = Precision(task='binary')
recall = Recall(task='binary')

nnet.eval()

with torch.no_grad():
    for X, y in test_loader:
        outputs = nnet(X)
        preds = (outputs >= 0.55).float()
        acc(preds, y.view(-1,1))
        precision(preds, y.view(-1,1))
        recall(preds, y.view(-1,1))
  

test_accuracy = acc.compute()
test_precision = precision.compute()
test_recall = recall.compute()
print(f'Test Accuracy: {test_accuracy:.4f}, Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}')

Test Accuracy: 0.9993, Test Precision: 0.8636, Test Recall: 0.7215


---
## Overfitting Notes

## fighting overfitting
- dropout=0.2
- learning_rate=1e-3
- weight_decay=1e-4
- weight decay takes values between 0 and 1.
- typically small values like 1e-3.
- adds penalty to loss function to discourage large weights and biases.
- proportional to the current value of the weight and subtracted from the gradient.
- higher the value of the parameter, the less likely the model is to overfit.

## Data Augmentation

## Maximizing performance
- overfit the training set
- reduce overfitting
- fine-tune hyperparameters

### 1. Overfitting Training set
- modify training loop to overfit a single data point (batch size = 1)
```Python
features, labels = next(iter(trainloader))
for i in range(1e-3):
    outputs = model(features)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
```
- Should give an accuracy of 1.0 and a loss of 0
- Helps finding bus in code
- ***goal***: minimize training loss
- create large enough model
- hyperparameters kept to defaults for now

### 2. Reduce Overfitting
- ***Goal***: Maximize the validation accuracy
- experiment with:
    - Dropout
    - Data augmentation
    - weight decay
    - Reducing model capacity
- keep track of each hyperparameter set and corresponding accuracy / metric.
- plot each experiment against the default setting train/val curves

### 3. Fine-Tuning Hyperparameters
- Grid Search:
    - Usually done on the optimizer hyperparameters
    - Uses values of the parameters at a constant inverval
    - Eg. Every momentum value between 0.85 and 0.99 with a constant interval
    - 
```Python
for factor in range(2,6):
    lr = 10**-factor
for val in np.arange(0.85, 1.00, 0.01):
    momentum = val
```
- Random Search:
    - Randomly samples parameters between intervals.
    - Quicker, and possibly better results as searches a less restricted sapce
```Python
factor = np.random.uniform(2,6)
lr = 10**-factor
val = np.random.uniform(0.85, 1.00)
momentum = val
```