References:
- [Brain Tumor Detection Using Convolutional Neural Networks](https://medium.com/@mohamedalihabib7/brain-tumor-detection-using-convolutional-neural-networks-30ccef6612b0)
- [Build an Image Classification Model using Convolutional Neural Networks in PyTorch](https://www.analyticsvidhya.com/blog/2019/10/building-image-classification-models-cnn-pytorch/)

Because of limited compute, the CNN used here is deliberately simple and has only a few layers.

In [None]:
# Colab only: mount Google Drive so that the dataset directory used below
# (under /content/drive/MyDrive) is reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torchvision.transforms import RandomAffine, ToTensor
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from collections import Counter

### Import Dataset

In [None]:
# Define data transformations (resize, normalize, etc.)
# Only the resize is attached to the dataset itself, so iterating `dataset` yields
# resized (still un-normalized) images. Tensor conversion and normalization are
# applied later: by `Augmentation` for training images and by
# `validation_transform` for validation/test images.
resize_transform = transforms.Resize((224, 224))
validation_transform = transforms.Compose([
    transforms.ToTensor(),  # Convert to tensor
    # NOTE(review): these look like the usual ImageNet channel statistics - confirm
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Define the dataset path (the directory containing "benign" and "malignant" subfolders)
# dataset_path = '/content/drive/MyDrive/NUS/Y4S1/IT1244/images'
dataset_path = '/content/drive/MyDrive/IT1244/images'

# ImageFolder derives the integer class label of each image from its subfolder.
dataset = datasets.ImageFolder(root=dataset_path, transform=resize_transform)

### Image Augmentation

In [None]:
class Augmentation:
  """
  Generate several randomly transformed copies of one image for training.

  Every copy is produced by a random affine transform (rotation, translation and
  shear), then converted to a tensor and normalized with mean
  [0.485, 0.456, 0.406] and std [0.229, 0.224, 0.225]. The untransformed input
  image itself is not part of the result.

  Parameters:
  - num_augmentations (int): The number of augmented copies to create per input image.
  - translation (tuple of floats): Maximum absolute fraction for horizontal and vertical translations.
  - shear (tuple of floats): Range of shearing angles (degrees).
  - rotation (tuple of floats): Range of rotation angles (degrees).

  All parameters are optional; the defaults reproduce the settings that were
  previously hard-coded, so `Augmentation()` behaves as before.

  Methods:
  - __call__(self, img_label): Apply augmentations to an input image-label pair.

  Returns (from __call__):
  - augmentations (list): A list of augmented image-label pairs. Each pair consists of an augmented image (torch.Tensor) and the original label.

  Example:
  augmentation = Augmentation()
  augmented_images = augmentation((input_image, input_label))
  """
  def __init__(self, num_augmentations=10, translation=(15 / 224, 15 / 224), shear=(-15, 15), rotation=(-25, 25)):
    self.num_augmentations = num_augmentations
    self.translation = translation
    self.shear = shear
    self.rotation = rotation
    self.toTensor = transforms.ToTensor()
    self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

  def __call__(self, img_label):
    img, label = img_label

    # The transform object only stores the ranges and draws fresh random
    # parameters each time it is applied, so a single instance serves every copy.
    # It is built per call (not in __init__) so that changes to the range
    # attributes after construction are still honoured.
    aug_transform = RandomAffine(degrees=self.rotation, translate=self.translation, shear=self.shear)

    augmentations = []
    for _ in range(self.num_augmentations):
      new_img = self.normalize(self.toTensor(aug_transform(img)))
      augmentations.append((new_img, label))

    return augmentations

### Modelling

In [None]:
class CNN(nn.Module):
  """
  Small convolutional classifier: three conv/batch-norm/ReLU/max-pool stages
  (16, 32 and 64 channels) followed by a single fully connected layer.

  Each max-pool halves the spatial size, and the fully connected layer expects
  64 * 28 * 28 features, i.e. 3-channel inputs of 224 x 224 pixels.
  """

  def __init__(self, num_classes=2):
    """
    Build the layers of the network.

    Parameters:
    - num_classes (int): The number of output classes, typically 2 for binary classification.
    """
    super().__init__()

    # The three feature stages differ only in their channel counts.
    stages = []
    for in_ch, out_ch in ((3, 16), (16, 32), (32, 64)):
      stages += [
          nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, stride=1, padding=1),
          nn.BatchNorm2d(out_ch),
          nn.ReLU(inplace=True),
          nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
      ]
    self.cnn_layers = nn.Sequential(*stages)

    self.flat = nn.Flatten()

    self.fc = nn.Sequential(
        nn.Linear(in_features=64 * 28 * 28, out_features=num_classes, bias=True)
    )

  def forward(self, x):
    """
    Run a batch of images through the network.

    Parameters:
    - x (torch.Tensor): Input data (images).

    Returns:
    - torch.Tensor: One raw score (logit) per class for every image.
    """
    features = self.cnn_layers(x)
    return self.fc(self.flat(features))

In [None]:
def train_epoch(model, train_loader, criterion, optimizer, device):
  """
  Train a neural network model for one pass over a training dataset.

  Parameters:
  - model (torch.nn.Module): The neural network model to be trained.
  - train_loader (torch.utils.data.DataLoader): DataLoader for the training dataset.
    Any iterable of (images, integer class targets) batches works.
  - criterion (torch.nn.Module): The loss function used for optimization. It is called
    with the model outputs and a one-hot float encoding of the targets.
  - optimizer (torch.optim.Optimizer): The optimizer for updating model weights.
  - device (torch.device): The device (CPU or GPU) on which the training will be performed.

  Returns:
  - float: The total loss over the training dataset (sum over batches of
    batch loss * batch size); divide by the number of samples for the mean.
  """
  model.train()
  running_loss = 0.0
  for imgs, targets in train_loader:
    imgs, targets = imgs.to(device), targets.to(device)
    optimizer.zero_grad()
    outputs = model(imgs)
    # Pin the one-hot width to the number of model outputs. Without num_classes,
    # one_hot infers the width from the largest label in the batch, so a batch
    # holding only class 0 would produce a target narrower than `outputs`.
    one_hot_targets = nn.functional.one_hot(targets, num_classes=outputs.size(1)).float()
    loss = criterion(outputs, one_hot_targets)
    loss.backward()
    optimizer.step()
    running_loss += loss.item() * imgs.size(0) # mean loss * batch size = total loss of batch

  return running_loss

In [None]:
def validate(model, val_loader, criterion, device):
  """
  Run a neural network model over a validation dataset and collect its predictions.

  Parameters:
  - model (torch.nn.Module): The neural network model to be validated.
  - val_loader (torch.utils.data.DataLoader): DataLoader for the validation dataset.
  - criterion (torch.nn.Module): Not used by this function; kept so existing calls keep working.
  - device (torch.device): The device (CPU or GPU) on which the validation will be performed.

  Returns:
  - Tuple of three lists with one entry per validation sample, in loader order:
    predicted probabilities for label 1 (floats), predicted labels (ints), and
    true labels (ints).
  """
  model.eval()
  y_pred_probas = []
  y_pred = []
  y_true = []
  with torch.no_grad():
    for imgs, targets in val_loader:
      imgs, targets = imgs.to(device), targets.to(device)
      outputs = model(imgs)
      # Probability for label 1: sigmoid of the second output column.
      proba = torch.sigmoid(outputs)[:, 1]
      # Predicted label: index of the largest output per sample.
      _, pred = torch.max(outputs, dim=1)
      # Store plain Python numbers instead of 0-d tensors.
      y_pred_probas.extend(proba.cpu().tolist())
      y_pred.extend(pred.cpu().tolist())
      y_true.extend(targets.cpu().tolist())
  return y_pred_probas, y_pred, y_true

In [None]:
def calculate_evaluation_metrics(y_true, y_pred, y_pred_probas):
  """
  Compute the evaluation metrics for one set of binary predictions.

  Parameters:
  - y_true (List): The true labels of the validation dataset.
  - y_pred (List): The predicted labels of the validation dataset, value can be 0 or 1.
  - y_pred_probas (List): The predicted probabilities for label 1 of the validation dataset, values are between 0 and 1.

  Returns:
  - Tuple of floats, in this order: accuracy, precision, recall, f1, specificity, roc_auc, pr_auc
  """
  # Metrics based on the predicted probabilities.
  area_under_roc = roc_auc_score(y_true, y_pred_probas)
  curve_precision, curve_recall, _thresholds = precision_recall_curve(y_true, y_pred_probas)
  area_under_pr = auc(curve_recall, curve_precision)

  # Metrics based on the predicted labels.
  counts = confusion_matrix(y_true, y_pred)
  tn, fp, fn, tp = counts.ravel()
  correct = tn + tp
  total = tn + fp + fn + tp
  negatives = tn + fp

  return (
      correct / total,
      precision_score(y_true, y_pred),
      recall_score(y_true, y_pred),
      f1_score(y_true, y_pred),
      tn / negatives,
      area_under_roc,
      area_under_pr,
  )

### CV with no Hyperparameter Tuning

In [None]:
def print_validation_performance(fold_accuracies, fold_precisions, fold_recalls, fold_specificities, fold_f1_scores, fold_roc_auc, fold_pr_auc):
  """
  Average each metric over the folds and print the means, rounded to two decimals.

  Parameters:
  - fold_accuracies (List): The accuracy score obtained from each fold.
  - fold_precisions (List): The precision score obtained from each fold.
  - fold_recalls (List): The recall score obtained from each fold.
  - fold_specificities (List): The specificity score obtained from each fold.
  - fold_f1_scores (List): The f1 score obtained from each fold.
  - fold_roc_auc (List): The roc_auc score obtained from each fold.
  - fold_pr_auc (List): The pr_auc score obtained from each fold.
  """
  # (printed name, per-fold scores), listed in the order they are reported.
  scores_by_title = (
      ("Accuracy", fold_accuracies),
      ("Precision", fold_precisions),
      ("Recall", fold_recalls),
      ("Specificity", fold_specificities),
      ("F1 Score", fold_f1_scores),
      ("ROC AUC", fold_roc_auc),
      ("PR AUC", fold_pr_auc),
  )

  # All means are computed before the first line is printed.
  averages = [(title, np.mean(scores)) for title, scores in scores_by_title]
  for title, average in averages:
    print(f"Mean {title}: {average:.2f}")

In [None]:
# Stratified k-fold cross validation with one fixed set of hyperparameters.
# For every fold a fresh CNN is trained on the augmented training part and
# evaluated on the untouched validation part; per-fold metrics are collected in
# the fold_* lists, which a later cell averages.

# set hyperparameters to be used
num_epochs = 20
batch_size = 32 # can try 16 or 64 also
learning_rate = 0.001 # lr between 0.0001 and 0.01

# Define cross-validation
k = 5
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # set device to cuda if using T4 GPU
print(device)

# File paths and labels only; used to compute the stratified split without loading images.
data = [item[0] for item in dataset.samples]
data_labels = [item[1] for item in dataset.samples]

augmentation = Augmentation()

# Initialize lists to store metrics for each fold
fold_accuracies = []
fold_precisions = []
fold_recalls = []
fold_specificities = []
fold_f1_scores = []
fold_roc_auc = []
fold_pr_auc = []

for fold, (train_index, val_index) in enumerate(skf.split(data, data_labels)):
  print(f"Training Fold {fold + 1}/{k}")

  # Split the dataset into training and validation sets for this fold
  train_dataset = torch.utils.data.Subset(dataset, train_index)
  val_dataset = torch.utils.data.Subset(dataset, val_index)

  # Class frequencies of the un-augmented training part. Every image receives the
  # same number of augmented copies, so the class ratio is unchanged by augmentation.
  class_counts = dict(Counter(dataset.targets[i] for i in train_index) )

  # Augment only the training part (done after the split, so no augmented copy of a
  # validation image ends up in training). All results are held in memory.
  augmented_images = []
  for img_label in train_dataset:
    augmented_images.extend(augmentation(img_label))

  # Validation images are only converted to tensors and normalized.
  transformed_val_dataset = []
  for img_label in val_dataset:
    img, label = img_label
    transformed_img = validation_transform(img)
    transformed_val_dataset.append((transformed_img, label))

  # Create data loaders for training and validation
  train_loader = DataLoader(augmented_images, batch_size=batch_size, shuffle=True, num_workers=2)
  val_loader = DataLoader(transformed_val_dataset, batch_size=batch_size, num_workers=2)
  print(f"initial training dataset size: {len(train_dataset)}, post-transformation training dataset size: {len(augmented_images)}, validation dataset size: {len(transformed_val_dataset)}")

  # Initialize the model (a new, untrained one per fold)
  model = CNN()
  model.to(device)

  # Define loss function and optimizer
  # NOTE(review): `weight` is broadcast against the (batch, 2) element-wise loss, so it
  # scales each output column rather than each sample according to its class - confirm
  # this is the intended class balancing (`pos_weight` may be what was meant).
  weight_for_0 = sum(class_counts.values()) / (class_counts[0] *  2.0) # total_samples / (num_samples_in_class_i * num_classes)
  weight_for_1 = sum(class_counts.values()) / (class_counts[1] *  2.0)
  weight = torch.tensor([weight_for_0, weight_for_1]).to(device)
  criterion = nn.BCEWithLogitsLoss(weight=weight)
  optimizer = optim.Adam(model.parameters(), lr=learning_rate) # can try SGD

  # Train model
  for epoch in range(num_epochs):
    running_loss = train_epoch(model, train_loader, criterion, optimizer, device)
    # len(train_loader.sampler) is the number of training samples, not of batches
    avg_loss = running_loss/len(train_loader.sampler) # average loss over the epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}')

  # Validation
  y_pred_probas, y_pred, y_true = validate(model, val_loader, criterion, device)
  accuracy, precision, recall, f1, specificity, roc_auc, pr_auc = calculate_evaluation_metrics(y_true, y_pred, y_pred_probas)

  fold_accuracies.append(accuracy)
  fold_precisions.append(precision)
  fold_recalls.append(recall)
  fold_specificities.append(specificity)
  fold_f1_scores.append(f1)
  fold_roc_auc.append(roc_auc)
  fold_pr_auc.append(pr_auc)

  print(f"Validation Accuracy (Fold {fold + 1}/{k}): {accuracy:.2f}")
  print(f"Precision (Fold {fold + 1}/{k}): {precision:.2f}")
  print(f"Recall (Fold {fold + 1}/{k}): {recall:.2f}")
  print(f"Specificity (Fold {fold + 1}/{k}): {specificity:.2f}")
  print(f"F1 Score (Fold {fold + 1}/{k}): {f1:.2f}")
  print(f"ROC AUC (Fold {fold + 1}/{k}): {roc_auc:.2f}")
  print(f"PR AUC (Fold {fold + 1}/{k}): {pr_auc:.2f}")

cuda:0
Training Fold 1/5
initial training dataset size: 184, post-transformation training dataset size: 1840, validation dataset size: 46
Epoch 1/20, Loss: 1.4762391251066456
Epoch 2/20, Loss: 0.710885253937348
Epoch 3/20, Loss: 0.4693754337404085
Epoch 4/20, Loss: 0.3190202355384827
Epoch 5/20, Loss: 0.4319957797941954
Epoch 6/20, Loss: 0.281138444011626
Epoch 7/20, Loss: 0.1616867119203443
Epoch 8/20, Loss: 0.13250214146531147
Epoch 9/20, Loss: 0.08850141000002623
Epoch 10/20, Loss: 0.10916643856579195
Epoch 11/20, Loss: 0.09241064887331879
Epoch 12/20, Loss: 0.0499498567989339
Epoch 13/20, Loss: 0.02818650908606208
Epoch 14/20, Loss: 0.04147751129272839
Epoch 15/20, Loss: 0.2851339775664003
Epoch 16/20, Loss: 0.07182457245805342
Epoch 17/20, Loss: 0.05120314734825945
Epoch 18/20, Loss: 0.010833600513718051
Epoch 19/20, Loss: 0.009176806433369284
Epoch 20/20, Loss: 0.0041595737206871096
Validation Accuracy (Fold 1/5): 0.80
Precision (Fold 1/5): 0.84
Recall (Fold 1/5): 0.87
Specificit

In [None]:
# Calculate and print the mean of every metric (accuracy, precision, recall,
# specificity, F1, ROC AUC, PR AUC) across the cross-validation folds
print_validation_performance(fold_accuracies, fold_precisions, fold_recalls, fold_specificities, fold_f1_scores, fold_roc_auc, fold_pr_auc)

Mean Accuracy: 0.80
Mean Precision: 0.84
Mean Recall: 0.86
Mean Specificity: 0.67
Mean F1 Score: 0.85
Mean ROC AUC: 0.86
Mean PR AUC: 0.90


### Hyperparameter Tuning

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
def update_best_params(metric, best_params, score, epochs, batch_size, lr):
  """
  Record a new best result for one metric.

  Overwrites the 'score', 'num_epochs', 'batch_size' and 'learning_rate' entries
  of best_params[metric] in place and returns the same best_params object.
  """
  entry = best_params[metric]
  new_values = (
      ('score', score),
      ('num_epochs', epochs),
      ('batch_size', batch_size),
      ('learning_rate', lr),
  )
  for key, value in new_values:
    entry[key] = value
  return best_params

In [None]:
def tune_hyperparameters(dataset, k, epoch_list, batch_list, lr_list, device):
  """
  Grid-search hyperparameters for the CNN model with stratified k-fold cross validation.

  Every combination of batch size, number of epochs and learning rate is evaluated:
  for each fold a fresh CNN is trained on the augmented training part and scored on
  the validation part, and the fold scores are averaged per metric.

  Parameters:
  - dataset (List): The list of tuples containing the training data, where each tuple is (PIL image, label).
  - k (int): Number of folds to be used for cross validation
  - epoch_list (List): The list of number of epochs to be explored for hyperparameter tuning.
  - batch_list (List): The list of batch sizes to be explored for hyperparameter tuning.
  - lr_list (List): The list of learning rates to be explored for hyperparameter tuning.
  - device (torch.device): The device (CPU or GPU) on which training and validation will be performed.

  Returns:
  - Tuple: (best_params, params_result).
    - best_params (Dict): for each metric, the best mean score and the hyperparameters that achieved it
      (the first combination wins on ties)
    - params_result (Dict): contains all evaluation metrics results for all combinations of hyperparameters;
      index i of every list belongs to params_result['hyperparameters'][i] = (epochs, batch_size, learning_rate)
  """
  # (result key, label used when printing), in reporting order
  metric_labels = (('accuracy', 'Accuracy'),
                   ('precision', 'Precision'),
                   ('recall', 'Recall'),
                   ('specificity', 'Specificity'),
                   ('f1', 'F1 Score'),
                   ('roc_auc', 'ROC-AUC'),
                   ('pr_auc', 'PR-AUC'))
  # order in which calculate_evaluation_metrics returns its values
  returned_order = ('accuracy', 'precision', 'recall', 'f1', 'specificity', 'roc_auc', 'pr_auc')

  params_result = {'hyperparameters': []} # (epochs, batch_size, learning_rate)
  for name, _ in metric_labels:
    params_result[name] = []

  best_params = {name: {'score': float('-inf')} for name, _ in metric_labels}

  data = [item[0] for item in dataset]
  data_labels = [item[1] for item in dataset]
  skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
  augmentation = Augmentation()

  for batch_size in batch_list:
    for num_epochs in epoch_list:
      for learning_rate in lr_list:
        print(f"Trying hyperparameters: {num_epochs} epochs, {batch_size} batch_size, {learning_rate} learning rate")

        # per-fold scores for this hyperparameter combination
        fold_scores = {name: [] for name in returned_order}

        for train_index, val_index in skf.split(data, data_labels):
          # split the data into training and validation sets for this fold
          train_dataset = torch.utils.data.Subset(dataset, train_index)
          val_dataset = torch.utils.data.Subset(dataset, val_index)

          # class frequencies of the un-augmented training part
          class_counts = dict(Counter(dataset[i][1] for i in train_index))

          # augment only the training part
          augmented_images = []
          for img_label in train_dataset:
            augmented_images.extend(augmentation(img_label))

          # validation images are only converted to tensors and normalized
          transformed_val_dataset = []
          for img, label in val_dataset:
            transformed_val_dataset.append((validation_transform(img), label))

          # create data loaders for training and validation
          train_loader = DataLoader(augmented_images, batch_size=batch_size, shuffle=True, num_workers=2)
          val_loader = DataLoader(transformed_val_dataset, batch_size=batch_size, num_workers=2)

          # initialize the model (a new, untrained one per fold)
          model = CNN()
          model.to(device)

          # define loss function and optimizer
          weight_for_0 = sum(class_counts.values()) / (class_counts[0] *  2.0) # total_samples / (num_samples_in_class_i * num_classes)
          weight_for_1 = sum(class_counts.values()) / (class_counts[1] *  2.0)
          weight = torch.tensor([weight_for_0, weight_for_1]).to(device)
          criterion = nn.BCEWithLogitsLoss(weight=weight)
          optimizer = optim.Adam(model.parameters(), lr=learning_rate) # can try SGD

          # train model (the per-epoch loss is not reported during tuning)
          for _ in range(num_epochs):
            train_epoch(model, train_loader, criterion, optimizer, device)

          # validation
          y_pred_probas, y_pred, y_true = validate(model, val_loader, criterion, device)
          fold_metrics = calculate_evaluation_metrics(y_true, y_pred, y_pred_probas)
          for name, value in zip(returned_order, fold_metrics):
            fold_scores[name].append(value)

        # calculate and store the mean metrics for this hyperparameter combination
        mean_scores = {name: np.mean(fold_scores[name]) for name, _ in metric_labels}

        params_result['hyperparameters'].append((num_epochs, batch_size, learning_rate))
        for name, _ in metric_labels:
          params_result[name].append(mean_scores[name])

        for name, label in metric_labels:
          print(f"{label}: {mean_scores[name]:.2f}")
        print('----------------------------------------------------------------')

        # update best params dict (strict '>' keeps the earliest combination on ties)
        for name, _ in metric_labels:
          if mean_scores[name] > best_params[name]['score']:
            best_params = update_best_params(name, best_params, mean_scores[name], num_epochs, batch_size, learning_rate)

  print(f"Best hyperparameters: {best_params}")

  return best_params, params_result

In [None]:
# Split dataset to 80% train, 20% test, stratified by class label.
# A fixed random_state keeps the held-out test set identical between runs, so the
# tuned hyperparameters and the reported test metrics refer to the same split
# (42 matches the seed used for StratifiedKFold elsewhere in this notebook).
y = [item[1] for item in dataset.samples]
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, shuffle=True, stratify=y, random_state=42)
print(len(train_dataset))
print(len(test_dataset))

184
46


In [None]:
# hyperparameters for tuning: every combination of the three lists is tried
# (3 epoch counts x 2 batch sizes x 2 learning rates), each with k-fold CV
epoch_list = [10, 20, 50]
batch_list = [32, 64]
lr_list = [0.001, 0.01]
k = 5

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Tune on the training split only; the test split is kept for the final evaluation.
best_params, params_result = tune_hyperparameters(train_dataset, k, epoch_list, batch_list, lr_list, device)

cuda:0
Trying hyperparameters: 10 epochs, 32 batch_size, 0.001 learning rate
Accuracy: 0.76
Precision: 0.85
Recall: 0.78
Specificity: 0.71
F1 Score: 0.81
ROC-AUC: 0.79
PR-AUC: 0.86
----------------------------------------------------------------
Trying hyperparameters: 10 epochs, 32 batch_size, 0.01 learning rate
Accuracy: 0.76
Precision: 0.81
Recall: 0.83
Specificity: 0.63
F1 Score: 0.82
ROC-AUC: 0.76
PR-AUC: 0.83
----------------------------------------------------------------
Trying hyperparameters: 20 epochs, 32 batch_size, 0.001 learning rate
Accuracy: 0.77
Precision: 0.83
Recall: 0.83
Specificity: 0.66
F1 Score: 0.83
ROC-AUC: 0.81
PR-AUC: 0.88
----------------------------------------------------------------
Trying hyperparameters: 20 epochs, 32 batch_size, 0.01 learning rate
Accuracy: 0.74
Precision: 0.83
Recall: 0.77
Specificity: 0.68
F1 Score: 0.80
ROC-AUC: 0.80
PR-AUC: 0.88
----------------------------------------------------------------
Trying hyperparameters: 50 epochs, 32 b

Best hyperparameters:
- 'accuracy': {'score': 0.788138138138138, 'num_epochs': 50, 'batch_size': 64, 'learning_rate': 0.001},
- 'precision': {'score': 0.8529523809523809, 'num_epochs': 20, 'batch_size': 64, 'learning_rate': 0.001},
- 'recall': {'score': 0.8686666666666666, 'num_epochs': 10, 'batch_size': 64, 'learning_rate': 0.001},
- 'specificity': {'score': 0.7756410256410255, 'num_epochs': 20, 'batch_size': 64, 'learning_rate': 0.01},
- 'f1': {'score': 0.8406944368866881, 'num_epochs': 50, 'batch_size': 64, 'learning_rate': 0.001},
- 'roc_auc': {'score': 0.8202136752136753, 'num_epochs': 50, 'batch_size': 64, 'learning_rate': 0.001},
- 'pr_auc': {'score': 0.8796929131155805, 'num_epochs': 50, 'batch_size': 64, 'learning_rate': 0.001}}

### Training on Tuned Hyperparameters
- We picked the hyperparameters that gave the best mean `pr_auc` during tuning.

In [None]:
# Final model: train once on the whole training split with the tuned
# hyperparameters, then evaluate on the held-out test split.

# best hyperparameters for pr_auc
num_epochs = 50
batch_size = 64
learning_rate = 0.001

augmentation = Augmentation()

# Class frequencies of the un-augmented training split (used for the loss weights below).
class_counts = dict(Counter(train_dataset[i][1] for i in range(len(train_dataset))))

# use train_dataset and test_dataset split during hyperparameter tuning
augmented_images = []
for img_label in train_dataset:
  augmented_images.extend(augmentation(img_label))

# Test images are only converted to tensors and normalized (no augmentation).
transformed_test_dataset = []
for img_label in test_dataset:
    img, label = img_label
    transformed_img = validation_transform(img)
    transformed_test_dataset.append((transformed_img, label))

train_loader = DataLoader(augmented_images, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(transformed_test_dataset, batch_size=batch_size, num_workers=2)

# initialize the model
model = CNN()
model.to(device)

# define loss function and optimizer
# NOTE(review): `weight` is broadcast against the (batch, 2) element-wise loss, so it
# scales each output column rather than each sample according to its class - confirm
# this is the intended class balancing (`pos_weight` may be what was meant).
weight_for_0 = sum(class_counts.values()) / (class_counts[0] *  2.0) # total_samples / (num_samples_in_class_i * num_classes)
weight_for_1 = sum(class_counts.values()) / (class_counts[1] *  2.0)
weight = torch.tensor([weight_for_0, weight_for_1]).to(device)
criterion = nn.BCEWithLogitsLoss(weight=weight)
optimizer = optim.Adam(model.parameters(), lr=learning_rate) # can try SGD

# train model
for epoch in range(num_epochs):
  running_loss = train_epoch(model, train_loader, criterion, optimizer, device)
  # len(train_loader.sampler) is the number of training samples, not of batches
  avg_loss = running_loss/len(train_loader.sampler) # average loss over the epoch
  print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}')

# test
y_pred_probas, y_pred, y_true = validate(model, test_loader, criterion, device)
accuracy, precision, recall, f1, specificity, roc_auc, pr_auc = calculate_evaluation_metrics(y_true, y_pred, y_pred_probas)
print('----------------------------------------------------------------')
print(f"Test Accuracy: {accuracy:.2f}")
print(f"Test Precision: {precision:.2f}")
print(f"Test Recall: {recall:.2f}")
print(f"Test Specificity: {specificity:.2f}")
print(f"Test F1 Score: {f1:.2f}")
print(f"Test ROC-AUC: {roc_auc:.2f}")
print(f"Test PR-AUC: {pr_auc:.2f}")

Epoch 1/50, Loss: 2.982341118480848
Epoch 2/50, Loss: 0.6983329721119093
Epoch 3/50, Loss: 0.5292163009228914
Epoch 4/50, Loss: 0.39814657024715255
Epoch 5/50, Loss: 0.3317219508730847
Epoch 6/50, Loss: 0.43021017157513164
Epoch 7/50, Loss: 0.22582132919974948
Epoch 8/50, Loss: 0.2997489799623904
Epoch 9/50, Loss: 0.18272083200838254
Epoch 10/50, Loss: 0.15409185951170715
Epoch 11/50, Loss: 0.1149463133967441
Epoch 12/50, Loss: 0.08573109961074332
Epoch 13/50, Loss: 0.06917280619559081
Epoch 14/50, Loss: 0.09758652565271958
Epoch 15/50, Loss: 0.07112959509958391
Epoch 16/50, Loss: 0.049172319633805237
Epoch 17/50, Loss: 0.03209486014169195
Epoch 18/50, Loss: 0.019186994805932046
Epoch 19/50, Loss: 0.01673772223293781
Epoch 20/50, Loss: 0.011437523956208125
Epoch 21/50, Loss: 0.008942639981598957
Epoch 22/50, Loss: 0.011053466246179913
Epoch 23/50, Loss: 0.0073679758559750475
Epoch 24/50, Loss: 0.006222996423425882
Epoch 25/50, Loss: 0.005684048714845077
Epoch 26/50, Loss: 0.00565845861

- Test Accuracy: 0.85
- Test Precision: 0.90
- Test Recall: 0.87
- Test Specificity: 0.80
- Test F1 Score: 0.89
- Test ROC-AUC: 0.82
- Test PR-AUC: 0.89