# Packages

In [None]:
# Global packages
import os
import random
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
%matplotlib inline

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, roc_curve, roc_auc_score, auc
from sklearn.utils import shuffle

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
from torchvision import models, transforms
from torch.utils.data import DataLoader, Subset
from torch.utils.data.sampler import SubsetRandomSampler

# Device configuration: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loading Dataset

In [None]:
# Per-channel mean and standard deviation used to normalize the image tensors
# NOTE(review): presumably these statistics were computed on this dataset - confirm their source
normalize = transforms.Normalize(
    mean=[0.3322, 0.0275, 0.1132],
    std=[0.2215, 0.0965, 0.3152],
)

# Preprocessing pipeline applied to every image: resize to 224x224, convert to a tensor, normalize
transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        normalize,
])

# Load the dataset
dataset = datasets.ImageFolder("./Dataset_BUSI/Dataset_BUSI_with_GT/train/", transform=transform)

# Get the labels from the dataset
labels = np.array(dataset.targets)

# Split the dataset into train and test sets while maintaining class proportions:
# 20% of the indices are held out, stratified on the labels, with a fixed seed (42)
train_indices, test_indices = train_test_split(np.arange(len(dataset)), test_size=0.2, random_state=42, stratify=labels)

# Create Subset datasets for train and test
# Position i of train_dataset maps to dataset[train_indices[i]], so labels[train_indices] is aligned with it
train_dataset = Subset(dataset, train_indices)
test_dataset = Subset(dataset, test_indices)

# Creating Optimizer
The optimizer is currently **Adam**, with separate learning rates for the pretrained layers and the new classification head.

In [None]:
def create_optimizer(model,
                    learning_rate_pretrained,
                    learning_rate_new,
                    weight_decay,
                    beta1=0.9,  # Adam parameter
                    beta2=0.999,  # Adam parameter
                    eps=1e-8,  # Adam parameter
                    amsgrad=False):  # Adam parameter
    """
    Builds an Adam optimizer with two parameter groups and separate learning rates.

    Parameters whose name contains 'classifier.6' form the "new" group; every
    other parameter of the model goes into the "pretrained" group.

    Args:
        model (nn.Module): The model whose parameters will be optimized.
        learning_rate_pretrained (float): Learning rate for the pretrained group.
        learning_rate_new (float): Learning rate for the 'classifier.6' group.
        weight_decay (float): Weight decay applied to both groups.
        beta1 (float): First Adam beta coefficient.
        beta2 (float): Second Adam beta coefficient.
        eps (float): Adam epsilon term.
        amsgrad (bool): Whether to use the AMSGrad variant of Adam.

    Returns:
        optim.Adam: The optimizer; group 0 is the pretrained group, group 1 the new group.
    """
    head_params, backbone_params = [], []

    # Route each parameter into its group based on its qualified name
    for param_name, tensor in model.named_parameters():
        bucket = head_params if 'classifier.6' in param_name else backbone_params
        bucket.append(tensor)

    # Group order matters to callers inspecting param_groups: pretrained first, new second
    param_groups = [
        {'params': backbone_params, 'lr': learning_rate_pretrained},
        {'params': head_params, 'lr': learning_rate_new},
    ]

    return optim.Adam(
        param_groups,
        weight_decay=weight_decay,
        betas=(beta1, beta2),
        eps=eps,
        amsgrad=amsgrad,
    )

# Train Function
Training function for the VGG16 model.

In [None]:
def training(model, device, train_loader, criterion, optimizer):
    """
    Runs one training epoch and reports the sample-weighted mean loss.

    Args:
        model (nn.Module): The network to train; it is moved to `device` and put in train mode.
        device (torch.device): The device (e.g., 'cuda' or 'cpu') to run on.
        train_loader (DataLoader): Iterable yielding (images, labels) batches.
        criterion: The loss function; it receives the model outputs and the labels
            reshaped with unsqueeze(1) and cast to float.
        optimizer: The optimizer used to update the model's parameters.

    Returns:
        float: Sum of per-batch losses weighted by batch size, divided by the number of samples seen.
    """
    model.to(device)
    model.train()

    running_loss = 0.0
    seen = 0

    for batch_inputs, batch_targets in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        logits = model(batch_inputs)
        # Add a trailing dimension and cast to float before handing the labels to the criterion
        batch_loss = criterion(logits, batch_targets.unsqueeze(1).float())

        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so a smaller final batch does not skew the epoch average
        running_loss += batch_loss.item() * batch_inputs.size(0)
        seen += batch_targets.size(0)

    return running_loss / seen

# Testing Function (Evaluation)
Evaluation function for the VGG16 model.

In [None]:
def testing(model, device, val_loader, criterion):
    """
    Evaluation function for a neural network model.

    Args:
        model (nn.Module): The neural network model to be evaluated.
        device (torch.device): The device (e.g., 'cuda' or 'cpu') to use for evaluation.
        val_loader (DataLoader): DataLoader containing validation data.
        criterion: The loss function.

    Returns:
        float: The average validation loss.
    """

    # Move the model to the specified device
    model.to(device)
    # Set the model in evaluation mode
    model.eval()

    # Initialize variables to keep track of validation loss and total samples
    val_loss, total_samples = 0.0, 0

    # Disable gradient calculation during evaluation
    with torch.no_grad():
        # Loop through the batches in the validation data
        for images, labels in val_loader:
            # Move data to the specified device
            images, labels = images.to(device), labels.to(device)
            # Forward pass through the model
            outputs = model(images)
            # Calculate the loss
            loss = criterion(outputs, labels.unsqueeze(1).float())

            # Accumulate the validation loss for the current batch
            val_loss += loss.item() * labels.size(0)
            # Count the total number of processed samples
            total_samples += labels.size(0)

    # Calculate the average validation loss
    avg_val_loss = val_loss / total_samples

    return avg_val_loss

# Creating Train and Validation Loaders
Creates data loaders for training and validation using given indices.

In [None]:
def create_train_val_loader(train_dataset, train_idx, val_idx, batch_size):
    """
    Creates data loaders for training and validation using given indices.

    Both loaders read from the same dataset; each one is restricted to its own
    index set by a SubsetRandomSampler, which draws those indices in random order.

    Args:
        train_dataset (Dataset): The training dataset.
        train_idx (list): List of indices for the training set.
        val_idx (list): List of indices for the validation set.
        batch_size (int): Batch size for the data loaders.

    Returns:
        train_loader (DataLoader): DataLoader for the training data.
        val_loader (DataLoader): DataLoader for the validation data.

    Raises:
        AssertionError: If an index appears in both train_idx and val_idx.
    """

    # Check for overlapping indices before building anything: an overlap would
    # leak validation samples into training
    assert set(train_idx).isdisjoint(val_idx), "Indices overlap between train and validation sets."

    # Create subset samplers for training and validation indices
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsampler = torch.utils.data.SubsetRandomSampler(val_idx)

    # Define data loaders for training and validation data in this fold.
    # shuffle=True must NOT be passed together with a sampler: DataLoader raises
    # ValueError for that combination, and the random sampler already shuffles.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=train_subsampler,
    )

    val_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=val_subsampler,
    )

    return train_loader, val_loader


# Hyperparameters and Search Space
Define hyperparameters and search space for hyperparameter tuning

In [None]:
# Define hyperparameters and search space for hyperparameter tuning
# Each entry is a (min, max) interval; the tuning loop samples it with random.uniform
param_search_space = {
    'num_frozen_layers': (4, 7),  # sampled as a float, then truncated with int() in the tuning loop
    'learning_rate_pretrained': (0.00001, 0.001),  # learning rate for the pretrained parameter group
    'learning_rate_new': (0.00001, 0.001),  # learning rate for the 'classifier.6' parameter group
    'weight_decay': (0.005, 0.01)  # passed to the Adam optimizer as weight_decay
}

# Number of random configurations to try during hyperparameter search
num_random_configs = 20

# Set training parameters
num_epochs = 20  # Number of training epochs
batch_size = 32  # Batch size for training
k = 5  # Number of folds for cross-validation

# Create a dictionary to store fold results
# NOTE(review): nothing in the visible part of this notebook writes to this dictionary - confirm it is still needed
fold_results = {
    'train_loss': [],
    'val_loss': []
}

# Create a StratifiedKFold cross-validator with specified number of folds
# shuffle=True with a fixed random_state keeps the fold assignment reproducible
skfold = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

# Define the loss function for the model
# BCEWithLogitsLoss applies the sigmoid itself, so the model is expected to output raw logits
criterion = nn.BCEWithLogitsLoss()  # Binary Cross Entropy Loss with Sigmoid Function

# Hyperparameter Tuning
The code iterates through cross-validation folds, tests various hyperparameter settings, and records the best configuration with the lowest average validation loss for each fold. <br>

To cover as many hyperparameter settings as possible, I do not run a full cross-validation for each random configuration; instead, each fold evaluates its own set of random configurations. This lets me explore a large number of settings while saving computational time and resources.

In [None]:
# Create a list to store best configurations and average validation losses per fold
best_config_per_fold = []

# Loop through the folds using StratifiedKFold.
# labels[train_indices] is aligned with train_dataset, so train_idx/val_idx are positions within train_dataset
for fold, (train_idx, val_idx) in enumerate(skfold.split(train_dataset, labels[train_indices])):

    # Create data loaders for training and validation using the current fold indices
    train_loader, val_loader = create_train_val_loader(train_dataset, train_idx, val_idx, batch_size)

    # Create a list to store configurations and their corresponding average validation losses for this fold
    fold_configs = []

    # Try different random configurations for hyperparameters
    for _ in range(num_random_configs):
        # Lowest single-epoch validation loss seen for the current configuration.
        # Tracked for reference only: configurations are ranked by their mean validation loss below.
        epoch_best_val_loss = float('inf')

        # Initialize a VGG16 model pre-trained on ImageNet
        model = models.vgg16(pretrained=True)

        # Randomly sample hyperparameters from the specified intervals
        random_config = {
            param: random.uniform(min_val, max_val) for param, (min_val, max_val) in param_search_space.items()
        }

        # Extract hyperparameters from the random configuration
        num_frozen_layers = int(random_config['num_frozen_layers'])
        # Record the truncated value that is actually used, not the raw float sample
        random_config['num_frozen_layers'] = num_frozen_layers
        learning_rate_pretrained = random_config['learning_rate_pretrained']
        learning_rate_new = random_config['learning_rate_new']
        weight_decay = random_config['weight_decay']

        # Replace the last fully connected layer to match the binary classification task.
        # This must happen BEFORE the optimizer is created: otherwise the optimizer holds the
        # parameters of the discarded 1000-class layer and the new head is never updated.
        in_features = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(in_features=in_features, out_features=1)

        # Freeze some layers based on the specified number
        for layer in model.features[:num_frozen_layers]:
            for param in layer.parameters():
                param.requires_grad = False

        # Create an optimizer with the sampled hyperparameters (the model now has its final head)
        optimizer = create_optimizer(model,
                                     learning_rate_pretrained=learning_rate_pretrained,
                                     learning_rate_new=learning_rate_new,
                                     weight_decay=weight_decay)

        # Print the current hyperparameter configuration
        print("Num Frozen Layers:", num_frozen_layers)
        print("Learning Rate Pretrained:", "{:.8f}".format(learning_rate_pretrained))
        print("Learning Rate New:", "{:.8f}".format(learning_rate_new))
        print("Weight Decay:", "{:.8f}".format(weight_decay))

        # Lists to store current configuration's average training and validation losses
        current_config_avg_train_losses = []
        current_config_avg_val_losses = []

        # Train and validate the model for the specified number of epochs
        for epoch in range(num_epochs):
            avg_train_loss = training(model, device, train_loader, criterion, optimizer)
            avg_val_loss = testing(model, device, val_loader, criterion)

            print("Epoch:{}/{} AVG Training Loss:{:.6f} AVG Val Loss:{:.6f}".format(epoch + 1, num_epochs, avg_train_loss, avg_val_loss))

            # Update the best validation loss seen so far (no model weights are saved here)
            if avg_val_loss < epoch_best_val_loss:
                epoch_best_val_loss = avg_val_loss

            # Store current epoch's average training and validation losses
            current_config_avg_train_losses.append(avg_train_loss)
            current_config_avg_val_losses.append(avg_val_loss)

        # Store the configuration and its validation loss averaged over all epochs for this fold
        fold_configs.append((random_config, np.mean(current_config_avg_val_losses)))

    # Find the best configuration with the lowest average validation loss for this fold.
    # The strict '<' keeps the first configuration on ties and skips NaN losses.
    fold_best_val_loss = float('inf')
    fold_best_config = None
    for config, avg_val_loss in fold_configs:
        if avg_val_loss < fold_best_val_loss:
            fold_best_val_loss = avg_val_loss
            fold_best_config = config

    # Store the best configuration and its associated average validation loss for this fold
    best_config_per_fold.append((fold_best_config, fold_best_val_loss))
