## Hyperparameter Tuning Notebook

Description:
1. This notebook offers an automatic way to tune model training hyperparameters
2. A log file will be generated for the hyperparameter search

Prerequisite:
1. Make sure the `hyperopt` library is installed
2. Define the search space of the algorithm

Information:
1. By default, the searching algorithm is TPE


## Imports

- importing all torch sub packages. torchvision will be useful to load some CNN architectures and pretrained weights (generally trained on ImageNet)
- SciPy for stats
- matplotlib for plotting curves and images
- tqdm for displaying progress bars
- torchmetrics helps to compute the confusion matrix
- seaborn for plotting the confusion matrix
- pickle for saving/loading stat files (loss, acc, etc)

In [None]:
import numpy as np
import os
import gc

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as T
import cv2 as cv
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.optim.lr_scheduler import _LRScheduler
import torch.nn.functional as F

# for evaluation 
from torchmetrics import ConfusionMatrix
import pandas as pd
import seaborn as sn

import json
import time

import shutil

# Record the backend matplotlib selected at import time and report it.
default_matplotlib_backend = matplotlib.get_backend()
print('imported')
print(f'default_matplotlib_backend: {default_matplotlib_backend}')


## Constants
What you should know :
- Fixing the seed helps to get the same behaviour and avoid anomalies when changing operating systems ...
- Weights and stats will be saved in their own folders inside the root folder
- npy data are used to quickly load the images into memory (reading files from the drive takes too much time and slows training)
- Fixing other constants like "backbone" and "optimizer": these decide which architecture and which optimizer will be used for the training, so modify only those constants and nothing else

In [None]:
# Run mode selector: index 0 -> 'DEV', index 1 -> 'LIVE'.
# Change the trailing index to switch modes.
RUN_MODE = ['DEV','LIVE'][1]

# When True, data/output folders are derived from the current working directory
# instead of the custom paths.
SIMPLE_PATH = False  # Set this to false if you want to use custom paths

In [None]:
from copy import deepcopy

# Ensure consistent data distribution across different machines
# (this seeds NumPy's global random generator only)
np.random.seed(42)

# Determine if the operating system is Windows
windows = (os.name == 'nt')

# Define different paths to data directories
# extracted data: contains images inside folders
# weights: stores trained model weights
# stats: holds statistics (e.g., loss, accuracy)
# npy_data: stores data in .npy format for memory efficiency
# model_save_path: path to save trained models
if windows or SIMPLE_PATH:
    # Simple layout: everything lives under the current working directory.
    root_path = os.getcwd() # root project folder
    extracted_data_path = os.path.join(root_path, "datasets", "dataset_4a_n_crop")
    weights_path = os.path.join(root_path, "weights")
    stats_path = os.path.join(root_path, "stats")
    npy_data_path = os.path.join(root_path, "npy_data")
    model_save_path = os.path.join(root_path, "model")
else:
    # Custom layout: these are empty placeholders.
    # NOTE(review): they must be filled in before running; the empty strings are
    # passed as-is to the directory-creation and listing calls that follow.
    root_path = ""
    extracted_data_path = ""
    weights_path =  ""
    stats_path = ""
    npy_data_path = ""
    model_save_path = ""

# Create directories if they do not exist
def create_directory_if_not_exists(directory):
    """Create ``directory`` (including missing parents) unless it already exists.

    Uses ``exist_ok=True`` instead of a separate existence check, so there is
    no window between "check" and "create" in which another process could
    create the folder and make the call fail.

    Args:
        directory: Path of the folder to create.

    Raises:
        OSError: If the folder cannot be created (for example an empty path,
            or the path exists but is not a directory).
    """
    os.makedirs(directory, exist_ok=True)

# Create necessary directories
create_directory_if_not_exists(npy_data_path)
create_directory_if_not_exists(weights_path)
create_directory_if_not_exists(stats_path)
create_directory_if_not_exists(model_save_path)

# Get the list of classes (one sub-folder of the dataset folder per class).
# os.listdir returns entries in arbitrary order, so sort BEFORE truncating:
# otherwise DEV mode could pick a different three classes on every machine.
class_list = sorted(os.listdir(extracted_data_path))
if RUN_MODE == "DEV":
    class_list = class_list[:3]
print('Number of classes: {}'.format(len(class_list)))

# Determine the working device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Selected device: {}'.format(device))
if device.type == 'cuda':
    print('Device name: {}'.format(torch.cuda.get_device_name(device)))

# Define other constants (DEV mode uses smaller values for a quick run)
BATCH_SIZE = 16 if RUN_MODE == "DEV" else 64
EPOCHS = 3 if RUN_MODE == "DEV" else 30
IMG_SIZE = (350, 350)
RESIZE_SHAPE = (350, 350)
NUM_CHANNELS = 3
BACKBONE = 'ResNet18'
OPTIMIZER = 'SGD'

# Available backbones and optimizers
available_backbones = [
    'AlexNet', 'ResNet18', 'ResNet34', 'ResNet101', 'InceptionNet', 'EfficientNetB0', 'EfficientNetB1', 'EfficientNetB2',
    'EfficientNetV2_S', 'ConvNext_T', 'MobileNet_V3_Small', 'MobileNet_V3_Large', 'ViT_B_16'
]
available_optimizers = ['SGD', 'Adam', 'AdamW', 'RMSprop']

# Assert the chosen backbone and optimizer are available
assert BACKBONE in available_backbones, 'Choose an available backbone'
assert OPTIMIZER in available_optimizers, 'Choose an available optimizer'

## Data loading
1. loading all paths to files with corresponding class    
2. if npy already exists, then load it, otherwise load the dataset in memory  and save it as npy  
3. load npy  
4. split it as train, test, validation sets  

In [None]:
# Specify which subdataset to use
# NOTE(review): not read anywhere in this cell — confirm whether it is still needed.
DATASET_GROUP_IDX = None  # Could be: None, 0, 1, 2
# File name of the train/valid/test split definition; it is opened relative to root_path.
train_valid_test_split_json_name = 'NEW25split.json'

# Available subdatasets
# Each group is a list of integer ids (presumably class indices — TODO confirm).
indep1 = [18, 9, 26, 29, 34]
dataset_groups = [
    [1, 7, 10, 14, 11, 32, 13, 4, 8, 17, 20, 25, 28, 31, 36],
    [0, 15, 21, 22, 23, 5],
    [37, 33, 16, 2, 3, 6, 12, 19, 24, 27, 30, 35] + indep1,
]

# Every entry of the dataset folder is treated as one class (sorted by name)
class_list = sorted(os.listdir(extracted_data_path))
print('Number of classes: {}'.format(len(class_list)))

print('Loading data ...')

# class_name -> class_index, following the sorted class order
class_dict = {name: idx for idx, name in enumerate(class_list)}

# Walk <dataset>/<class>/<repetition>/<image> in sorted order, collecting every
# image path together with the index of the class it belongs to.
image_files = []
targets = []
for class_name, class_idx in class_dict.items():
    class_dir = os.path.join(extracted_data_path, class_name)
    for repetition in sorted(os.listdir(class_dir)):
        repetition_dir = os.path.join(class_dir, repetition)
        for img in sorted(os.listdir(repetition_dir)):
            image_files.append(os.path.join(repetition_dir, img))
            targets.append(class_idx)

targets = np.array(targets)

# Save data as .npy files if they do not exist, otherwise load the cached copy
images_npy_path = os.path.join(npy_data_path, 'images.npy')
targets_npy_path = os.path.join(npy_data_path, 'targets.npy')

if not os.path.exists(images_npy_path):
    print('Saving data as .npy files to {}'.format(npy_data_path))
    # One channel-first (C, H, W) uint8 image per row.
    images = np.empty((len(image_files), NUM_CHANNELS, 350, 350), dtype=np.uint8)
    for idx, img_path in enumerate(tqdm(image_files, position=0, leave=True)):
        img = cv.imread(img_path)
        if img is None:
            # cv.imread signals an unreadable/missing file by returning None;
            # fail with the offending path instead of a cryptic OpenCV error.
            raise OSError('Could not read image file: {}'.format(img_path))
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        img = cv.resize(img, (350, 350))
        # OpenCV gives (H, W, C); store as (C, H, W).
        images[idx] = img.transpose(2, 0, 1)

    with open(images_npy_path, 'wb') as npy_images_file, \
         open(targets_npy_path, 'wb') as npy_targets_file:
        np.save(npy_images_file, images, allow_pickle=True)
        np.save(npy_targets_file, targets, allow_pickle=True)
    with open(os.path.join(npy_data_path, 'image_files.json'), 'w') as f:
        json.dump(image_files, f)
    print('Data files saved')
else:
    images = np.load(images_npy_path)
    targets = np.load(targets_npy_path)
    # The cached arrays are indexed in parallel with the freshly listed
    # image_files; a size mismatch means the cache belongs to another dataset
    # state and every image/label pairing after this point would be wrong.
    if len(images) != len(image_files) or len(targets) != len(image_files):
        raise ValueError(
            'Cached .npy data in {} does not match the dataset on disk '
            '({} images, {} targets, {} image files). '
            'Delete the cached files to rebuild them.'.format(
                npy_data_path, len(images), len(targets), len(image_files)
            )
        )
    print('Data files loaded')

# Apply train/valid/test split from JSON
# The JSON maps class name -> {repetition folder name -> 'train' | 'valid' | 'test'}.
with open(os.path.join(root_path, train_valid_test_split_json_name), 'r') as f:
    image_files_dict_cut = json.load(f)

# Re-index the classes so that only the classes present in the split file remain.
classes_to_use = sorted(image_files_dict_cut.keys())
mapper_allcls_to_subcls = {class_dict[cls_name]: i for i, cls_name in enumerate(classes_to_use)}
class_dict = {cls_name: i for i, cls_name in enumerate(classes_to_use)}
class_list = list(classes_to_use)

# Keep only the samples whose class is part of the split.
# `images` is deliberately NOT filtered (that would copy the whole array);
# instead `kept_image_rows[k]` remembers which row of `images` belongs to the
# k-th kept sample, so filtered indices can be translated back. Without this
# translation the filtered indices would address the wrong rows of `images`
# whenever the split file covers only a subset of the classes on disk.
new_targets = []
new_image_files = []
kept_image_rows = []
for row, (t, im) in enumerate(zip(targets, image_files)):
    if t in mapper_allcls_to_subcls:
        new_targets.append(mapper_allcls_to_subcls[t])
        new_image_files.append(im)
        kept_image_rows.append(row)
new_targets = np.array(new_targets)
targets = new_targets
image_files = new_image_files
kept_image_rows = np.array(kept_image_rows, dtype=np.intp)

train_images_idxs, val_images_idxs, test_images_idxs = [], [], []

# Split images into train, validation, and test sets.
# The indices collected here address the FILTERED `targets` / `image_files`.
for image_file_idx, image_file_name in enumerate(image_files):
    # Paths end with <class>/<repetition>/<image file name>.
    _class, rep_num, im_name = image_file_name.split(os.sep)[-3:]

    if (_class not in image_files_dict_cut) or (rep_num not in image_files_dict_cut[_class]):
        continue
    split_name = image_files_dict_cut[_class][rep_num]
    if split_name == 'train':
        train_images_idxs.append(image_file_idx)
    elif split_name == 'valid':
        val_images_idxs.append(image_file_idx)
    elif split_name == 'test':
        test_images_idxs.append(image_file_idx)

# Translate filtered indices into rows of the unfiltered `images` array.
train_images = images[kept_image_rows[train_images_idxs]]
val_images   = images[kept_image_rows[val_images_idxs]]
test_images  = images[kept_image_rows[test_images_idxs]]

train_targets = targets[train_images_idxs]
val_targets   = targets[val_images_idxs]
test_targets  = targets[test_images_idxs]

# Assertions to check the lengths
assert len(train_images) == len(train_targets)
assert len(test_images) == len(test_targets)
assert len(val_images) == len(val_targets)

# Print the number of images in each set
print('Number of training images: {}'.format(len(train_images)))
print('Number of test images: {}'.format(len(test_images)))
print('Number of validation images: {}'.format(len(val_images)))

In [None]:
to_plot_from_train = 6
# Never request more panels than there are training images.
num_to_plot = min(to_plot_from_train, len(train_images))

plt.figure(figsize=(10, 10))
for i in range(num_to_plot):
    plt.subplot(1, num_to_plot, i + 1)
    # Images are stored channel-first (C, H, W) while imshow expects (H, W, C).
    # transpose(1, 2, 0) keeps rows and columns in place; swapping axes 0 and 2
    # would also exchange height and width and show the picture transposed.
    img = train_images[i].transpose(1, 2, 0)
    plt.imshow(img.astype(np.uint8))
    plt.title('Class: {}'.format(class_list[train_targets[i]]))
    plt.axis('off')

plt.show()

#### Dataset class and data loaders
By loading the whole dataset into memory, it becomes very easy to train the model. All we have to do is normalise the images (divide by 255; they are already stored in channel-first format) and then return each one with its corresponding target class

- Data augmentation can be turned on/off by modifying this code (more details in comments at the end)

In [None]:
# Define whether to use data augmentation
# (True: random transforms are applied to the training images; False: none)
USE_AUGMENTATION = True

# Define the dataset class for classification
class ClassificationPlantDataset(torch.utils.data.Dataset):
    """Dataset over an in-memory collection of images and parallel targets.

    Args:
        images: Indexable collection of images.
        targets: Indexable collection of targets, parallel to ``images``.
        transform: Optional callable applied to each image tensor after it
            has been converted to float32 and divided by 255.
    """

    def __init__(self, images, targets, transform=None):
        self.transform = transform
        self.targets = targets
        self.images = images

    def __len__(self):
        """Return the number of samples (taken from the image collection)."""
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(image, target)`` for ``index``, both as tensors."""
        # Convert to float32 and divide by 255.
        scaled = torch.tensor(self.images[index]).to(torch.float32) / 255
        image = self.transform(scaled) if self.transform else scaled
        return image, torch.tensor(self.targets[index])

# Create train dataset
# Augmentation is applied to the training set only; it runs on the float image
# tensors produced by the dataset class.
if USE_AUGMENTATION:
    train_transforms = T.Compose([
        # Data augmentation effects
        T.RandomAffine(degrees=(-180, 180), translate=(0.2, 0.2), scale=(0.8, 1.5)),
        T.RandomHorizontalFlip(),
        # T.RandomRotation([45, 270])
    ])
else:
    train_transforms = None

train_dataset = ClassificationPlantDataset(train_images, train_targets, transform=train_transforms)

# Create validation and test datasets
val_dataset = ClassificationPlantDataset(val_images, val_targets, transform=None)
test_dataset = ClassificationPlantDataset(test_images, test_targets, transform=None)

# Combine validation and test datasets
# NOTE(review): after these two lines the validation and test sets are the SAME
# object (validation + test images), so anything selected on validation data
# has also seen the test data — confirm this is intended.
val_dataset = torch.utils.data.ConcatDataset([val_dataset, test_dataset])
test_dataset = val_dataset

# Data loaders
# NOTE(review): val_loader uses drop_last=True, so a final incomplete batch is
# not evaluated, and a set smaller than BATCH_SIZE yields no batches at all.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=True, drop_last=False)

print('Data loaders created')


### Data sample (Run this only if you want to see an example of the data)


In [None]:
dataset = train_dataset
sample_size = 7
class_name = "Col-0"

# Draw 75 random samples (with replacement); only the first 70 are scanned below.
img_idx = np.random.randint(0, len(dataset), size=75)
sample = [dataset[i] for i in img_idx]

plt.figure(figsize=(10, 10))
cnt = 0
wanted = class_name.lower()
for image, target in sample[:70]:
    # Stop as soon as enough matching images have been drawn.
    if cnt >= sample_size:
        break

    label = class_list[target]
    if label.lower() != wanted:
        continue

    cnt += 1
    plt.subplot(1, sample_size, cnt)
    # Channel-first tensor -> (H, W, C) for imshow, rescaled back to 0..255.
    img = image.cpu().permute(1, 2, 0)
    plt.imshow((img * 255).type(torch.uint8))
    plt.title('Class: {}'.format(label))
    plt.axis('off')

plt.show()

### LR Scheduler and utility functions

In [None]:
from pytorch_utils.training_utils import *

## Training and evaluation functions

In [None]:
def _handlezero_division_np(a,b):
    """Divide ``a`` by ``b`` element-wise, yielding 0 wherever ``b`` is 0.

    The result has the same shape and dtype as ``a``; the inputs are left
    untouched.
    """
    quotient = np.zeros_like(a)
    # Only positions with a non-zero divisor are ever divided.
    nonzero = np.not_equal(b, 0)
    quotient[nonzero] = a[nonzero] / b[nonzero]
    return quotient

def mathews_correlation_coefficient_np(tp, fp, fn, tn, eps=1e-11):
    """Compute the Matthews correlation coefficient from confusion counts.

    Each count argument is summed and converted to float64 first, so arrays of
    counts are pooled. ``eps`` is added to the denominator to avoid dividing
    by zero.
    """
    tp, tn, fp, fn = (
        counts.sum().astype(np.float64) for counts in (tp, tn, fp, fn)
    )
    covariance = tp * tn - fp * fn
    spread = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return covariance / (spread + eps)

In [None]:
import pytorch_utils.callbacks as pt_callbacks

def get_callbacks(
        optimiser,
        result,
        model,
        defined_callbacks=None,
        continue_training=False,
        other_stats=None
):
    """Run the validation callbacks for one epoch and report the stop flag.

    When ``defined_callbacks`` is None, a fresh ``{'val': ..., 'train': ...}``
    pair of callback objects is created; otherwise the given ones are reused.
    All callbacks monitor ``result["val_acc"]`` in 'max' mode.

    NOTE(review): the checkpoint and stats paths are formed by plain string
    concatenation with ``model_save_path`` (no path separator is inserted).
    ``other_stats`` is accepted but not used here.

    Returns:
        ``(defined_callbacks, stop_flag)`` where ``stop_flag`` is whatever the
        early-stopping callback returned.
    """
    if defined_callbacks is None:
        val_callbacks = pt_callbacks.Callbacks(
            optimizer=optimiser,
            model_save_path=model_save_path + 'model.pth',
            training_stats_path=model_save_path + 'training_stats_val',
            continue_training=continue_training,
        )
        train_callbacks = pt_callbacks.Callbacks(
            optimizer=optimiser,
            training_stats_path=model_save_path + 'training_stats_train',
            continue_training=continue_training,
        )
        defined_callbacks = {'val': val_callbacks, 'train': train_callbacks}

    on_val = defined_callbacks['val']
    val_acc = result["val_acc"]

    # Order: LR schedule, then checkpoint, then early stopping, then cleanup.
    on_val.reduce_lr_on_plateau(
        monitor_value=val_acc,
        mode='max',
        factor=0.5,
        patience=4,
        indicator_text="Val LR scheduler: ",
    )
    on_val.model_checkpoint(
        model=model,
        monitor_value=val_acc,
        mode='max',
        indicator_text="Val checkpoint: ",
    )
    stop_flag = on_val.early_stopping(
        monitor_value=val_acc,
        mode='max',
        patience=20,
        indicator_text="Early stopping: ",
    )
    on_val.clear_memory()
    print("_________")

    return defined_callbacks, stop_flag

In [None]:
import pytorch_utils.training_utils as pt_train
def train_loop(
        model,
        optimizer,
        epochs,
        train_loader,
        val_loader,
        model_save_folder,
        initial_lr=0.001,
        weight_decay=None,
        running_hyperopt=False,
        verbose=False,
):
    """Train ``model`` and return the checkpointed model plus metric histories.

    ``model_save_folder`` is emptied and recreated before training. Training is
    delegated to ``pt_train.fit`` with ``optimizer`` passed as ``opt_func`` and
    ``get_callbacks`` as the callbacks function.

    NOTE(review): the checkpoint is loaded from ``model_save_path + "model.pth"``
    (module-level path), not from ``model_save_folder``. ``running_hyperopt``
    and ``verbose`` are accepted but not used here.

    Returns:
        ``(model, train_loss_history, train_acc_history, val_loss_history,
        val_acc_history)`` where ``model`` is the object loaded from the
        checkpoint file.
    """
    # prep the model save path: start every run from an empty folder
    shutil.rmtree(model_save_folder, ignore_errors=True)
    os.makedirs(model_save_folder, exist_ok=True)

    # Train the model using torch
    history = pt_train.fit(
        epochs=epochs,
        lr=initial_lr,
        weight_decay=weight_decay,
        model=model,
        callbacks_function=get_callbacks,
        train_loader=train_loader,
        val_loader=val_loader,
        opt_func=optimizer,
    )

    # Drop the local reference to the trained model before loading the checkpoint.
    del model
    best_model = torch.load(model_save_path + "model.pth")

    def metric_series(metric):
        # One value per history entry, in order.
        return [history[i][metric] for i in range(len(history))]

    return (
        best_model,
        metric_series("train_loss"),
        metric_series("train_acc"),
        metric_series("val_loss"),
        metric_series("val_acc"),
    )


def evaluate_model(model, test_loader, verbose=True, eps=1e-10):
    """Evaluate ``model`` on ``test_loader`` and optionally print a report.

    The predicted class is the arg-max of the model output. A confusion matrix
    is built from all predictions; it is read with rows as true classes and
    columns as predicted classes (the convention the precision/recall formulas
    below rely on). Per-class TP/FP/FN/TN are derived one-vs-rest from it.

    Args:
        model: Model to evaluate; it is switched to eval mode.
        test_loader: Iterable yielding ``(images, targets)`` batches.
        verbose: When True, print the metrics and draw the confusion-matrix
            heat map.
        eps: Small constant added to denominators to avoid division by zero.

    Returns:
        ``(accuracy, precision, recall, f1_score)``: ``accuracy`` is a 0-d
        tensor; the other three are per-class NumPy arrays.

    NOTE(review): ``ConfusionMatrix(num_classes=...)`` is called exactly as
    before; whether extra arguments are required depends on the installed
    torchmetrics version — confirm.
    """
    if verbose:
        print('--------------------------------------------')
        print('Test metrics (on test set)')

    model.eval()

    num_classes = len(class_list)
    confusion_matrix = ConfusionMatrix(num_classes=num_classes)
    eval_preds = list()
    eval_targs = list()

    # computing predictions; no gradients are needed for evaluation, so skip
    # building the autograd graph (saves memory and time).
    with torch.no_grad():
        for images, targets in tqdm(test_loader, position=0, leave=True):
            images = images.to(device, dtype=torch.float)
            outputs = model(images)
            # arg-max of the raw outputs equals arg-max of their log-softmax
            preds = torch.argmax(outputs, dim=1)
            eval_preds.extend(preds.cpu().numpy())
            eval_targs.extend(torch.as_tensor(targets).cpu().numpy())

    # computing main metrics (acc, precision, recall and f1 score)
    matrix = confusion_matrix(torch.tensor(eval_preds), torch.tensor(eval_targs))
    predicted_totals = matrix.sum(axis=0)  # column sums: samples predicted as each class
    true_totals = matrix.sum(axis=1)       # row sums: samples belonging to each class
    accuracy = matrix.trace() / (matrix.sum() + eps)
    precision = np.array([matrix[i, i] / (predicted_totals[i] + eps) for i in range(num_classes)])
    recall = np.array([matrix[i, i] / (true_totals[i] + eps) for i in range(num_classes)])
    f1_score = 2 * precision * recall / (precision + recall + eps)

    # One-vs-rest counts per class:
    #   TP = diagonal, FP = column total - TP, FN = row total - TP,
    #   TN = everything that is neither in the class's row nor in its column.
    counts = matrix.cpu().numpy().astype(np.float64)
    tp_per_class = np.diag(counts)
    fp_per_class = counts.sum(axis=0) - tp_per_class
    fn_per_class = counts.sum(axis=1) - tp_per_class
    tn_per_class = counts.sum() - tp_per_class - fp_per_class - fn_per_class

    # false positive rate, false negative rate, false discovery rate, false omission rate
    fp_rate = fp_per_class / (fp_per_class + tn_per_class + eps)
    fo_rate = fn_per_class / (fn_per_class + tn_per_class + eps)
    fn_rate = 1 - recall
    fd_rate = 1 - precision
    specificity = 1 - fp_rate
    npv = 1 - fo_rate
    missclassification_rate = 1 - accuracy

    mcc_per_class = [
        mathews_correlation_coefficient_np(
            tp_per_class[idx], fp_per_class[idx], fn_per_class[idx], tn_per_class[idx]
        )
        for idx in range(num_classes)
    ]

    if verbose:
        print('--------------------------------------------')
        print('Accuracy: {:.3f}%'.format(accuracy * 100))
        print('Average precision: {:.3f}'.format(precision.mean()))
        print('Average recall: {:.3f}'.format(recall.mean()))
        print('Average F1 score: {:.3f}'.format(f1_score.mean()))
        print('Average specificity: {:.3f}'.format(specificity.mean()))
        print('Average false positive rate: {:.3f}'.format(fp_rate.mean()))
        print('Average false negative rate: {:.3f}'.format(fn_rate.mean()))
        print('Average false discovery rate: {:.3f}'.format(fd_rate.mean()))
        print('Average false omission rate: {:.3f}'.format(fo_rate.mean()))
        print('Missclassification rate: {:.2f}%'.format(missclassification_rate * 100))
        print('Mathews Correlation Coefficient: {:.2f}'.format(np.mean(mcc_per_class)))
        print('--------------------------------------------')
        print('Results by class :')
        print('--------------------------------------------')
        print('{:<15}{:<12}{:<12}{:<12}{:<12}{:<12}{:<12}{:<12}{:<12}{:<12}{:<12}'.format('', 'Precision', 'Recall', 'F1 score', 'Specificity', 'FPR', 'FNR', 'FDR', 'FOR', 'NPV', 'MCC'))
        for idx, class_name in enumerate(class_list):
            print('{:<15}{:<12.2f}{:<12.2f}{:<12.2f}{:<12.3f}{:<12.3f}{:<12.3f}{:<12.3f}{:<12.3f}{:<12.3f}{:<12.3f}'.format(
                class_name, precision[idx], recall[idx], f1_score[idx], specificity[idx], fp_rate[idx], fn_rate[idx], fd_rate[idx], fo_rate[idx], npv[idx], mcc_per_class[idx]
            ))
        print('--------------------------------------------')
        print()

        # ploting confusion matrix
        matrix_df = pd.DataFrame(matrix.cpu().numpy(), index=class_list, columns=class_list)
        plt.figure(figsize=(12, 8))
        sn.heatmap(matrix_df, annot=True, fmt='d', cmap='Blues')

    return accuracy, precision, recall, f1_score


### Training each model
Models are loaded from torchvision.models. This library proposes multiple architectures with their weights. It is not recommended to modify these architectures. They are already defined to operate in the most optimal way possible, modifying them risks compromising a certain balance during learning. It is however possible to modify the classification part, which I do in the code by specifying the number of classes in output.

Train on correct dataset split:


### Hyperopt for finding best combination of hyperparameters

Reference:
https://github.com/hyperopt/hyperopt/wiki/FMin

In [None]:
# 57M parameters
class AlexNet(CustomModelBase):
    """torchvision AlexNet (default pretrained weights) with a resized output layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)
        # Replace only the last classifier layer so it emits `num_classes` scores.
        old_head = self.model.classifier[-1]
        self.model.classifier[-1] = nn.Linear(old_head.in_features, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 11M parameters
class ResNet18(CustomModelBase):
    """torchvision ResNet-18 (default pretrained weights) with a resized ``fc`` layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        # Replace the final fully connected layer so it emits `num_classes` scores.
        feature_dim = self.model.fc.in_features
        self.model.fc = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 21M parameters
class ResNet34(CustomModelBase):
    """torchvision ResNet-34 (default pretrained weights) with a resized ``fc`` layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
        # Replace the final fully connected layer so it emits `num_classes` scores.
        feature_dim = self.model.fc.in_features
        self.model.fc = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)
    
class ResNet50(CustomModelBase):
    """torchvision ResNet-50 (default pretrained weights) with a resized ``fc`` layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        # Replace the final fully connected layer so it emits `num_classes` scores.
        feature_dim = self.model.fc.in_features
        self.model.fc = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 42M parameters
class ResNet101(CustomModelBase):
    """torchvision ResNet-101 (default pretrained weights) with a resized ``fc`` layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.resnet101(weights=models.ResNet101_Weights.DEFAULT)
        # Replace the final fully connected layer so it emits `num_classes` scores.
        feature_dim = self.model.fc.in_features
        self.model.fc = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 6.6M parameters
# InceptionNet v1 - GoogLeNet
class InceptionNet(CustomModelBase):
    """torchvision GoogLeNet (default pretrained weights) with a resized ``fc`` layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT)
        # Replace the final fully connected layer so it emits `num_classes` scores.
        feature_dim = self.model.fc.in_features
        self.model.fc = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 4M parameters
class EfficientNetB0(CustomModelBase):
    """torchvision EfficientNet-B0 (default pretrained weights) with a new linear classifier."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
        # The whole `classifier` module (not just its last layer) is replaced
        # by a single linear layer sized from the old last layer's input.
        feature_dim = self.model.classifier[-1].in_features
        self.model.classifier = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 6M parameters
class EfficientNetB1(CustomModelBase):
    """torchvision EfficientNet-B1 (default pretrained weights) with a new linear classifier."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.efficientnet_b1(weights=models.EfficientNet_B1_Weights.DEFAULT)
        # The whole `classifier` module (not just its last layer) is replaced
        # by a single linear layer sized from the old last layer's input.
        feature_dim = self.model.classifier[-1].in_features
        self.model.classifier = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 7.7M parameters
class EfficientNetB2(CustomModelBase):
    """torchvision EfficientNet-B2 (default pretrained weights) with a new linear classifier."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.efficientnet_b2(weights=models.EfficientNet_B2_Weights.DEFAULT)
        # The whole `classifier` module (not just its last layer) is replaced
        # by a single linear layer sized from the old last layer's input.
        feature_dim = self.model.classifier[-1].in_features
        self.model.classifier = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 20M parameters
class EfficientNetV2_S(CustomModelBase):
    """torchvision EfficientNetV2-S (default pretrained weights) with a new linear classifier."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.efficientnet_v2_s(weights=models.EfficientNet_V2_S_Weights.DEFAULT)
        # The whole `classifier` module (not just its last layer) is replaced
        # by a single linear layer sized from the old last layer's input.
        feature_dim = self.model.classifier[-1].in_features
        self.model.classifier = nn.Linear(feature_dim, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 27M parameters
class ConvNext_T(CustomModelBase):
    """torchvision ConvNeXt-Tiny (default pretrained weights) with a resized output layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights.DEFAULT)
        # Replace only the last classifier layer so it emits `num_classes` scores.
        old_head = self.model.classifier[-1]
        self.model.classifier[-1] = nn.Linear(old_head.in_features, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 1.5M parameters
class MobileNet_V3_Small(CustomModelBase):
    """torchvision MobileNetV3-Small (default pretrained weights) with a resized output layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.DEFAULT)
        # Replace only the last classifier layer so it emits `num_classes` scores.
        old_head = self.model.classifier[-1]
        self.model.classifier[-1] = nn.Linear(old_head.in_features, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# 4.2M parameters
class MobileNet_V3_Large(CustomModelBase):
    """torchvision MobileNetV3-Large (default pretrained weights) with a resized output layer."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.DEFAULT)
        # Replace only the last classifier layer so it emits `num_classes` scores.
        old_head = self.model.classifier[-1]
        self.model.classifier[-1] = nn.Linear(old_head.in_features, num_classes, bias=True)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

# takes 224, 224 input shape
class ViT_B_16(CustomModelBase):
    """torchvision ViT-B/16 wrapper with a linear head sized for ``num_classes``.

    The backbone is built with ``vit_b_16`` so that it matches the class name
    (it previously built ``vit_b_32``, making it a duplicate of ``ViT_B_32``).
    No pretrained weights are loaded (``weights=None``).

    Args:
        num_classes: Number of output classes.
    """

    def __init__(self, num_classes=2):
        super(ViT_B_16, self).__init__()
        self.model = models.vit_b_16(weights=None)
        # Replace the whole `heads` module with one linear layer whose input
        # size is taken from the last layer of the original heads.
        self.model.heads = nn.Linear(self.model.heads[-1].in_features, num_classes)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)
    
# takes 224, 224 input shape
class ViT_B_32(CustomModelBase):
    """torchvision ViT-B/32 (no pretrained weights) with a new linear head."""

    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.vit_b_32(weights=None)
        # The whole `heads` module is replaced by a single linear layer sized
        # from the input of the original heads' last layer.
        feature_dim = self.model.heads[-1].in_features
        self.model.heads = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Forward ``x`` through the wrapped torchvision model."""
        return self.model(x)

In [None]:
def get_model_by_name(name):
    """Return the model class registered under ``name``.

    Every model is registered under its own class name. An unknown ``name``
    raises ``KeyError``.
    """
    known_models = (
        AlexNet,
        ResNet18,
        ResNet34,
        ResNet50,
        ResNet101,
        InceptionNet,
        EfficientNetB0,
        EfficientNetB1,
        EfficientNetB2,
        EfficientNetV2_S,
        ConvNext_T,
        MobileNet_V3_Small,
        MobileNet_V3_Large,
        ViT_B_16,
        ViT_B_32,
    )
    registry = {model_cls.__name__: model_cls for model_cls in known_models}
    return registry[name]

def get_optimizer_by_name(optim):
    """Return the ``torch.optim`` optimizer class for the given name.

    Accepted names are 'SGD', 'Adam', 'AdamW' and 'RMSprop'. The spelling
    'RMSProp' is also accepted: it was the only spelling this function used to
    recognise, while the notebook's list of available optimizers uses
    'RMSprop'.

    Args:
        optim: Optimizer name.

    Returns:
        The optimizer class (not an instance); the caller supplies the
        parameters and hyperparameters.

    Raises:
        ValueError: If ``optim`` is not a known optimizer name.
    """
    optimizers = {
        'SGD': torch.optim.SGD,
        'Adam': torch.optim.Adam,
        'AdamW': torch.optim.AdamW,
        'RMSprop': torch.optim.RMSprop,
        'RMSProp': torch.optim.RMSprop,  # legacy spelling, kept for backward compatibility
    }
    try:
        return optimizers[optim]
    except KeyError:
        raise ValueError(
            'Unknown optimizer {!r}; choose one of {}'.format(optim, sorted(optimizers))
        ) from None

In [None]:
# define a search space
from datetime import datetime
from hyperopt import hp, STATUS_OK, fmin, tpe, space_eval, Trials
from hyperopt.pyll import scope


def train_model(kwargs):
    """Build the model described by ``kwargs`` and train it.

    Args:
        kwargs: Dict with the keys
            ``"model"``  - name understood by ``get_model_by_name``;
            ``"optim"``  - dict with ``"name"`` (optimizer name) and
                           ``"params"`` (must contain ``"lr"`` and
                           ``"weight_decay"``);
            ``"epochs"`` - optional number of epochs (defaults to ``EPOCHS``).

    Returns:
        ``(model, train_loss_history, train_acc_history, val_loss_history,
        val_acc_history)`` as returned by ``train_loop``.

    Only the optimizer class, ``lr`` and ``weight_decay`` are handed to the
    training loop; no optimizer instance is created here (one used to be built
    and then discarded without ever being used).
    """
    print(kwargs)
    epochs = kwargs.get("epochs", EPOCHS)

    # get model by name
    model_name = kwargs.get("model")
    model_cls = get_model_by_name(model_name)
    model = model_cls(num_classes=len(class_list)).to(device)

    # get optimizer class and the hyperparameters forwarded to the training loop
    optim_args = kwargs.get("optim")
    optim_params = optim_args["params"]
    print(optim_params)
    optimizer_cls = get_optimizer_by_name(optim_args.get("name"))
    lr = optim_params["lr"]
    weight_decay = optim_params["weight_decay"]

    model, train_loss_history, train_acc_history, val_loss_history, val_acc_history = train_loop(
        model,
        optimizer_cls,
        epochs,
        train_loader,
        val_loader,
        initial_lr=lr,
        weight_decay=weight_decay,
        model_save_folder=model_save_path,
        verbose=True,
        running_hyperopt=True,
    )

    return model, train_loss_history, train_acc_history, val_loss_history, val_acc_history

def train_model_hyperopt(kwargs):
    """Hyperopt objective: train with ``kwargs`` and report the mean validation loss.

    Returns a dict with the mean of the validation-loss history under "loss"
    and ``STATUS_OK`` under "status".
    """
    _, _, _, val_loss_history, _ = train_model(kwargs)
    mean_val_loss = np.mean(val_loss_history)
    return dict(loss=mean_val_loss, status=STATUS_OK)

def unpack_values(trial):
    """Flatten ``trial["misc"]["vals"]`` into a ``{label: value}`` dict.

    Each entry maps a label to a list; the first element of every non-empty
    list is kept and empty lists are skipped.
    """
    recorded = trial["misc"]["vals"]
    return {label: values[0] for label, values in recorded.items() if values}

def export_hyperopt_log(trials, output_dir="../data/log"):
    """Export the evaluated hyperparameters of every trial to a CSV file.

    For each trial, the sampled values are mapped back onto ``search_space``
    and the trial's loss is stored under ``val_loss``. The nested ``optim``
    column is flattened into separate columns before writing.

    Args:
        trials: Object whose ``trials`` attribute is the list of trial dicts.
        output_dir: Folder for the CSV file; created if missing. Defaults to
            the previously hard-coded "../data/log".

    Returns:
        The exported ``pandas.DataFrame``. When there are no trials, nothing is
        written and an empty DataFrame is returned.
    """
    result_list = []
    for trial in trials.trials:
        trial_result = space_eval(search_space, unpack_values(trial))
        # A trial without a recorded loss is exported with an empty val_loss
        # instead of aborting the whole export.
        trial_result["val_loss"] = trial['result'].get('loss')
        result_list.append(trial_result)

    if not result_list:
        # Without rows there is no "optim" column to flatten.
        print("No hyperopt trials to export")
        return pd.DataFrame()

    df_result = pd.DataFrame(result_list)
    df_result = pd.concat([df_result.drop("optim", axis=1), pd.json_normalize(df_result.optim)], axis=1)

    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(output_dir, f"hyperopt_result_{ts}.csv")

    os.makedirs(output_dir, exist_ok=True)

    df_result.to_csv(output_path, index=False)
    print(f"Exported hyperopt log to {output_path}")
    return df_result


# Hyperopt search space consumed by train_model: one choice of epoch count,
# one model name, and one optimizer configuration per trial.
# The first argument of every hp.choice call is the label hyperopt uses for
# that parameter; the second is the list of candidate values.
search_space = {
    # number of training epochs, wrapped in scope.int
    "epochs": scope.int(hp.choice("epochs", [5, 9, 13, 17, 20, 23, 26, 29, 32, 35, 37, 40, 42, 45])),
    # model names must be keys known to get_model_by_name;
    # commented-out entries are excluded from the search
    "model": hp.choice("model_name", [
        # "AlexNet",
        # "ResNet18",
        # "ResNet34",
         "ResNet50",
         "ResNet101",
         "InceptionNet",
         "EfficientNetB0",
         "EfficientNetB1",
         "EfficientNetB2",
         "EfficientNetV2_S",
        # "ConvNext_T",
         "MobileNet_V3_Small",
         "MobileNet_V3_Large",
        # "ViT_B_16",
        # "ViT_B_32"
    ]),
    # optimizer configurations; currently a single Adam entry
    "optim": hp.choice("optim",[
        {
            "name":"Adam",
            "params": {
                "lr": hp.choice("lr-3", [1e-3, 1e-4]),
                # single candidate, i.e. the weight decay is fixed
                "weight_decay": hp.choice("weight_decay-3", [3.310305423548208e-05]) 
            }
        },
    ])    
}

## Plot the heat map with the best model

In [None]:
# train the model with the best parameter
# The parameters are hard-coded here; the commented-out line below would take
# them from a hyperopt result instead.
# NOTE(review): 'epochs' is set to 10000 — presumably the run is expected to be
# ended by the early-stopping callback; confirm.
best_params = {'epochs': 10000, 'model': 'EfficientNetV2_S', 'optim': {'name': 'Adam', 'params': {
     'lr': 0.001, 'weight_decay': 3.310305423548208e-05}}}
#best_params = space_eval(search_space, best) #comment in case of changing parameters
best_model, train_loss_history, train_acc_history, val_loss_history, val_acc_history = train_model(best_params)
# Save the whole model object in the current working directory.
torch.save(best_model, "single_image.pth")

# Plot the training curves, then print the metrics and the confusion-matrix heat map.
plot_model_stats(type(best_model).__name__, train_loss_history, train_acc_history, val_loss_history, val_acc_history)
evaluate_model(best_model, test_loader, verbose=True, eps=1e-10)

### Grad cam and feature map visualization

In [None]:
from pytorch_utils.visualization_utils import *

#### Instructions:
* Make sure model files are in the folder `vis_models/`
* When you run the cell, you'll be asked to choose the model you want to visualize. Enter the number of the model you want to visualize and press enter.
* The selected model will run the images in the test set through the model and visualize the GradCAM and feature maps for the selected images.
* You can select the number of images you want to run the GradCAM and feature maps for, by using the variables below. The images will be selected randomly from the test set.
* Outputs will be in the folders `gradcams/` and `feature_maps/`

In [None]:
#####################
MODELS_FOLDER_NAME = "vis_models" + os.sep  # folder containing the model (.pth) files
MAX_GRADCAM_IMAGES = 6000  # max number of images to show the GradCAM for

MAX_FEATURE_MAP_CLASSES = 100  # max number of classes to show the feature maps for
MAX_FEATURE_MAP_IMAGES_PER_CLASS = 1  # max number of images to show the feature maps for, per class. Layer wise visualization will be stored for 0 to n images in separate folders named as `feature_maps/image_i/layer_j.png`

NUM_SINGLE_CHART_CONV_IMGS = 5  # (N_COLUMNS) number of features in each layer, displayed as images in a single column of the "feature maps chart"
NUM_SINGLE_CHART_LAYERS = 20  # (N_ROWS) number of random CNN layers, displayed as rows of the "feature maps chart"

ENABLE_GRADCAM = True  # set to False to disable GradCAM visualization
ENABLE_FEATURE_MAPS = True  # set to False to disable feature maps visualization
#####################


# Make sure the models folder exists. A freshly created folder is necessarily
# empty, so stop and ask the user to populate it.
if not os.path.exists(MODELS_FOLDER_NAME):
    os.makedirs(MODELS_FOLDER_NAME)

    raise Exception(f"Folder {MODELS_FOLDER_NAME} does not exist! The folder is created now, please put the model files in it and run the cell again")

# List the folder once; sorting keeps the menu numbering stable between runs
# (os.listdir returns entries in arbitrary order).
models_list = sorted(os.listdir(MODELS_FOLDER_NAME))
if not models_list:
    raise Exception(f"No model files found in {MODELS_FOLDER_NAME} folder! Please put the model files in it and run the cell again")

# clear memory
gc.collect()
torch.cuda.empty_cache()

# Build the numbered menu shown in the input prompt (1-based).
string = "Models available:\n" + "".join(
    f"{i + 1}: {candidate}\n" for i, candidate in enumerate(models_list)
)

# choose model
ch = int(input(string + "\nChoose model: "))
# Reject out-of-range answers explicitly: without this check 0 or a negative
# number would silently pick a model counted from the end of the list.
if not 1 <= ch <= len(models_list):
    raise ValueError(f"Invalid choice {ch}: enter a number between 1 and {len(models_list)}")
model_name = MODELS_FOLDER_NAME + models_list[ch - 1]

# load model
# NOTE: torch.load unpickles the file - only load model files you trust.
model = torch.load(model_name)
model.eval()

# Arguments shared by the GradCAM and the feature-map runs.
visualization_kwargs = dict(
    class_list=class_list,
    device=device,
    max_gradcam_images=MAX_GRADCAM_IMAGES,
    feature_map_max_classes=MAX_FEATURE_MAP_CLASSES,
    feature_map_max_images_per_class=MAX_FEATURE_MAP_IMAGES_PER_CLASS,
    num_single_chart_conv_imgs=NUM_SINGLE_CHART_CONV_IMGS,
    num_single_chart_layers=NUM_SINGLE_CHART_LAYERS,
)

if ENABLE_GRADCAM:
    # extract and save grad cam
    get_gradcam_feature_maps(
        model,
        test_loader,
        show_gradcam=True,
        show_feature_map=False,
        **visualization_kwargs,
    )

if ENABLE_FEATURE_MAPS:
    # extract and save feature maps and chart
    get_gradcam_feature_maps(
        model,
        test_loader,
        show_gradcam=False,
        show_feature_map=True,
        **visualization_kwargs,
    )

In [None]:
# Restore the backend recorded at import time before plotting.
matplotlib.use(default_matplotlib_backend)
print("Current backend:", matplotlib.get_backend())
from matplotlib import pyplot as plt

from captum.attr import Saliency
from captum.attr import visualization as viz
from matplotlib.colors import LinearSegmentedColormap

# Example
sample_size = 120 # please set to integral multiple of 5
images_per_row = 5  # every image uses two axes: the original and its heat map
loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=1, shuffle=True, drop_last=True)

plt.close("all")
gc.collect()
torch.cuda.empty_cache()

# best_model = model  # COMMENT THIS LINE IF YOU WANT TO USE THE BEST MODEL FROM TRAINING, INSTEAD OF THE LOADED MODEL

# Create the Saliency attribution object for the model
saliency = Saliency(best_model)

# Custom colormap for the heat maps; it does not depend on the sample, so it
# is built once instead of on every iteration.
default_cmap = LinearSegmentedColormap.from_list('custom blue',
                                                 [(0, '#ffffff'),
                                                  (0.5, '#000000'),
                                                  (1, '#000000')], N=256)

for i, (images, labels) in enumerate(loader):
    if i == sample_size:
        break

    # Start a new figure row every `images_per_row` samples.
    slot = i % images_per_row
    if slot == 0:
        fig, axes = plt.subplots(1, 2 * images_per_row, figsize=(16,16))
    original_ax = axes[2 * slot]
    heatmap_ax = axes[2 * slot + 1]

    # Use the notebook-wide `device` (same as the GradCAM cell) instead of a
    # hard-coded .cuda() so inputs land on the device the model lives on.
    attributions_saliency = saliency.attribute(images.to(device), target=labels.type(torch.int64).to(device))

    # Convert once to channel-last numpy arrays, the layout passed to captum's
    # visualization helper.
    attr_hwc = np.transpose(attributions_saliency.squeeze().cpu().detach().numpy(), (1,2,0))
    image_hwc = np.transpose(images.squeeze().cpu().detach().numpy(), (1,2,0))

    # Get the class name for the label; int() keeps the list index valid even
    # when the label tensor has a floating dtype.
    class_name = class_list[int(labels.item())]

    # visualize the results using the visualize_image_attr helper method
    _ = viz.visualize_image_attr(attr_hwc,
                                 image_hwc,
                                 method="original_image",
                                 sign='positive',
                                 fig_size=(4,3),
                                 plt_fig_axis=(fig, original_ax),
                                 use_pyplot=False
                                 )

    # Set the title for the original image below the image
    original_ax.set_title(class_name, y=-0.25)

    _ = viz.visualize_image_attr(attr_hwc,
                                 image_hwc,
                                 method="heat_map",
                                 sign='positive',
                                 cmap=default_cmap,
                                 fig_size=(4,3),
                                 show_colorbar=True,
                                 plt_fig_axis=(fig, heatmap_ax),
                                 use_pyplot=False
                                 )

plt.show()