<a href="https://colab.research.google.com/github/romerocruzsa/cp-anemia-detection/blob/main/notebooks/capstone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Import necessary libraries for file handling, data manipulation, and visualization
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Import libraries for working with images and transformations
from PIL import Image
import cv2 as cv

# Import PyTorch modules for model building, data handling, and evaluation
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import torchvision.models as models
import torchvision.models.quantization as quant_models
from torch.utils.checkpoint import checkpoint
from torch.utils.data import Dataset, DataLoader, Subset
from timm import create_model

# from torchinfo import summary

# Import libraries for machine learning metrics and model evaluation
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, r2_score, confusion_matrix
# import torchmetric
from tqdm import tqdm
from datetime import datetime
import json

import warnings
warnings.filterwarnings('ignore')
import gc

# Set the seed.
seed = 42
torch.manual_seed(seed)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x749f30d26990>

In [23]:
data_dir="/home/sebastian-cruz6/cp-anemia-detection/data/cp-anemia/"
weights_dir="/home/sebastian-cruz6/cp-anemia-detection/data/notebooks/weights/"
metrics_dir="/home/sebastian-cruz6/cp-anemia-detection/data/notebooks/metrics/"

# data_dir = "/content/drive/MyDrive/CAWT_Sebastian_202425/CP-AnemiC/"
# weights_dir = "/content/drive/MyDrive/CAWT_Sebastian_202425/Weights/"
anemic_dir=data_dir+"/Anemic/"
non_anemic_dir=data_dir+"/Non-anemic/"
signature = "02082024"

In [6]:
data_sheet_path = data_dir+"Anemia_Data_Collection_Sheet.csv"
data_sheet = pd.read_csv(data_sheet_path)
display(data_sheet)

Unnamed: 0,IMAGE_ID,HB_LEVEL,Severity,Age(Months),GENDER,REMARK,HOSPITAL,CITY/TOWN,MUNICIPALITY/DISTRICT,REGION,COUNTRY
0,Image_001,9.80,Moderate,6,Female,Anemic,Nkawie-Toase Government Hospital,Nkawie-Toase,Atwima Nwabiagya South,Ashanti,Ghana
1,Image_002,9.90,Moderate,24,Male,Anemic,Ejusu Government Hospital,Ejusu,Ejusu Municipality,Ashanti,Ghana
2,Image_003,11.10,Non-Anemic,24,Female,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
3,Image_004,12.50,Non-Anemic,12,Male,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
4,Image_005,9.90,Moderate,24,Male,Anemic,Sunyani Municipal Hospital,Sunyani,Sunyani Municipality,Bono,Ghana
...,...,...,...,...,...,...,...,...,...,...,...
705,Image_706,12.80,Non-Anemic,48,Male,Non-anemic,Bolgatanga Regional Hospital,Bolgatanga,Bolgatanga Municipality,Upper East,Ghana
706,Image_707,11.47,Non-Anemic,48,Female,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
707,Image_708,11.60,Non-Anemic,60,Male,Non-anemic,Komfo Anokye Teaching Hospital,Kumasi,Kumasi Metropolitan,Ashanti,Ghana
708,Image_709,12.10,Non-Anemic,48,Male,Non-anemic,Bolgatanga Regional Hospital,Bolgatanga,Bolgatanga Municipality,Upper East,Ghana


In [7]:
# Mapping diagnosis to severity
severity_mapping = {
    "Non-Anemic": 0,
    "Mild": 1,
    "Moderate": 2,
    "Severe": 3,
}

data_sheet['Severity'] = data_sheet['Severity'].map(severity_mapping)
display(data_sheet)

Unnamed: 0,IMAGE_ID,HB_LEVEL,Severity,Age(Months),GENDER,REMARK,HOSPITAL,CITY/TOWN,MUNICIPALITY/DISTRICT,REGION,COUNTRY
0,Image_001,9.80,2,6,Female,Anemic,Nkawie-Toase Government Hospital,Nkawie-Toase,Atwima Nwabiagya South,Ashanti,Ghana
1,Image_002,9.90,2,24,Male,Anemic,Ejusu Government Hospital,Ejusu,Ejusu Municipality,Ashanti,Ghana
2,Image_003,11.10,0,24,Female,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
3,Image_004,12.50,0,12,Male,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
4,Image_005,9.90,2,24,Male,Anemic,Sunyani Municipal Hospital,Sunyani,Sunyani Municipality,Bono,Ghana
...,...,...,...,...,...,...,...,...,...,...,...
705,Image_706,12.80,0,48,Male,Non-anemic,Bolgatanga Regional Hospital,Bolgatanga,Bolgatanga Municipality,Upper East,Ghana
706,Image_707,11.47,0,48,Female,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
707,Image_708,11.60,0,60,Male,Non-anemic,Komfo Anokye Teaching Hospital,Kumasi,Kumasi Metropolitan,Ashanti,Ghana
708,Image_709,12.10,0,48,Male,Non-anemic,Bolgatanga Regional Hospital,Bolgatanga,Bolgatanga Municipality,Upper East,Ghana


In [8]:
# Define data augmentations or transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=np.random.rand()),
    transforms.RandomVerticalFlip(p=np.random.rand()),
    transforms.RandomRotation(degrees=np.random.randint(0, 360)),
    transforms.RandomAffine(degrees=np.random.randint(0, 360)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Custom dataset class
class CPAnemiCDataset(Dataset):
    def __init__(self, dir, df, transform=None):
        self.dir = dir
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = row['IMAGE_ID']
        img_folder = row['REMARK']
        img_path = os.path.join(self.dir, img_folder, img_id + ".png")
        img = Image.open(img_path).convert('RGB')

        if self.transform:
            img = self.transform(img)

        multiclass_label = torch.tensor(row['Severity'])
        hb_level = torch.tensor(row['HB_LEVEL'])

        return img, multiclass_label, hb_level

    # Load the dataset
image_dataset = CPAnemiCDataset(data_dir, data_sheet, transform=transform)
train_dataset, test_dataset = train_test_split(image_dataset, test_size=0.20, shuffle=True)

print(f"Image Dataset Size (All): {len(image_dataset)}, \
        Train Size: {len(train_dataset)}, \
        Test Size: {len(test_dataset)}")

BATCH_SIZE = 32
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

Image Dataset Size (All): 710,         Train Size: 568,         Test Size: 142


In [9]:
# Default device
device = torch.device('cpu')

# Check for CUDA availability
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    print("CUDA is not available, using CPU.")

print(f"Selected device: {device}")

Selected device: cuda


In [10]:
!nvidia-smi

Thu Feb 13 12:28:02 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        Off |   00000000:41:00.0 Off |                  Off |
|  0%   42C    P8             34W /  480W |      15MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA GeForce RTX 4090        Off |   00

In [11]:
def get_model_size(mdl):
    torch.save(mdl.state_dict(), "tmp.pt")
    model_size = "Model Size: %.2f MB" %(os.path.getsize("tmp.pt")/1e6)
    os.remove('tmp.pt')
    return model_size

def timed_forward(model, img):
    """Applies checkpointing and logs GPU latency and memory usage."""
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)

    # Record memory before forward pass
    torch.cuda.reset_peak_memory_stats()
    mem_before = torch.cuda.memory_allocated()
    max_mem_before = torch.cuda.max_memory_allocated()

    # Start measuring latency
    start_event.record()
    class_pred, reg_pred = model(img)
    end_event.record()

    torch.cuda.synchronize()  # Ensure timing accuracy
    latency = start_event.elapsed_time(end_event)  # Time in milliseconds

    # Record memory after forward pass
    mem_after = torch.cuda.memory_allocated()
    max_mem_after = torch.cuda.max_memory_allocated()

    # Store stats in a dictionary
    stats = {
        "latency": latency,
        "malloc_before": mem_before,
        "malloc_after": mem_after,
        "max_malloc": max_mem_after,
    }

    return class_pred, reg_pred, stats

# Static Weighting Function. Set eta_class to desired importance (Classification > .5, Regression < .5, Equal == .5)
def sw_loss(loss_class, loss_reg, eta_class=0.5):
    eta_reg = 1 - eta_class
    total_loss = (eta_class * loss_class) + (eta_reg * loss_reg)
    return total_loss

In [12]:
class MultiModel(nn.Module):
    MODEL_MAPPING = {
        "mobilenetv2": lambda: models.mobilenet_v2(pretrained=False),
        "resnet18": lambda: models.resnet18(pretrained=False),
        "densenet121": lambda: models.densenet121(pretrained=False),
        "vgg16": lambda: models.vgg16(pretrained=False),
        "vit-tiny": lambda: create_model("vit_tiny_patch16_224", pretrained=False),
        "convnext-tiny": lambda: models.convnext_tiny(pretrained=False),
        "efficientnet-b0": lambda: models.efficientnet_b0(pretrained=False),
        "shufflenetv2-0.5x": lambda: models.shufflenet_v2_x0_5(pretrained=False),
        "regnety-400mf": lambda: models.regnet_y_400mf(pretrained=False),
        "mnasnet0_5": lambda: models.mnasnet0_5(pretrained=False),
        "ghostnetv2": lambda: create_model('ghostnetv2_100.in1k', pretrained=False),
        "tinynet-a": lambda: create_model("tinynet_a.in1k", pretrained=False)
    }

    FEATURE_LAYER_MAPPING = {
        "fc": ["resnet", "shufflenet", "regnet"],
        "classifier": ["densenet", "vgg", "mobilenet", "efficientnet",
                       "mnasnet","convnext", "ghostnet", "tinynet"],
        "head": ["vit"]
    }

    def __init__(self, model_name):
        super().__init__()
        self.model_name = model_name.lower()

        if self.model_name not in self.MODEL_MAPPING:
            raise ValueError(f"Model {model_name} not supported")

        self.model = self.MODEL_MAPPING[self.model_name]()
        num_ftrs = self._get_feature_size()

        print(f"Initial Backbone {get_model_size(self.model)}")

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(num_ftrs, 128),
            nn.ReLU(),
            nn.Linear(128, 5)
        )

        self._assign_classifier()
        print(f"Modified Backbone {get_model_size(self.model)}\n")

    def _get_feature_size(self):
        """Retrieve the number of input features for the last layer."""
        # Special case for VGG16 since its features need flattening
        if "vgg" in self.model_name:
            return 25088  # VGG16 outputs (batch, 512, 7, 7) -> flattened to 25088

        feature_layers = {
            "fc": getattr(self.model, "fc", None),
            "classifier": getattr(self.model, "classifier", None),
            "head": getattr(self.model, "head", None)
        }

        for key, layer in feature_layers.items():
            if layer:
                return layer[-1].in_features if isinstance(layer, nn.Sequential) else layer.in_features

        return getattr(self.model, "num_features", None)

    def _assign_classifier(self):
        """Assigns the appropriate classifier to the model based on its architecture."""
        if "vgg" in self.model_name:
            self.model.classifier = self.classifier
        else:
          for attr, models in self.FEATURE_LAYER_MAPPING.items():
            if any(m in self.model_name for m in models):
                setattr(self.model, attr, self.classifier)
                return

    def forward(self, x):
        output = self.model(x)
        return output[:, :4], output[:, 4]  # Class probabilities and Hb level estimate

In [13]:
models_list = ["mobilenetv2", "resnet18", "densenet121", "vgg16", "vit-tiny",
               "efficientnet-b0", "shufflenetv2-0.5x", "regnety-400mf",
               "mnasnet0_5", "convnext-tiny", "ghostnetv2", "tinynet-a"
               ]
for arch in models_list:
    print(f"Loading model: {arch}")
    model = MultiModel(arch).to(device)
    # print(summary(model))
    # print(model)

Loading model: mobilenetv2
Initial Backbone Model Size: 14.24 MB
Modified Backbone Model Size: 9.78 MB

Loading model: resnet18
Initial Backbone Model Size: 46.83 MB
Modified Backbone Model Size: 45.04 MB

Loading model: densenet121
Initial Backbone Model Size: 32.47 MB
Modified Backbone Model Size: 28.90 MB

Loading model: vgg16
Initial Backbone Model Size: 553.44 MB
Modified Backbone Model Size: 71.72 MB

Loading model: vit-tiny
Initial Backbone Model Size: 22.92 MB
Modified Backbone Model Size: 22.25 MB

Loading model: efficientnet-b0
Initial Backbone Model Size: 21.43 MB
Modified Backbone Model Size: 16.96 MB

Loading model: shufflenetv2-0.5x
Initial Backbone Model Size: 5.59 MB
Modified Backbone Model Size: 2.02 MB

Loading model: regnety-400mf
Initial Backbone Model Size: 17.61 MB
Modified Backbone Model Size: 16.07 MB

Loading model: mnasnet0_5
Initial Backbone Model Size: 9.04 MB
Modified Backbone Model Size: 4.58 MB

Loading model: convnext-tiny
Initial Backbone Model Size: 11

In [14]:
def train(dataloader, model, class_loss, reg1_loss, reg2_loss, optimizer):
    """Trains the model and logs additional metrics."""
    model.train()
    total_loss = 0
    total_ce_loss = 0
    total_mse_loss = 0
    total_mae_loss = 0
    correct = 0
    total_samples = 0

    all_preds = []
    all_targets = []
    all_probs = []
    all_hb_targets = []
    all_hb_preds = []

    for _, (img, multiclass, hb_level) in enumerate(dataloader):
        img = img.to(device)
        multiclass = multiclass.to(device).long()
        hb_level = hb_level.to(device).unsqueeze(1).float()

        optimizer.zero_grad()

        # Forward pass
        class_pred, reg_pred = model(img)

        # Compute losses
        ce_loss = class_loss(class_pred, multiclass)
        mse_loss = reg1_loss(reg_pred, hb_level)
        mae_loss = reg2_loss(reg_pred, hb_level)
        loss = sw_loss(ce_loss, mse_loss, 0.7)  # Weighted loss

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Track total losses
        total_loss += loss.item()
        total_ce_loss += ce_loss.item()
        total_mse_loss += mse_loss.item()
        total_mae_loss += mae_loss.item()

        # Compute classification accuracy
        class_probs = F.softmax(class_pred, dim=1)
        highest_prob_class = torch.argmax(class_probs, dim=1)

        correct += (highest_prob_class == multiclass).sum().item()
        total_samples += multiclass.size(0)

        # Collect data for additional metrics
        all_preds.extend(highest_prob_class.detach().cpu().numpy())
        all_targets.extend(multiclass.detach().cpu().numpy())
        all_probs.extend(class_probs.detach().cpu().numpy())
        all_hb_targets.extend(hb_level.detach().cpu().numpy())
        all_hb_preds.extend(reg_pred.squeeze().cpu().detach().numpy())

    # Compute additional metrics
    precision = precision_score(all_targets, all_preds, average="weighted")
    recall = recall_score(all_targets, all_preds, average="weighted")
    f1 = f1_score(all_targets, all_preds, average="weighted")
    auc = roc_auc_score(all_targets, all_probs, multi_class="ovr")
    r2 = r2_score(all_hb_targets, all_hb_preds)

    # Compute final statistics
    avg_loss = total_loss / len(dataloader)
    avg_ce_loss = total_ce_loss / len(dataloader)
    avg_mse_loss = total_mse_loss / len(dataloader)
    avg_mae_loss = total_mae_loss / len(dataloader)
    accuracy = correct / total_samples

    # Store metrics
    final_metrics = [avg_loss, avg_ce_loss, accuracy, precision, recall, f1, auc, r2, avg_mae_loss, avg_mse_loss]

    return final_metrics


In [15]:
def eval(dataloader, model, class_loss, reg1_loss, reg2_loss):
    """Evaluates the model with additional metrics: Precision, Recall, AUC, F1, R², Memory Usage, and Latency."""
    model.eval()
    mean_stats = []

    total_loss = 0
    total_ce_loss = 0
    total_mse_loss = 0
    total_mae_loss = 0
    correct = 0
    total_samples = 0

    all_preds = []
    all_targets = []
    all_probs = []
    all_hb_targets = []
    all_hb_preds = []

    torch.cuda.empty_cache()
    gc.collect()

    with torch.no_grad():
        for _, (img, multiclass, hb_level) in enumerate(dataloader):
            img = img.to(device)
            multiclass = multiclass.to(device).long()
            hb_level = hb_level.to(device).unsqueeze(1).float()

            # Forward pass with latency & memory tracking
            class_pred, reg_pred, stats = timed_forward(model, img)
            mean_stats.append(stats)

            # Compute losses
            ce_loss = class_loss(class_pred, multiclass)
            mse_loss = reg1_loss(reg_pred, hb_level)
            mae_loss = reg2_loss(reg_pred, hb_level)
            loss = sw_loss(ce_loss, mse_loss, 0.7)

            # Track total losses
            total_loss += loss.item()
            total_ce_loss += ce_loss.item()
            total_mse_loss += mse_loss.item()
            total_mae_loss += mae_loss.item()

            # Compute classification accuracy
            class_probs = F.softmax(class_pred, dim=1)
            highest_prob_class = torch.argmax(class_probs, dim=1)

            correct += (highest_prob_class == multiclass).sum().item()
            total_samples += multiclass.size(0)

            # Collect data for additional metrics
            all_preds.extend(highest_prob_class.detach().cpu().numpy())
            all_targets.extend(multiclass.detach().cpu().numpy())
            all_probs.extend(class_probs.detach().cpu().numpy())
            all_hb_targets.extend(hb_level.detach().cpu().numpy())
            all_hb_preds.extend(reg_pred.squeeze().detach().cpu().numpy())

    # Compute mean statistics
    mean_latency = np.mean([s["latency"] for s in mean_stats])
    mean_mem_before = np.mean([s["malloc_before"] for s in mean_stats]) / 1_048_576  # Convert bytes to MB
    mean_mem_after = np.mean([s["malloc_after"] for s in mean_stats]) / 1_048_576  # Convert bytes to MB
    mean_max_mem = np.mean([s["max_malloc"] for s in mean_stats]) / 1_048_576  # Convert bytes to MB

    # Store final mean statistics
    final_mean_stats = [mean_latency, mean_mem_before, mean_mem_after, mean_max_mem]

    # Compute additional evaluation metrics
    precision = precision_score(all_targets, all_preds, average="weighted")
    recall = recall_score(all_targets, all_preds, average="weighted")
    f1 = f1_score(all_targets, all_preds, average="weighted")
    auc = roc_auc_score(all_targets, all_probs, multi_class="ovr")
    r2 = r2_score(all_hb_targets, all_hb_preds)

    # Compute confusion matrix
    # cm = confusion_matrix(all_targets, all_preds)

    # Compute final average losses
    avg_loss = total_loss / len(dataloader)
    avg_ce_loss = total_ce_loss / len(dataloader)
    avg_mse_loss = total_mse_loss / len(dataloader)
    avg_mae_loss = total_mae_loss / len(dataloader)
    accuracy = correct / total_samples

    # Store metrics
    final_metrics = [avg_loss, avg_ce_loss, accuracy, precision, recall, f1, auc, r2, avg_mae_loss, avg_mse_loss]

    return final_metrics, final_mean_stats

## Model Selection

### MobileNetV2

MobileNetV2 is a highly efficient deep learning model designed for mobile and embedded vision applications. It builds upon its predecessor, MobileNetV1, by introducing **inverted residual blocks** and **linear bottlenecks**, significantly improving both computational efficiency and accuracy. Instead of using standard convolutions, MobileNetV2 employs **depthwise separable convolutions**, reducing the number of parameters while maintaining expressiveness. This makes it particularly useful for real-time applications such as mobile vision, IoT, and edge computing.

- **Paper:** Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., & Chen, L. C. (2018). *MobileNetV2: Inverted Residuals and Linear Bottlenecks*. [CVPR](https://arxiv.org/abs/1801.04381

In [None]:
# === CONFIGURATION ===
ARCH = "mobilenetv2"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold

with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.json", "w") as f:
    json.dump(val_metrics, f, indent=4)
        
with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.json", "w") as f:
    json.dump(train_metrics, f, indent=4)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: mobilenetv2
Initial Backbone Model Size: 14.24 MB
Modified Backbone Model Size: 9.78 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 27.0702, Cross Entropy: 1.385633, Accuracy: 0.2764, Precision: 0.2319, Recall: 0.2764, F1 Score: 0.2207, AUC: 0.4969, R2 Score: -15.721262, MAE: 9.0022, MSE: 87.0008
Training: Fold 2 - Total Loss: 13.3158, Cross Entropy: 1.322994, Accuracy: 0.4049, Precision: 0.3003, Recall: 0.4049, F1 Score: 0.2477, AUC: 0.4858, R2 Score: -7.371997, MAE: 5.9892, MSE: 41.2989
Training: Fold 3 - Total Loss: 5.2735, Cross Entropy: 1.316092, Accuracy: 0.3732, Precision: 0.2630, Recall: 0.3732, F1 Score: 0.2173, AUC: 0.4917, R2 Score: -2.006215, MAE: 3.2616, MSE: 14.5073
Training: Fold 4 - Total Loss: 2.8050, Cross Entropy: 1.261818, Accuracy: 0.4208, Precision: 0.3161, Recall: 0.4208, F1 Score: 0.2625, AUC: 0.4909, R2 Score: -0.198145, MAE: 2.0612, MSE: 6.4056
Validation: Fold 5 - Total Loss: 4.8882, Cross Entropy: 1.251579, Accuracy: 0.3873, Precision: 0.150

KeyboardInterrupt: 

### ResNet-18

ResNet (Residual Network) introduced a revolutionary technique called **skip connections**, which helps in training very deep networks without suffering from vanishing gradients. The architecture enables **identity mappings** through shortcut connections, allowing gradients to propagate smoothly during backpropagation. The 18-layer variant (ResNet-18) is a lightweight version of the deeper ResNet models, making it suitable for real-time inference while still benefiting from the **deep residual learning approach**.

- **Paper:** He, K., Zhang, X., Ren, S., & Sun, J. (2016). *Deep Residual Learning for Image Recognition*. [CVPR](https://arxiv.org/abs/1512.03385).

In [None]:
ARCH = "resnet18"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: resnet18
Initial Backbone Model Size: 46.83 MB
Modified Backbone Model Size: 45.04 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 24.7441, Cross Entropy: 1.380200, Accuracy: 0.2993, Precision: 0.2951, Recall: 0.2993, F1 Score: 0.2063, AUC: 0.5091, R² Score: -14.134768, MAE: 8.5525, MSE: 79.2599
Training: Fold 2 - Total Loss: 11.1925, Cross Entropy: 1.295400, Accuracy: 0.3521, Precision: 0.3616, Recall: 0.3521, F1 Score: 0.2517, AUC: 0.5116, R² Score: -5.902110, MAE: 5.3572, MSE: 34.2856
Training: Fold 3 - Total Loss: 3.8765, Cross Entropy: 1.264972, Accuracy: 0.3662, Precision: 0.2571, Recall: 0.3662, F1 Score: 0.2980, AUC: 0.5099, R² Score: -1.036240, MAE: 2.6321, MSE: 9.9701
Training: Fold 4 - Total Loss: 2.5766, Cross Entropy: 1.245435, Accuracy: 0.3979, Precision: 0.2655, Recall: 0.3979, F1 Score: 0.2983, AUC: 0.5227, R² Score: -0.062158, MAE: 1.8883, MSE: 5.6827
Training: Fold 5 - Total Loss: 2.5071, Cross Entropy: 1.217934, Accuracy: 0.3873, Precision: 0.1500, Re

### DenseNet-121

DenseNet (Densely Connected Convolutional Networks) improves gradient flow and feature reuse by introducing **dense connections**. Unlike traditional architectures, where layers receive input only from the previous layer, **DenseNet connects each layer to every preceding layer**. This eliminates redundant feature maps and improves efficiency while requiring fewer parameters than other deep networks. DenseNet-121, a version with 121 layers, is particularly effective for feature-rich tasks like medical image classification and object recognition.

- **Paper:** Huang, G., Liu, Z., Van Der Maaten, L., & Weinberger, K. Q. (2017). *Densely Connected Convolutional Networks*. [CVPR](https://arxiv.org/abs/1608.06993).

In [51]:
ARCH = "densenet121"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: densenet121
Initial Backbone Model Size: 32.47 MB
Modified Backbone Model Size: 28.90 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 27.4098, Cross Entropy: 1.353998, Accuracy: 0.3662, Precision: 0.1757, Recall: 0.3662, F1 Score: 0.2375, AUC: 0.5287, R² Score: -15.920037, MAE: 9.0841, MSE: 88.2065
Training: Fold 2 - Total Loss: 15.1666, Cross Entropy: 1.349770, Accuracy: 0.4032, Precision: 0.1625, Recall: 0.4032, F1 Score: 0.2317, AUC: 0.5187, R² Score: -8.609622, MAE: 6.4806, MSE: 47.4059
Training: Fold 3 - Total Loss: 6.6910, Cross Entropy: 1.432706, Accuracy: 0.3785, Precision: 0.1433, Recall: 0.3785, F1 Score: 0.2079, AUC: 0.5451, R² Score: -2.920579, MAE: 3.8256, MSE: 18.9602
Training: Fold 4 - Total Loss: 3.3340, Cross Entropy: 1.372261, Accuracy: 0.4190, Precision: 0.4843, Recall: 0.4190, F1 Score: 0.2495, AUC: 0.5035, R² Score: -0.490915, MAE: 2.3162, MSE: 7.9113
Training: Fold 5 - Total Loss: 2.5496, Cross Entropy: 1.351838, Accuracy: 0.3873, Precision: 0.1500

### VGG16

VGG16 is a deep convolutional neural network that achieved **high accuracy** in image classification tasks while maintaining a **simple architecture**. It consists of **16 layers**, primarily using **small 3×3 convolutions** stacked in increasing depth, with max pooling layers interspersed to reduce spatial dimensions. Unlike more recent models, VGG16 has a relatively high number of parameters, making it computationally expensive. However, it remains widely used for **transfer learning** due to its well-generalized feature representations.

- **Paper:** Simonyan, K., & Zisserman, A. (2015). *Very Deep Convolutional Networks for Large-Scale Image Recognition*. [ICLR](https://arxiv.org/abs/1409.1556).

In [52]:
ARCH = "vgg16"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: vgg16
Initial Backbone Model Size: 553.44 MB
Modified Backbone Model Size: 71.72 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 21.9293, Cross Entropy: 1.355848, Accuracy: 0.4085, Precision: 0.2901, Recall: 0.4085, F1 Score: 0.2724, AUC: 0.5040, R² Score: -12.508848, MAE: 7.2879, MSE: 69.9339
Training: Fold 2 - Total Loss: 4.0123, Cross Entropy: 1.241898, Accuracy: 0.3732, Precision: 0.2657, Recall: 0.3732, F1 Score: 0.3047, AUC: 0.5051, R² Score: -1.093676, MAE: 2.6141, MSE: 10.4764
Training: Fold 3 - Total Loss: 2.6729, Cross Entropy: 1.256730, Accuracy: 0.3750, Precision: 0.2668, Recall: 0.3750, F1 Score: 0.3054, AUC: 0.5098, R² Score: -0.197961, MAE: 1.9666, MSE: 5.9772
Training: Fold 4 - Total Loss: 2.7024, Cross Entropy: 1.252352, Accuracy: 0.4155, Precision: 0.2863, Recall: 0.4155, F1 Score: 0.3010, AUC: 0.4857, R² Score: -0.124779, MAE: 1.9598, MSE: 6.0860
Training: Fold 5 - Total Loss: 2.4722, Cross Entropy: 1.238828, Accuracy: 0.3873, Precision: 0.1500, Recal

### Vision Transformer (ViT-Tiny)

The **Vision Transformer (ViT)** introduces **self-attention mechanisms** to image classification, a method originally developed for natural language processing (NLP). Unlike CNNs, which rely on local feature extraction, ViT processes images as a sequence of **non-overlapping patches** and learns global dependencies. The **ViT-Tiny** model is a lightweight variant of ViT, optimized for lower computational costs while retaining transformer-based advantages such as **better scalability and improved performance on large datasets**.

- **Paper:** Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., et al. (2021). *An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale*. [ICLR](https://arxiv.org/abs/2010.11929).

In [53]:
ARCH = "vit-tiny"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: vit-tiny
Initial Backbone Model Size: 22.92 MB
Modified Backbone Model Size: 22.25 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 20.0479, Cross Entropy: 1.360087, Accuracy: 0.3363, Precision: 0.2804, Recall: 0.3363, F1 Score: 0.2023, AUC: 0.4779, R² Score: -11.192157, MAE: 7.5801, MSE: 63.6529
Training: Fold 2 - Total Loss: 14.2728, Cross Entropy: 1.333152, Accuracy: 0.3292, Precision: 0.1084, Recall: 0.3292, F1 Score: 0.1631, AUC: 0.5143, R² Score: -7.989779, MAE: 6.2892, MSE: 44.4654
Training: Fold 3 - Total Loss: 10.6072, Cross Entropy: 1.316710, Accuracy: 0.3345, Precision: 0.1119, Recall: 0.3345, F1 Score: 0.1677, AUC: 0.5121, R² Score: -5.695963, MAE: 5.2511, MSE: 32.2851
Training: Fold 4 - Total Loss: 8.0231, Cross Entropy: 1.320636, Accuracy: 0.3099, Precision: 0.3042, Recall: 0.3099, F1 Score: 0.1526, AUC: 0.4718, R² Score: -3.471998, MAE: 4.3784, MSE: 23.6622
Training: Fold 5 - Total Loss: 6.7555, Cross Entropy: 1.278247, Accuracy: 0.3099, Precision: 0.0960,

### EfficientNet-B0
EfficientNet introduces an **automated model scaling approach**, where width, depth, and resolution are **balanced** to optimize performance. This was achieved using **Neural Architecture Search (NAS)**, enabling EfficientNet to achieve state-of-the-art accuracy while being significantly **smaller and faster** than previous CNN architectures. EfficientNet-B0 is the smallest variant in the EfficientNet family, designed for mobile and embedded applications where computational efficiency is critical.

- **Paper:** Tan, M., & Le, Q. (2019). *EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks*. [ICML](https://arxiv.org/abs/1905.11946).

In [54]:
ARCH = "efficientnet-b0"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: efficientnet-b0
Initial Backbone Model Size: 21.43 MB
Modified Backbone Model Size: 16.96 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 31.1771, Cross Entropy: 1.443949, Accuracy: 0.1866, Precision: 0.1700, Recall: 0.1866, F1 Score: 0.1620, AUC: 0.5174, R² Score: -18.142623, MAE: 9.7340, MSE: 100.5544
Training: Fold 2 - Total Loss: 23.9194, Cross Entropy: 1.492178, Accuracy: 0.1408, Precision: 0.1604, Recall: 0.1408, F1 Score: 0.0775, AUC: 0.5316, R² Score: -14.249520, MAE: 8.3545, MSE: 76.2497
Training: Fold 3 - Total Loss: 15.9720, Cross Entropy: 1.460226, Accuracy: 0.2060, Precision: 0.2165, Recall: 0.2060, F1 Score: 0.1757, AUC: 0.4731, R² Score: -9.142823, MAE: 6.6130, MSE: 49.8327
Training: Fold 4 - Total Loss: 9.6563, Cross Entropy: 1.393744, Accuracy: 0.3310, Precision: 0.3076, Recall: 0.3310, F1 Score: 0.2782, AUC: 0.4817, R² Score: -4.469530, MAE: 4.8127, MSE: 28.9357
Training: Fold 5 - Total Loss: 3.4698, Cross Entropy: 1.307644, Accuracy: 0.3873, Precision

### ShuffleNetV2-0.5x

ShuffleNetV2 was designed to address practical limitations of lightweight CNNs in real-world applications. Unlike conventional CNNs, it incorporates **channel shuffling** and **group convolutions**, optimizing the trade-off between **speed, accuracy, and memory efficiency**. This makes it an ideal choice for **low-power devices**, including mobile phones and embedded vision systems.

- **Paper:** Ma, N., Zhang, X., Zheng, H. T., & Sun, J. (2018). *ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design*. [ECCV](https://arxiv.org/abs/1807.11164).

In [55]:
ARCH = "shufflenetv2-0.5x"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: shufflenetv2-0.5x
Initial Backbone Model Size: 5.59 MB
Modified Backbone Model Size: 2.02 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 28.5079, Cross Entropy: 1.358708, Accuracy: 0.2060, Precision: 0.1793, Recall: 0.2060, F1 Score: 0.0922, AUC: 0.5153, R² Score: -16.611597, MAE: 9.2832, MSE: 91.8562
Training: Fold 2 - Total Loss: 18.1430, Cross Entropy: 1.305262, Accuracy: 0.2254, Precision: 0.1349, Recall: 0.2254, F1 Score: 0.1593, AUC: 0.5065, R² Score: -10.568779, MAE: 7.2344, MSE: 57.4309
Training: Fold 3 - Total Loss: 9.5285, Cross Entropy: 1.269052, Accuracy: 0.3345, Precision: 0.3652, Recall: 0.3345, F1 Score: 0.2168, AUC: 0.4731, R² Score: -4.941102, MAE: 4.8850, MSE: 28.8006
Training: Fold 4 - Total Loss: 4.8359, Cross Entropy: 1.265574, Accuracy: 0.3750, Precision: 0.2608, Recall: 0.3750, F1 Score: 0.3038, AUC: 0.4935, R² Score: -1.477808, MAE: 3.1193, MSE: 13.1667
Training: Fold 5 - Total Loss: 4.1851, Cross Entropy: 1.242747, Accuracy: 0.3873, Precision: 

### RegNetY-400MF

RegNet is an automated architecture developed by **Facebook AI Research (FAIR)** that optimizes the design of deep neural networks. The **RegNetY-400MF** variant is a computationally efficient model designed for **real-time inference and deployment on edge devices**. Unlike handcrafted architectures, RegNet is **automatically searched and optimized**, leading to a **well-balanced trade-off between accuracy, speed, and resource efficiency**.

- **Paper:** Radosavovic, I., Kosaraju, R. P., Girshick, R., He, K., & Dollár, P. (2020). *Designing Network Design Spaces*. [CVPR](https://arxiv.org/abs/2003.13678).


In [56]:
ARCH = "regnety-400mf"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: regnety-400mf
Initial Backbone Model Size: 17.61 MB
Modified Backbone Model Size: 16.07 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 23.6516, Cross Entropy: 1.373351, Accuracy: 0.4014, Precision: 0.3916, Recall: 0.4014, F1 Score: 0.2435, AUC: 0.4969, R² Score: -13.494167, MAE: 8.3053, MSE: 75.6341
Training: Fold 2 - Total Loss: 9.2816, Cross Entropy: 1.407623, Accuracy: 0.3908, Precision: 0.2379, Recall: 0.3908, F1 Score: 0.2473, AUC: 0.4961, R² Score: -4.561786, MAE: 4.6857, MSE: 27.6542
Training: Fold 3 - Total Loss: 2.8779, Cross Entropy: 1.413074, Accuracy: 0.3838, Precision: 0.2728, Recall: 0.3838, F1 Score: 0.3164, AUC: 0.5145, R² Score: -0.268400, MAE: 2.0344, MSE: 6.2959
Training: Fold 4 - Total Loss: 2.5934, Cross Entropy: 1.268757, Accuracy: 0.4331, Precision: 0.3151, Recall: 0.4331, F1 Score: 0.3647, AUC: 0.5361, R² Score: -0.085457, MAE: 1.8937, MSE: 5.6843
Training: Fold 5 - Total Loss: 2.4551, Cross Entropy: 1.229349, Accuracy: 0.4155, Precision: 0.3763

### MNASNet-0.5

MNASNet was developed by Google as part of the **AutoML** framework to **optimize inference speed on mobile processors**. Unlike traditional CNNs, MNASNet **co-optimizes both accuracy and latency** using **neural architecture search (NAS)**, making it highly efficient for mobile applications.

- **Paper:** Tan, M., Chen, B., Pang, R., et al. (2019). *MNASNet: Platform-Aware Neural Architecture Search for Mobile*. [CVPR](https://arxiv.org/abs/1807.11626).


In [57]:
ARCH = "mnasnet0_5"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: mnasnet0_5
Initial Backbone Model Size: 9.04 MB
Modified Backbone Model Size: 4.58 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 27.6042, Cross Entropy: 1.392637, Accuracy: 0.2060, Precision: 0.2840, Recall: 0.2060, F1 Score: 0.1357, AUC: 0.4990, R² Score: -16.018205, MAE: 9.1085, MSE: 88.7643
Training: Fold 2 - Total Loss: 16.0695, Cross Entropy: 1.324047, Accuracy: 0.3451, Precision: 0.2176, Recall: 0.3451, F1 Score: 0.2658, AUC: 0.5272, R² Score: -9.178379, MAE: 6.6984, MSE: 50.4754
Training: Fold 3 - Total Loss: 7.7117, Cross Entropy: 1.262370, Accuracy: 0.3803, Precision: 0.4780, Recall: 0.3803, F1 Score: 0.2116, AUC: 0.5235, R² Score: -3.682799, MAE: 4.2866, MSE: 22.7601
Training: Fold 4 - Total Loss: 3.8378, Cross Entropy: 1.237336, Accuracy: 0.4190, Precision: 0.4843, Recall: 0.4190, F1 Score: 0.2495, AUC: 0.5358, R² Score: -0.872413, MAE: 2.6415, MSE: 9.9056
Training: Fold 5 - Total Loss: 34.2271, Cross Entropy: 1.384191, Accuracy: 0.2324, Precision: 0.0540, 

### ConvNeXt-Tiny

ConvNeXt is a **modernized version of ResNet**, integrating **design elements from vision transformers** while maintaining the efficiency of convolutional networks. The **ConvNeXt-Tiny** variant is a **lightweight** version optimized for efficiency while maintaining strong generalization capabilities.

- **Paper:** Liu, Z., Mao, H., Wu, C. Y., et al. (2022). *A ConvNet for the 2020s*. [CVPR](https://arxiv.org/abs/2201.03545).


In [58]:
ARCH = "convnext-tiny"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: convnext-tiny
Initial Backbone Model Size: 114.41 MB
Modified Backbone Model Size: 111.72 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 13.9455, Cross Entropy: 1.381115, Accuracy: 0.3257, Precision: 0.3243, Recall: 0.3257, F1 Score: 0.2723, AUC: 0.5044, R² Score: -7.456053, MAE: 5.5012, MSE: 43.2626
Training: Fold 2 - Total Loss: 3.1534, Cross Entropy: 1.292053, Accuracy: 0.3680, Precision: 0.2470, Recall: 0.3680, F1 Score: 0.2760, AUC: 0.4863, R² Score: -0.587454, MAE: 2.1788, MSE: 7.4964
Training: Fold 3 - Total Loss: 2.7735, Cross Entropy: 1.275250, Accuracy: 0.3310, Precision: 0.2391, Recall: 0.3310, F1 Score: 0.2700, AUC: 0.4675, R² Score: -0.360671, MAE: 1.9949, MSE: 6.2694
Training: Fold 4 - Total Loss: 2.7413, Cross Entropy: 1.262085, Accuracy: 0.3979, Precision: 0.3166, Recall: 0.3979, F1 Score: 0.2649, AUC: 0.4903, R² Score: -0.210154, MAE: 1.9777, MSE: 6.1928
Training: Fold 5 - Total Loss: 2.6286, Cross Entropy: 1.247630, Accuracy: 0.3873, Precision: 0.1500

### GhostNetV2

GhostNet introduces **"ghost modules"**, which create additional feature maps using cheap operations instead of standard convolutions. This results in **significant reductions in computation and memory usage**, making GhostNet **highly efficient for mobile vision applications**.

- **Paper:** Han, K., Wang, Y., Tian, Q., et al. (2020). *GhostNet: More Features from Cheap Operations*. [CVPR](https://arxiv.org/abs/1911.11907).


In [59]:
ARCH = "ghostnetv2"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: ghostnetv2
Initial Backbone Model Size: 25.11 MB
Modified Backbone Model Size: 20.64 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 28.2949, Cross Entropy: 1.396691, Accuracy: 0.3151, Precision: 0.3099, Recall: 0.3151, F1 Score: 0.2402, AUC: 0.4985, R² Score: -16.477759, MAE: 9.2129, MSE: 91.0573
Training: Fold 2 - Total Loss: 9.3707, Cross Entropy: 1.347335, Accuracy: 0.4032, Precision: 0.1628, Recall: 0.4032, F1 Score: 0.2320, AUC: 0.4990, R² Score: -4.665324, MAE: 4.6039, MSE: 28.0919
Training: Fold 3 - Total Loss: 2.8918, Cross Entropy: 1.282083, Accuracy: 0.3715, Precision: 0.3736, Recall: 0.3715, F1 Score: 0.2954, AUC: 0.4863, R² Score: -0.297683, MAE: 2.0515, MSE: 6.6478
Training: Fold 4 - Total Loss: 2.6206, Cross Entropy: 1.252838, Accuracy: 0.4067, Precision: 0.2779, Recall: 0.4067, F1 Score: 0.3151, AUC: 0.5053, R² Score: -0.052896, MAE: 1.9213, MSE: 5.8121
Training: Fold 5 - Total Loss: 2.6724, Cross Entropy: 1.237643, Accuracy: 0.3873, Precision: 0.1500, R

### TinyNet-A

TinyNet is designed for **extreme efficiency**, balancing depth, width, and resolution to **optimize model size without sacrificing accuracy**.

- **Paper:** Han, K., Wang, Y., Zhang, Q., et al. (2020). *Model Rubik’s Cube: Twisting Resolution, Depth and Width for TinyNets*. [NeurIPS](https://arxiv.org/abs/2109.15027).

In [60]:
ARCH = "tinynet-a"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

# Unique run identifier (for saving multiple experiments)
# signature = datetime.now().strftime("%Y%m%d-%H%M%S")

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics = []
val_metrics = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_loss, val_ce_loss, val_acc, val_prec, val_rec, val_f1, val_auc, val_mae_loss, val_mse_loss, val_r2, val_stats = eval(
                val_loader, model, cross_entropy_loss, mse_loss, mae_loss
            )
            print(
                f"Validation: Fold {fold} - Total Loss: {val_loss:.4f}, Cross Entropy: {val_ce_loss:4f}, Accuracy: {val_acc:.4f}, "
                f"Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1 Score: {val_f1:.4f}, AUC: {val_auc:.4f}, R2 Score: {val_r2:4f}, "
                f"MAE: {val_mae_loss:.4f}, MSE: {val_mse_loss:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_loss,
                    "cross_entropy_loss": val_ce_loss,
                    "accuracy": val_acc,
                    "precision": val_prec,
                    "recall": val_rec,
                    "f1_score": val_f1,
                    "auc": val_auc,
                    "r2_score": val_r2,
                    "mae_loss": val_mae_loss,
                    "mse_loss": val_mse_loss,
                    "latency_ms": val_stats[0],
                    "memory_before_mb": val_stats[1],
                    "memory_after_mb": val_stats[2],
                    "max_memory_mb": val_stats[3],
                }
            )

            with open(f"{metrics_dir}/validation_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(val_metrics, f, indent=4)

        else:
            # === TRAINING PHASE ===
            train_loss, train_ce_loss, train_acc, train_prec, train_rec, train_f1, train_auc, train_mae_loss, train_mse_loss, train_r2 = train(
                train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer
            )

            print(
                f"Training: Fold {fold} - Total Loss: {train_loss:.4f}, Cross Entropy: {train_ce_loss:4f}, Accuracy: {train_acc:.4f}, "
                f"Precision: {train_prec:.4f}, Recall: {train_rec:.4f}, F1 Score: {train_f1:.4f}, AUC: {train_auc:.4f}, R² Score: {train_r2:4f}, "
                f"MAE: {train_mae_loss:.4f}, MSE: {train_mse_loss:.4f}"
            )

            # Store training metrics
            train_metrics.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_loss,
                    "cross_entropy_loss": train_ce_loss,
                    "accuracy": train_acc,
                    "precision": train_prec,
                    "recall": train_rec,
                    "f1_score": train_f1,
                    "auc": train_auc,
                    "r2_score": train_r2,
                    "mae_loss": train_mae_loss,
                    "mse_loss": train_mse_loss,
                }
            )

            with open(f"{metrics_dir}/training_metrics_{ARCH}_{signature}.json", "w") as f:
                json.dump(train_metrics, f, indent=4)

        fold += 1  # Move to next fold

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: tinynet-a
Initial Backbone Model Size: 25.08 MB
Modified Backbone Model Size: 20.62 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 31.4542, Cross Entropy: 1.407009, Accuracy: 0.2289, Precision: 0.2246, Recall: 0.2289, F1 Score: 0.1686, AUC: 0.4857, R² Score: -18.416358, MAE: 9.7980, MSE: 101.5644
Training: Fold 2 - Total Loss: 24.0547, Cross Entropy: 1.386286, Accuracy: 0.2342, Precision: 0.1475, Recall: 0.2342, F1 Score: 0.1632, AUC: 0.4986, R² Score: -14.433408, MAE: 8.4038, MSE: 76.9476
Training: Fold 3 - Total Loss: 15.8747, Cross Entropy: 1.390098, Accuracy: 0.2940, Precision: 0.2838, Recall: 0.2940, F1 Score: 0.2696, AUC: 0.4936, R² Score: -9.167699, MAE: 6.5884, MSE: 49.6721
Training: Fold 4 - Total Loss: 9.6514, Cross Entropy: 1.346068, Accuracy: 0.4190, Precision: 0.3129, Recall: 0.4190, F1 Score: 0.3557, AUC: 0.5193, R² Score: -4.401826, MAE: 4.7842, MSE: 29.0305
Training: Fold 5 - Total Loss: 6.4151, Cross Entropy: 1.306339, Accuracy: 0.4718, Precision: 0.34

## Running Inference on All 12 Models:

In [61]:
for arch in models_list:
  print("="*100)
  print(f"{arch}")
  model = MultiModel(arch).to(device)
  model.load_state_dict(torch.load(f"{weights_dir}/model_best_accuracy_{arch}_{signature}.pth"))

  # === Testing PHASE ===
  test_loss, test_ce_loss, test_acc, test_prec, test_rec, test_f1, test_auc, test_mae_loss, test_mse_loss, test_r2, test_stats = eval(
      test_loader, model, cross_entropy_loss, mse_loss, mae_loss
  )
  print(
      f"Testing: Total Loss: {test_loss:.4f}, Cross Entropy: {test_ce_loss:4f}, Accuracy: {test_acc:.4f}, "
      f"Precision: {test_prec:.4f}, Recall: {test_rec:.4f}, F1 Score: {test_f1:.4f}, AUC: {test_auc:.4f}, R2 Score: {test_r2:4f}, "
      f"MAE: {test_mae_loss:.4f}, MSE: {test_mse_loss:.4f}"
  )
  print(
      f"Avg Latency (ms): {test_stats[0]:.2f}, Avg Memory Before (MB): {test_stats[1]:.2f}, "
      f"Avg Memory After (MB): {test_stats[2]:.2f}, Avg Max Memory (MB): {test_stats[3]:.2f}"
  )

  # Store validation metrics
  val_metrics.append(
      {
          "total_loss": test_loss,
          "cross_entropy_loss": test_ce_loss,
          "accuracy": test_acc,
          "precision": test_prec,
          "recall": test_rec,
          "f1_score": test_f1,
          "auc": test_auc,
          "r2_score": test_r2,
          "mae_loss": test_mae_loss,
          "mse_loss": test_mse_loss,
          "latency_ms": test_stats[0],
          "memory_before_mb": test_stats[1],
          "memory_after_mb": test_stats[2],
          "max_memory_mb": test_stats[3],
      }
  )

  with open(f"{metrics_dir}/testing_metrics_{arch}_{signature}.json", "w") as f:
                  json.dump(train_metrics, f, indent=4)

mobilenetv2
Initial Backbone Model Size: 14.24 MB
Modified Backbone Model Size: 9.78 MB

Testing: Total Loss: 1.6611, Cross Entropy: 0.394999, Accuracy: 0.8310, Precision: 0.8371, Recall: 0.8310, F1 Score: 0.8319, AUC: 0.9569, R2 Score: -0.015649, MAE: 1.7638, MSE: 4.6154
Avg Latency (ms): 7.29, Avg Memory Before (MB): 177.69, Avg Memory After (MB): 177.69, Avg Max Memory (MB): 462.56
resnet18
Initial Backbone Model Size: 46.83 MB
Modified Backbone Model Size: 45.04 MB

Testing: Total Loss: 1.5598, Cross Entropy: 0.297664, Accuracy: 0.8592, Precision: 0.8563, Recall: 0.8592, F1 Score: 0.8576, AUC: 0.9669, R2 Score: 0.000309, MAE: 1.7469, MSE: 4.5047
Avg Latency (ms): 4.24, Avg Memory Before (MB): 211.61, Avg Memory After (MB): 211.61, Avg Max Memory (MB): 385.56
densenet121
Initial Backbone Model Size: 32.47 MB
Modified Backbone Model Size: 28.90 MB

Testing: Total Loss: 1.5086, Cross Entropy: 0.238805, Accuracy: 0.9014, Precision: 0.8971, Recall: 0.9014, F1 Score: 0.8967, AUC: 0.9802,