<a href="https://colab.research.google.com/github/romerocruzsa/cp-anemia-detection/blob/main/notebooks/capstone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
# Import necessary libraries for file handling, data manipulation, and visualization
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Import libraries for working with images and transformations
from PIL import Image
import cv2 as cv

# Import PyTorch modules for model building, data handling, and evaluation
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import torchvision.models as models
import torchvision.models.quantization as quant_models
from torch.utils.checkpoint import checkpoint
from torch.utils.data import Dataset, DataLoader, Subset
from timm import create_model

# from torchinfo import summary

# Import libraries for machine learning metrics and model evaluation
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, r2_score, confusion_matrix
# import torchmetric
from tqdm import tqdm
from datetime import datetime
import json
import csv

import warnings
warnings.filterwarnings('ignore')
import gc

# Set the seed.
seed = 42
torch.manual_seed(seed)

<torch._C.Generator at 0x749f30d26990>

In [23]:
data_dir="/home/sebastian-cruz6/cp-anemia-detection/data/cp-anemia/"
weights_dir="/home/sebastian-cruz6/cp-anemia-detection/data/notebooks/weights/"
metrics_dir="/home/sebastian-cruz6/cp-anemia-detection/data/notebooks/metrics/"

# data_dir = "/content/drive/MyDrive/CAWT_Sebastian_202425/CP-AnemiC/"
# weights_dir = "/content/drive/MyDrive/CAWT_Sebastian_202425/Weights/"
anemic_dir=data_dir+"/Anemic/"
non_anemic_dir=data_dir+"/Non-anemic/"
signature = "02082024"

In [6]:
data_sheet_path = data_dir+"Anemia_Data_Collection_Sheet.csv"
data_sheet = pd.read_csv(data_sheet_path)
display(data_sheet)

Unnamed: 0,IMAGE_ID,HB_LEVEL,Severity,Age(Months),GENDER,REMARK,HOSPITAL,CITY/TOWN,MUNICIPALITY/DISTRICT,REGION,COUNTRY
0,Image_001,9.80,Moderate,6,Female,Anemic,Nkawie-Toase Government Hospital,Nkawie-Toase,Atwima Nwabiagya South,Ashanti,Ghana
1,Image_002,9.90,Moderate,24,Male,Anemic,Ejusu Government Hospital,Ejusu,Ejusu Municipality,Ashanti,Ghana
2,Image_003,11.10,Non-Anemic,24,Female,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
3,Image_004,12.50,Non-Anemic,12,Male,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
4,Image_005,9.90,Moderate,24,Male,Anemic,Sunyani Municipal Hospital,Sunyani,Sunyani Municipality,Bono,Ghana
...,...,...,...,...,...,...,...,...,...,...,...
705,Image_706,12.80,Non-Anemic,48,Male,Non-anemic,Bolgatanga Regional Hospital,Bolgatanga,Bolgatanga Municipality,Upper East,Ghana
706,Image_707,11.47,Non-Anemic,48,Female,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
707,Image_708,11.60,Non-Anemic,60,Male,Non-anemic,Komfo Anokye Teaching Hospital,Kumasi,Kumasi Metropolitan,Ashanti,Ghana
708,Image_709,12.10,Non-Anemic,48,Male,Non-anemic,Bolgatanga Regional Hospital,Bolgatanga,Bolgatanga Municipality,Upper East,Ghana


In [7]:
# Mapping diagnosis to severity
severity_mapping = {
    "Non-Anemic": 0,
    "Mild": 1,
    "Moderate": 2,
    "Severe": 3,
}

data_sheet['Severity'] = data_sheet['Severity'].map(severity_mapping)
display(data_sheet)

Unnamed: 0,IMAGE_ID,HB_LEVEL,Severity,Age(Months),GENDER,REMARK,HOSPITAL,CITY/TOWN,MUNICIPALITY/DISTRICT,REGION,COUNTRY
0,Image_001,9.80,2,6,Female,Anemic,Nkawie-Toase Government Hospital,Nkawie-Toase,Atwima Nwabiagya South,Ashanti,Ghana
1,Image_002,9.90,2,24,Male,Anemic,Ejusu Government Hospital,Ejusu,Ejusu Municipality,Ashanti,Ghana
2,Image_003,11.10,0,24,Female,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
3,Image_004,12.50,0,12,Male,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
4,Image_005,9.90,2,24,Male,Anemic,Sunyani Municipal Hospital,Sunyani,Sunyani Municipality,Bono,Ghana
...,...,...,...,...,...,...,...,...,...,...,...
705,Image_706,12.80,0,48,Male,Non-anemic,Bolgatanga Regional Hospital,Bolgatanga,Bolgatanga Municipality,Upper East,Ghana
706,Image_707,11.47,0,48,Female,Non-anemic,Ahmadiyya Muslim Hospital,Tachiman,Techiman Municipality,Bono-East,Ghana
707,Image_708,11.60,0,60,Male,Non-anemic,Komfo Anokye Teaching Hospital,Kumasi,Kumasi Metropolitan,Ashanti,Ghana
708,Image_709,12.10,0,48,Male,Non-anemic,Bolgatanga Regional Hospital,Bolgatanga,Bolgatanga Municipality,Upper East,Ghana


In [8]:
# Define data augmentations or transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=np.random.rand()),
    transforms.RandomVerticalFlip(p=np.random.rand()),
    transforms.RandomRotation(degrees=np.random.randint(0, 360)),
    transforms.RandomAffine(degrees=np.random.randint(0, 360)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Custom dataset class
class CPAnemiCDataset(Dataset):
    def __init__(self, dir, df, transform=None):
        self.dir = dir
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = row['IMAGE_ID']
        img_folder = row['REMARK']
        img_path = os.path.join(self.dir, img_folder, img_id + ".png")
        img = Image.open(img_path).convert('RGB')

        if self.transform:
            img = self.transform(img)

        multiclass_label = torch.tensor(row['Severity'])
        hb_level = torch.tensor(row['HB_LEVEL'])

        return img, multiclass_label, hb_level

    # Load the dataset
image_dataset = CPAnemiCDataset(data_dir, data_sheet, transform=transform)
train_dataset, test_dataset = train_test_split(image_dataset, test_size=0.20, shuffle=True)

print(f"Image Dataset Size (All): {len(image_dataset)}, \
        Train Size: {len(train_dataset)}, \
        Test Size: {len(test_dataset)}")

BATCH_SIZE = 32
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

Image Dataset Size (All): 710,         Train Size: 568,         Test Size: 142


In [9]:
# Default device
device = torch.device('cpu')

# Check for CUDA availability
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    print("CUDA is not available, using CPU.")

print(f"Selected device: {device}")

Selected device: cuda


In [10]:
!nvidia-smi

Thu Feb 13 12:28:02 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        Off |   00000000:41:00.0 Off |                  Off |
|  0%   42C    P8             34W /  480W |      15MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA GeForce RTX 4090        Off |   00

In [11]:
def get_model_size(mdl):
    torch.save(mdl.state_dict(), "tmp.pt")
    model_size = "Model Size: %.2f MB" %(os.path.getsize("tmp.pt")/1e6)
    os.remove('tmp.pt')
    return model_size

def timed_forward(model, img):
    """Applies checkpointing and logs GPU latency and memory usage."""
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)

    # Record memory before forward pass
    torch.cuda.reset_peak_memory_stats()
    mem_before = torch.cuda.memory_allocated()
    max_mem_before = torch.cuda.max_memory_allocated()

    # Start measuring latency
    start_event.record()
    class_pred, reg_pred = model(img)
    end_event.record()

    torch.cuda.synchronize()  # Ensure timing accuracy
    latency = start_event.elapsed_time(end_event)  # Time in milliseconds

    # Record memory after forward pass
    mem_after = torch.cuda.memory_allocated()
    max_mem_after = torch.cuda.max_memory_allocated()

    # Store stats in a dictionary
    stats = {
        "latency": latency,
        "malloc_before": mem_before,
        "malloc_after": mem_after,
        "max_malloc": max_mem_after,
    }

    return class_pred, reg_pred, stats

# Static Weighting Function. Set eta_class to desired importance (Classification > .5, Regression < .5, Equal == .5)
def sw_loss(loss_class, loss_reg, eta_class=0.5):
    eta_reg = 1 - eta_class
    total_loss = (eta_class * loss_class) + (eta_reg * loss_reg)
    return total_loss

In [12]:
class MultiModel(nn.Module):
    MODEL_MAPPING = {
        "mobilenetv2": lambda: models.mobilenet_v2(pretrained=False),
        "resnet18": lambda: models.resnet18(pretrained=False),
        "densenet121": lambda: models.densenet121(pretrained=False),
        "vgg16": lambda: models.vgg16(pretrained=False),
        "vit-tiny": lambda: create_model("vit_tiny_patch16_224", pretrained=False),
        "convnext-tiny": lambda: models.convnext_tiny(pretrained=False),
        "efficientnet-b0": lambda: models.efficientnet_b0(pretrained=False),
        "shufflenetv2-0.5x": lambda: models.shufflenet_v2_x0_5(pretrained=False),
        "regnety-400mf": lambda: models.regnet_y_400mf(pretrained=False),
        "mnasnet0_5": lambda: models.mnasnet0_5(pretrained=False),
        "ghostnetv2": lambda: create_model('ghostnetv2_100.in1k', pretrained=False),
        "tinynet-a": lambda: create_model("tinynet_a.in1k", pretrained=False)
    }

    FEATURE_LAYER_MAPPING = {
        "fc": ["resnet", "shufflenet", "regnet"],
        "classifier": ["densenet", "vgg", "mobilenet", "efficientnet",
                       "mnasnet","convnext", "ghostnet", "tinynet"],
        "head": ["vit"]
    }

    def __init__(self, model_name):
        super().__init__()
        self.model_name = model_name.lower()

        if self.model_name not in self.MODEL_MAPPING:
            raise ValueError(f"Model {model_name} not supported")

        self.model = self.MODEL_MAPPING[self.model_name]()
        num_ftrs = self._get_feature_size()

        print(f"Initial Backbone {get_model_size(self.model)}")

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(num_ftrs, 128),
            nn.ReLU(),
            nn.Linear(128, 5)
        )

        self._assign_classifier()
        print(f"Modified Backbone {get_model_size(self.model)}\n")

    def _get_feature_size(self):
        """Retrieve the number of input features for the last layer."""
        # Special case for VGG16 since its features need flattening
        if "vgg" in self.model_name:
            return 25088  # VGG16 outputs (batch, 512, 7, 7) -> flattened to 25088

        feature_layers = {
            "fc": getattr(self.model, "fc", None),
            "classifier": getattr(self.model, "classifier", None),
            "head": getattr(self.model, "head", None)
        }

        for key, layer in feature_layers.items():
            if layer:
                return layer[-1].in_features if isinstance(layer, nn.Sequential) else layer.in_features

        return getattr(self.model, "num_features", None)

    def _assign_classifier(self):
        """Assigns the appropriate classifier to the model based on its architecture."""
        if "vgg" in self.model_name:
            self.model.classifier = self.classifier
        else:
          for attr, models in self.FEATURE_LAYER_MAPPING.items():
            if any(m in self.model_name for m in models):
                setattr(self.model, attr, self.classifier)
                return

    def forward(self, x):
        output = self.model(x)
        return output[:, :4], output[:, 4]  # Class probabilities and Hb level estimate

In [13]:
models_list = ["mobilenetv2", "resnet18", "densenet121", "vgg16", "vit-tiny",
               "efficientnet-b0", "shufflenetv2-0.5x", "regnety-400mf",
               "mnasnet0_5", "convnext-tiny", "ghostnetv2", "tinynet-a"
               ]
for arch in models_list:
    print(f"Loading model: {arch}")
    model = MultiModel(arch).to(device)
    # print(summary(model))
    # print(model)

Loading model: mobilenetv2
Initial Backbone Model Size: 14.24 MB
Modified Backbone Model Size: 9.78 MB

Loading model: resnet18
Initial Backbone Model Size: 46.83 MB
Modified Backbone Model Size: 45.04 MB

Loading model: densenet121
Initial Backbone Model Size: 32.47 MB
Modified Backbone Model Size: 28.90 MB

Loading model: vgg16
Initial Backbone Model Size: 553.44 MB
Modified Backbone Model Size: 71.72 MB

Loading model: vit-tiny
Initial Backbone Model Size: 22.92 MB
Modified Backbone Model Size: 22.25 MB

Loading model: efficientnet-b0
Initial Backbone Model Size: 21.43 MB
Modified Backbone Model Size: 16.96 MB

Loading model: shufflenetv2-0.5x
Initial Backbone Model Size: 5.59 MB
Modified Backbone Model Size: 2.02 MB

Loading model: regnety-400mf
Initial Backbone Model Size: 17.61 MB
Modified Backbone Model Size: 16.07 MB

Loading model: mnasnet0_5
Initial Backbone Model Size: 9.04 MB
Modified Backbone Model Size: 4.58 MB

Loading model: convnext-tiny
Initial Backbone Model Size: 11

In [14]:
def train(dataloader, model, class_loss, reg1_loss, reg2_loss, optimizer):
    """Trains the model and logs additional metrics."""
    model.train()
    total_loss = 0
    total_ce_loss = 0
    total_mse_loss = 0
    total_mae_loss = 0
    correct = 0
    total_samples = 0

    all_preds = []
    all_targets = []
    all_probs = []
    all_hb_targets = []
    all_hb_preds = []

    for _, (img, multiclass, hb_level) in enumerate(dataloader):
        img = img.to(device)
        multiclass = multiclass.to(device).long()
        hb_level = hb_level.to(device).unsqueeze(1).float()

        optimizer.zero_grad()

        # Forward pass
        class_pred, reg_pred = model(img)

        # Compute losses
        ce_loss = class_loss(class_pred, multiclass)
        mse_loss = reg1_loss(reg_pred, hb_level)
        mae_loss = reg2_loss(reg_pred, hb_level)
        loss = sw_loss(ce_loss, mse_loss, 0.7)  # Weighted loss

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Track total losses
        total_loss += loss.item()
        total_ce_loss += ce_loss.item()
        total_mse_loss += mse_loss.item()
        total_mae_loss += mae_loss.item()

        # Compute classification accuracy
        class_probs = F.softmax(class_pred, dim=1)
        highest_prob_class = torch.argmax(class_probs, dim=1)

        correct += (highest_prob_class == multiclass).sum().item()
        total_samples += multiclass.size(0)

        # Collect data for additional metrics
        all_preds.extend(highest_prob_class.detach().cpu().numpy())
        all_targets.extend(multiclass.detach().cpu().numpy())
        all_probs.extend(class_probs.detach().cpu().numpy())
        all_hb_targets.extend(hb_level.detach().cpu().numpy())
        all_hb_preds.extend(reg_pred.squeeze().cpu().detach().numpy())

    # Compute additional metrics
    precision = precision_score(all_targets, all_preds, average="weighted")
    recall = recall_score(all_targets, all_preds, average="weighted")
    f1 = f1_score(all_targets, all_preds, average="weighted")
    auc = roc_auc_score(all_targets, all_probs, multi_class="ovr")
    r2 = r2_score(all_hb_targets, all_hb_preds)

    # Compute final statistics
    avg_loss = total_loss / len(dataloader)
    avg_ce_loss = total_ce_loss / len(dataloader)
    avg_mse_loss = total_mse_loss / len(dataloader)
    avg_mae_loss = total_mae_loss / len(dataloader)
    accuracy = correct / total_samples

    # Store metrics
    final_metrics = [avg_loss, avg_ce_loss, accuracy, precision, recall, f1, auc, r2, avg_mae_loss, avg_mse_loss]

    return final_metrics


In [15]:
def eval(dataloader, model, class_loss, reg1_loss, reg2_loss):
    """Evaluates the model with additional metrics: Precision, Recall, AUC, F1, R², Memory Usage, and Latency."""
    model.eval()
    mean_stats = []

    total_loss = 0
    total_ce_loss = 0
    total_mse_loss = 0
    total_mae_loss = 0
    correct = 0
    total_samples = 0

    all_preds = []
    all_targets = []
    all_probs = []
    all_hb_targets = []
    all_hb_preds = []

    torch.cuda.empty_cache()
    gc.collect()

    with torch.no_grad():
        for _, (img, multiclass, hb_level) in enumerate(dataloader):
            img = img.to(device)
            multiclass = multiclass.to(device).long()
            hb_level = hb_level.to(device).unsqueeze(1).float()

            # Forward pass with latency & memory tracking
            class_pred, reg_pred, stats = timed_forward(model, img)
            mean_stats.append(stats)

            # Compute losses
            ce_loss = class_loss(class_pred, multiclass)
            mse_loss = reg1_loss(reg_pred, hb_level)
            mae_loss = reg2_loss(reg_pred, hb_level)
            loss = sw_loss(ce_loss, mse_loss, 0.7)

            # Track total losses
            total_loss += loss.item()
            total_ce_loss += ce_loss.item()
            total_mse_loss += mse_loss.item()
            total_mae_loss += mae_loss.item()

            # Compute classification accuracy
            class_probs = F.softmax(class_pred, dim=1)
            highest_prob_class = torch.argmax(class_probs, dim=1)

            correct += (highest_prob_class == multiclass).sum().item()
            total_samples += multiclass.size(0)

            # Collect data for additional metrics
            all_preds.extend(highest_prob_class.detach().cpu().numpy())
            all_targets.extend(multiclass.detach().cpu().numpy())
            all_probs.extend(class_probs.detach().cpu().numpy())
            all_hb_targets.extend(hb_level.detach().cpu().numpy())
            all_hb_preds.extend(reg_pred.squeeze().detach().cpu().numpy())

    # Compute mean statistics
    mean_latency = np.mean([s["latency"] for s in mean_stats])
    mean_mem_before = np.mean([s["malloc_before"] for s in mean_stats]) / 1_048_576  # Convert bytes to MB
    mean_mem_after = np.mean([s["malloc_after"] for s in mean_stats]) / 1_048_576  # Convert bytes to MB
    mean_max_mem = np.mean([s["max_malloc"] for s in mean_stats]) / 1_048_576  # Convert bytes to MB

    # Store final mean statistics
    final_mean_stats = [mean_latency, mean_mem_before, mean_mem_after, mean_max_mem]

    # Compute additional evaluation metrics
    precision = precision_score(all_targets, all_preds, average="weighted")
    recall = recall_score(all_targets, all_preds, average="weighted")
    f1 = f1_score(all_targets, all_preds, average="weighted")
    auc = roc_auc_score(all_targets, all_probs, multi_class="ovr")
    r2 = r2_score(all_hb_targets, all_hb_preds)

    # Compute confusion matrix
    # cm = confusion_matrix(all_targets, all_preds)

    # Compute final average losses
    avg_loss = total_loss / len(dataloader)
    avg_ce_loss = total_ce_loss / len(dataloader)
    avg_mse_loss = total_mse_loss / len(dataloader)
    avg_mae_loss = total_mae_loss / len(dataloader)
    accuracy = correct / total_samples

    # Store metrics
    final_metrics = [avg_loss, avg_ce_loss, accuracy, precision, recall, f1, auc, r2, avg_mae_loss, avg_mse_loss]

    return final_metrics, final_mean_stats

## Model Selection

### MobileNetV2

MobileNetV2 is a highly efficient deep learning model designed for mobile and embedded vision applications. It builds upon its predecessor, MobileNetV1, by introducing **inverted residual blocks** and **linear bottlenecks**, significantly improving both computational efficiency and accuracy. Instead of using standard convolutions, MobileNetV2 employs **depthwise separable convolutions**, reducing the number of parameters while maintaining expressiveness. This makes it particularly useful for real-time applications such as mobile vision, IoT, and edge computing.

- **Paper:** Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., & Chen, L. C. (2018). *MobileNetV2: Inverted Residuals and Linear Bottlenecks*. [CVPR](https://arxiv.org/abs/1801.04381

In [39]:
# === CONFIGURATION ===
ARCH = "mobilenetv2"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: mobilenetv2
Initial Backbone Model Size: 14.24 MB
Modified Backbone Model Size: 9.78 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 29.2153, Cross Entropy: 1.359509, Accuracy: 0.3239, Precision: 0.3055, Recall: 0.3239, F1 Score: 0.2436, AUC: 0.5319, R2 Score: -17.071795, MAE: 9.4009, MSE: 94.2123
Training: Fold 2 - Total Loss: 17.2406, Cross Entropy: 1.310734, Accuracy: 0.3415, Precision: 0.3264, Recall: 0.3415, F1 Score: 0.2068, AUC: 0.5240, R2 Score: -9.991949, MAE: 6.9839, MSE: 54.4103
Training: Fold 3 - Total Loss: 8.2275, Cross Entropy: 1.273699, Accuracy: 0.3680, Precision: 0.2554, Recall: 0.3680, F1 Score: 0.2877, AUC: 0.4974, R2 Score: -4.032566, MAE: 4.4436, MSE: 24.4529
Training: Fold 4 - Total Loss: 3.9571, Cross Entropy: 1.244234, Accuracy: 0.4155, Precision: 0.2513, Recall: 0.4155, F1 Score: 0.2483, AUC: 0.4919, R2 Score: -0.949706, MAE: 2.6808, MSE: 10.2870
Validation: Fold 5 - Total Loss: 5.0146, Cross Entropy: 1.266812, Accuracy: 0.3099, Precision: 0.09

### ResNet-18

ResNet (Residual Network) introduced a revolutionary technique called **skip connections**, which helps in training very deep networks without suffering from vanishing gradients. The architecture enables **identity mappings** through shortcut connections, allowing gradients to propagate smoothly during backpropagation. The 18-layer variant (ResNet-18) is a lightweight version of the deeper ResNet models, making it suitable for real-time inference while still benefiting from the **deep residual learning approach**.

- **Paper:** He, K., Zhang, X., Ren, S., & Sun, J. (2016). *Deep Residual Learning for Image Recognition*. [CVPR](https://arxiv.org/abs/1512.03385).

In [40]:
ARCH = "resnet18"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: resnet18
Initial Backbone Model Size: 46.83 MB
Modified Backbone Model Size: 45.04 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 25.8165, Cross Entropy: 1.298057, Accuracy: 0.3116, Precision: 0.2519, Recall: 0.3116, F1 Score: 0.1861, AUC: 0.5284, R2 Score: -14.901662, MAE: 8.7603, MSE: 83.0261
Training: Fold 2 - Total Loss: 11.4782, Cross Entropy: 1.245891, Accuracy: 0.3609, Precision: 0.2754, Recall: 0.3609, F1 Score: 0.2974, AUC: 0.5207, R2 Score: -6.131155, MAE: 5.4318, MSE: 35.3535
Training: Fold 3 - Total Loss: 3.6055, Cross Entropy: 1.258321, Accuracy: 0.3838, Precision: 0.2747, Recall: 0.3838, F1 Score: 0.3200, AUC: 0.5128, R2 Score: -0.861862, MAE: 2.4813, MSE: 9.0824
Training: Fold 4 - Total Loss: 2.5791, Cross Entropy: 1.261209, Accuracy: 0.4102, Precision: 0.2735, Recall: 0.4102, F1 Score: 0.2954, AUC: 0.4944, R2 Score: -0.070103, MAE: 1.8850, MSE: 5.6541
Validation: Fold 5 - Total Loss: 2.4768, Cross Entropy: 1.217602, Accuracy: 0.3873, Precision: 0.1500, 

Training: Fold 3 - Total Loss: 2.2898, Cross Entropy: 1.141286, Accuracy: 0.4683, Precision: 0.3832, Recall: 0.4683, F1 Score: 0.4029, AUC: 0.6799, R2 Score: -0.037148, MAE: 1.7742, MSE: 4.9697
Training: Fold 4 - Total Loss: 2.4330, Cross Entropy: 1.140733, Accuracy: 0.4630, Precision: 0.4054, Recall: 0.4630, F1 Score: 0.4219, AUC: 0.6655, R2 Score: -0.040623, MAE: 1.8574, MSE: 5.4483
Validation: Fold 5 - Total Loss: 2.3613, Cross Entropy: 1.083236, Accuracy: 0.5423, Precision: 0.4624, Recall: 0.5423, F1 Score: 0.4599, AUC: 0.6942, R2 Score: -0.001191, MAE: 1.8178, MSE: 5.3433
Avg Latency (ms): 3.17, Avg Memory Before (MB): 264.81, Avg Memory After (MB): 264.81, Avg Max Memory (MB): 438.76
Best model saved with Accuracy: 0.5423

Epoch 9/150
Training: Fold 1 - Total Loss: 2.3925, Cross Entropy: 1.133266, Accuracy: 0.4930, Precision: 0.4322, Recall: 0.4930, F1 Score: 0.4468, AUC: 0.6711, R2 Score: -0.021139, MAE: 1.8547, MSE: 5.3307
Training: Fold 2 - Total Loss: 2.3096, Cross Entropy: 1

### DenseNet-121

DenseNet (Densely Connected Convolutional Networks) improves gradient flow and feature reuse by introducing **dense connections**. Unlike traditional architectures, where layers receive input only from the previous layer, **DenseNet connects each layer to every preceding layer**. This eliminates redundant feature maps and improves efficiency while requiring fewer parameters than other deep networks. DenseNet-121, a version with 121 layers, is particularly effective for feature-rich tasks like medical image classification and object recognition.

- **Paper:** Huang, G., Liu, Z., Van Der Maaten, L., & Weinberger, K. Q. (2017). *Densely Connected Convolutional Networks*. [CVPR](https://arxiv.org/abs/1608.06993).

In [41]:
ARCH = "densenet121"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: densenet121
Initial Backbone Model Size: 32.47 MB
Modified Backbone Model Size: 28.90 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 28.1720, Cross Entropy: 1.345035, Accuracy: 0.3926, Precision: 0.2044, Recall: 0.3926, F1 Score: 0.2545, AUC: 0.5055, R2 Score: -16.425638, MAE: 9.2275, MSE: 90.7683
Training: Fold 2 - Total Loss: 16.3604, Cross Entropy: 1.362946, Accuracy: 0.4032, Precision: 0.1625, Recall: 0.4032, F1 Score: 0.2317, AUC: 0.5074, R2 Score: -9.353037, MAE: 6.7686, MSE: 51.3544
Training: Fold 3 - Total Loss: 7.5788, Cross Entropy: 1.406786, Accuracy: 0.3785, Precision: 0.1433, Recall: 0.3785, F1 Score: 0.2079, AUC: 0.5461, R2 Score: -3.549991, MAE: 4.1877, MSE: 21.9803
Training: Fold 4 - Total Loss: 3.6895, Cross Entropy: 1.298687, Accuracy: 0.4173, Precision: 0.1741, Recall: 0.4173, F1 Score: 0.2457, AUC: 0.5183, R2 Score: -0.749327, MAE: 2.5552, MSE: 9.2679
Validation: Fold 5 - Total Loss: 2.5563, Cross Entropy: 1.250572, Accuracy: 0.3873, Precision: 0.15

### VGG16

VGG16 is a deep convolutional neural network that achieved **high accuracy** in image classification tasks while maintaining a **simple architecture**. It consists of **16 layers**, primarily using **small 3×3 convolutions** stacked in increasing depth, with max pooling layers interspersed to reduce spatial dimensions. Unlike more recent models, VGG16 has a relatively high number of parameters, making it computationally expensive. However, it remains widely used for **transfer learning** due to its well-generalized feature representations.

- **Paper:** Simonyan, K., & Zisserman, A. (2015). *Very Deep Convolutional Networks for Large-Scale Image Recognition*. [ICLR](https://arxiv.org/abs/1409.1556).

In [42]:
ARCH = "vgg16"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: vgg16
Initial Backbone Model Size: 553.44 MB
Modified Backbone Model Size: 71.72 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 9.9103, Cross Entropy: 1.849021, Accuracy: 0.3081, Precision: 0.3631, Recall: 0.3081, F1 Score: 0.2800, AUC: 0.4716, R2 Score: -4.444187, MAE: 4.2857, MSE: 28.7201
Training: Fold 2 - Total Loss: 2.5758, Cross Entropy: 1.287496, Accuracy: 0.3803, Precision: 0.2675, Recall: 0.3803, F1 Score: 0.2910, AUC: 0.5219, R2 Score: -0.071777, MAE: 1.8844, MSE: 5.5818
Training: Fold 3 - Total Loss: 2.6269, Cross Entropy: 1.276791, Accuracy: 0.3204, Precision: 0.2665, Recall: 0.3204, F1 Score: 0.2674, AUC: 0.4771, R2 Score: -0.151356, MAE: 1.9357, MSE: 5.7773
Training: Fold 4 - Total Loss: 2.8005, Cross Entropy: 1.248845, Accuracy: 0.4085, Precision: 0.2602, Recall: 0.4085, F1 Score: 0.2701, AUC: 0.4972, R2 Score: -0.187160, MAE: 2.0407, MSE: 6.4210
Validation: Fold 5 - Total Loss: 2.5164, Cross Entropy: 1.238222, Accuracy: 0.3873, Precision: 0.1500, Recall

### Vision Transformer (ViT-Tiny)

The **Vision Transformer (ViT)** introduces **self-attention mechanisms** to image classification, a method originally developed for natural language processing (NLP). Unlike CNNs, which rely on local feature extraction, ViT processes images as a sequence of **non-overlapping patches** and learns global dependencies. The **ViT-Tiny** model is a lightweight variant of ViT, optimized for lower computational costs while retaining transformer-based advantages such as **better scalability and improved performance on large datasets**.

- **Paper:** Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., et al. (2021). *An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale*. [ICLR](https://arxiv.org/abs/2010.11929).

In [43]:
ARCH = "vit-tiny"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: vit-tiny
Initial Backbone Model Size: 22.92 MB
Modified Backbone Model Size: 22.25 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 21.9865, Cross Entropy: 1.370414, Accuracy: 0.3521, Precision: 0.2817, Recall: 0.3521, F1 Score: 0.2808, AUC: 0.5032, R2 Score: -12.417298, MAE: 8.0047, MSE: 70.0908
Training: Fold 2 - Total Loss: 16.1884, Cross Entropy: 1.354139, Accuracy: 0.3820, Precision: 0.3025, Recall: 0.3820, F1 Score: 0.3138, AUC: 0.5003, R2 Score: -9.267955, MAE: 6.7649, MSE: 50.8018
Training: Fold 3 - Total Loss: 12.1890, Cross Entropy: 1.331390, Accuracy: 0.3785, Precision: 0.2568, Recall: 0.3785, F1 Score: 0.2577, AUC: 0.4933, R2 Score: -6.744116, MAE: 5.7141, MSE: 37.5233
Training: Fold 4 - Total Loss: 9.2412, Cross Entropy: 1.287779, Accuracy: 0.3926, Precision: 0.2548, Recall: 0.3926, F1 Score: 0.2756, AUC: 0.4772, R2 Score: -4.220162, MAE: 4.8187, MSE: 27.7991
Validation: Fold 5 - Total Loss: 7.9599, Cross Entropy: 1.281374, Accuracy: 0.3873, Precision: 0.150

Training: Fold 1 - Total Loss: 6.3731, Cross Entropy: 1.259205, Accuracy: 0.4085, Precision: 0.2562, Recall: 0.4085, F1 Score: 0.2503, AUC: 0.5123, R2 Score: -2.495691, MAE: 3.7760, MSE: 18.3054
Training: Fold 2 - Total Loss: 4.4610, Cross Entropy: 1.264093, Accuracy: 0.3961, Precision: 0.2712, Recall: 0.3961, F1 Score: 0.2611, AUC: 0.4424, R2 Score: -1.432314, MAE: 2.9497, MSE: 11.9206
Training: Fold 3 - Total Loss: 3.2295, Cross Entropy: 1.260053, Accuracy: 0.3838, Precision: 0.2774, Recall: 0.3838, F1 Score: 0.2690, AUC: 0.4890, R2 Score: -0.624959, MAE: 2.2983, MSE: 7.8249
Training: Fold 4 - Total Loss: 2.8561, Cross Entropy: 1.247788, Accuracy: 0.4102, Precision: 0.2756, Recall: 0.4102, F1 Score: 0.2913, AUC: 0.4906, R2 Score: -0.219684, MAE: 2.0972, MSE: 6.6088
Validation: Fold 5 - Total Loss: 2.6330, Cross Entropy: 1.237317, Accuracy: 0.3873, Precision: 0.1500, Recall: 0.3873, F1 Score: 0.2163, AUC: 0.5758, R2 Score: -0.085057, MAE: 1.9277, MSE: 5.8897
Avg Latency (ms): 4.59, Av

### EfficientNet-B0
EfficientNet introduces an **automated model scaling approach**, where width, depth, and resolution are **balanced** to optimize performance. This was achieved using **Neural Architecture Search (NAS)**, enabling EfficientNet to achieve state-of-the-art accuracy while being significantly **smaller and faster** than previous CNN architectures. EfficientNet-B0 is the smallest variant in the EfficientNet family, designed for mobile and embedded applications where computational efficiency is critical.

- **Paper:** Tan, M., & Le, Q. (2019). *EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks*. [ICML](https://arxiv.org/abs/1905.11946).

In [44]:
ARCH = "efficientnet-b0"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: efficientnet-b0
Initial Backbone Model Size: 21.43 MB
Modified Backbone Model Size: 16.96 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 32.0989, Cross Entropy: 1.387965, Accuracy: 0.2271, Precision: 0.2774, Recall: 0.2271, F1 Score: 0.1997, AUC: 0.5025, R2 Score: -18.797562, MAE: 9.9120, MSE: 103.7576
Training: Fold 2 - Total Loss: 24.8694, Cross Entropy: 1.340195, Accuracy: 0.3204, Precision: 0.2372, Recall: 0.3204, F1 Score: 0.2309, AUC: 0.4900, R2 Score: -15.042148, MAE: 8.5913, MSE: 79.7709
Training: Fold 3 - Total Loss: 16.9931, Cross Entropy: 1.288777, Accuracy: 0.3345, Precision: 0.2397, Recall: 0.3345, F1 Score: 0.2793, AUC: 0.5169, R2 Score: -10.057818, MAE: 6.8633, MSE: 53.6364
Training: Fold 4 - Total Loss: 10.5256, Cross Entropy: 1.281039, Accuracy: 0.3926, Precision: 0.2674, Recall: 0.3926, F1 Score: 0.2898, AUC: 0.5052, R2 Score: -5.078046, MAE: 5.0970, MSE: 32.0962
Validation: Fold 5 - Total Loss: 4.3387, Cross Entropy: 1.342427, Accuracy: 0.3873, Preci

Training: Fold 1 - Total Loss: 2.5033, Cross Entropy: 1.230960, Accuracy: 0.4102, Precision: 0.2942, Recall: 0.4102, F1 Score: 0.2917, AUC: 0.5212, R2 Score: -0.053269, MAE: 1.8831, MSE: 5.4721
Training: Fold 2 - Total Loss: 2.3856, Cross Entropy: 1.239844, Accuracy: 0.3996, Precision: 0.2825, Recall: 0.3996, F1 Score: 0.2984, AUC: 0.5060, R2 Score: -0.011278, MAE: 1.7837, MSE: 5.0590
Training: Fold 3 - Total Loss: 2.3643, Cross Entropy: 1.247628, Accuracy: 0.3750, Precision: 0.2449, Recall: 0.3750, F1 Score: 0.2272, AUC: 0.5310, R2 Score: -0.020302, MAE: 1.7768, MSE: 4.9699
Training: Fold 4 - Total Loss: 2.5051, Cross Entropy: 1.237988, Accuracy: 0.4208, Precision: 0.2888, Recall: 0.4208, F1 Score: 0.3060, AUC: 0.5442, R2 Score: -0.014463, MAE: 1.8650, MSE: 5.4616
Validation: Fold 5 - Total Loss: 2.4054, Cross Entropy: 1.214013, Accuracy: 0.3803, Precision: 0.2066, Recall: 0.3803, F1 Score: 0.2399, AUC: 0.5862, R2 Score: 0.023491, MAE: 1.8101, MSE: 5.1854
Avg Latency (ms): 4.81, Avg M

### ShuffleNetV2-0.5x

ShuffleNetV2 was designed to address practical limitations of lightweight CNNs in real-world applications. Unlike conventional CNNs, it incorporates **channel shuffling** and **group convolutions**, optimizing the trade-off between **speed, accuracy, and memory efficiency**. This makes it an ideal choice for **low-power devices**, including mobile phones and embedded vision systems.

- **Paper:** Ma, N., Zhang, X., Zheng, H. T., & Sun, J. (2018). *ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design*. [ECCV](https://arxiv.org/abs/1807.11164).

In [45]:
ARCH = "shufflenetv2-0.5x"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: shufflenetv2-0.5x
Initial Backbone Model Size: 5.59 MB
Modified Backbone Model Size: 2.02 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 28.8853, Cross Entropy: 1.372443, Accuracy: 0.3873, Precision: 0.2111, Recall: 0.3873, F1 Score: 0.2574, AUC: 0.5220, R2 Score: -16.838568, MAE: 9.3552, MSE: 93.0821
Training: Fold 2 - Total Loss: 17.3760, Cross Entropy: 1.387062, Accuracy: 0.4032, Precision: 0.1625, Recall: 0.4032, F1 Score: 0.2317, AUC: 0.5212, R2 Score: -10.038296, MAE: 7.0135, MSE: 54.6835
Training: Fold 3 - Total Loss: 8.7408, Cross Entropy: 1.432248, Accuracy: 0.3785, Precision: 0.1433, Recall: 0.3785, F1 Score: 0.2079, AUC: 0.5105, R2 Score: -4.314227, MAE: 4.6055, MSE: 25.7941
Training: Fold 4 - Total Loss: 4.3440, Cross Entropy: 1.325396, Accuracy: 0.4173, Precision: 0.1741, Recall: 0.4173, F1 Score: 0.2457, AUC: 0.5064, R2 Score: -1.112843, MAE: 2.8773, MSE: 11.3874
Validation: Fold 5 - Total Loss: 3.5378, Cross Entropy: 1.279243, Accuracy: 0.3873, Precision

Training: Fold 4 - Total Loss: 2.5059, Cross Entropy: 1.251630, Accuracy: 0.4208, Precision: 0.3295, Recall: 0.4208, F1 Score: 0.2701, AUC: 0.4748, R2 Score: -0.015836, MAE: 1.8487, MSE: 5.4326
Validation: Fold 5 - Total Loss: 2.4499, Cross Entropy: 1.225564, Accuracy: 0.3873, Precision: 0.1500, Recall: 0.3873, F1 Score: 0.2163, AUC: 0.6663, R2 Score: 0.001946, MAE: 1.8163, MSE: 5.3067
Avg Latency (ms): 2.65, Avg Memory Before (MB): 97.29, Avg Memory After (MB): 97.30, Avg Max Memory (MB): 162.53

Epoch 4/150
Training: Fold 1 - Total Loss: 2.4489, Cross Entropy: 1.236025, Accuracy: 0.4085, Precision: 0.1668, Recall: 0.4085, F1 Score: 0.2369, AUC: 0.4886, R2 Score: 0.001212, MAE: 1.8431, MSE: 5.2788
Training: Fold 2 - Total Loss: 2.3534, Cross Entropy: 1.227148, Accuracy: 0.4014, Precision: 0.2177, Recall: 0.4014, F1 Score: 0.2348, AUC: 0.5385, R2 Score: -0.006853, MAE: 1.7722, MSE: 4.9813
Training: Fold 3 - Total Loss: 2.3510, Cross Entropy: 1.253220, Accuracy: 0.3803, Precision: 0.267

### RegNetY-400MF

RegNet is an automated architecture developed by **Facebook AI Research (FAIR)** that optimizes the design of deep neural networks. The **RegNetY-400MF** variant is a computationally efficient model designed for **real-time inference and deployment on edge devices**. Unlike handcrafted architectures, RegNet is **automatically searched and optimized**, leading to a **well-balanced trade-off between accuracy, speed, and resource efficiency**.

- **Paper:** Radosavovic, I., Kosaraju, R. P., Girshick, R., He, K., & Dollár, P. (2020). *Designing Network Design Spaces*. [CVPR](https://arxiv.org/abs/2003.13678).


In [46]:
ARCH = "regnety-400mf"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: regnety-400mf
Initial Backbone Model Size: 17.61 MB
Modified Backbone Model Size: 16.07 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 27.1283, Cross Entropy: 1.321747, Accuracy: 0.3187, Precision: 0.3301, Recall: 0.3187, F1 Score: 0.3034, AUC: 0.5185, R2 Score: -15.749746, MAE: 8.9566, MSE: 87.3436
Training: Fold 2 - Total Loss: 10.0617, Cross Entropy: 1.285021, Accuracy: 0.3961, Precision: 0.3527, Recall: 0.3961, F1 Score: 0.3691, AUC: 0.5179, R2 Score: -5.079579, MAE: 4.9381, MSE: 30.5406
Training: Fold 3 - Total Loss: 2.8467, Cross Entropy: 1.258072, Accuracy: 0.3891, Precision: 0.2763, Recall: 0.3891, F1 Score: 0.3229, AUC: 0.5349, R2 Score: -0.302085, MAE: 2.0600, MSE: 6.5536
Training: Fold 4 - Total Loss: 2.6246, Cross Entropy: 1.252607, Accuracy: 0.3979, Precision: 0.2736, Recall: 0.3979, F1 Score: 0.3074, AUC: 0.5276, R2 Score: -0.079912, MAE: 1.9177, MSE: 5.8260
Validation: Fold 5 - Total Loss: 2.4417, Cross Entropy: 1.211370, Accuracy: 0.3873, Precision: 0.1

Training: Fold 1 - Total Loss: 2.4903, Cross Entropy: 1.235483, Accuracy: 0.4085, Precision: 0.3335, Recall: 0.4085, F1 Score: 0.3278, AUC: 0.5415, R2 Score: -0.048208, MAE: 1.8730, MSE: 5.4181
Training: Fold 2 - Total Loss: 2.3917, Cross Entropy: 1.227291, Accuracy: 0.4137, Precision: 0.5040, Recall: 0.4137, F1 Score: 0.3438, AUC: 0.5415, R2 Score: -0.045281, MAE: 1.8039, MSE: 5.1086
Training: Fold 3 - Total Loss: 2.3653, Cross Entropy: 1.241196, Accuracy: 0.3996, Precision: 0.2818, Recall: 0.3996, F1 Score: 0.3273, AUC: 0.5524, R2 Score: -0.027736, MAE: 1.7725, MSE: 4.9882
Training: Fold 4 - Total Loss: 2.5277, Cross Entropy: 1.264391, Accuracy: 0.3996, Precision: 0.2933, Recall: 0.3996, F1 Score: 0.3383, AUC: 0.4930, R2 Score: -0.040681, MAE: 1.8634, MSE: 5.4754
Validation: Fold 5 - Total Loss: 2.4052, Cross Entropy: 1.182176, Accuracy: 0.4225, Precision: 0.2959, Recall: 0.4225, F1 Score: 0.3233, AUC: 0.6226, R2 Score: 0.003698, MAE: 1.8208, MSE: 5.2589
Avg Latency (ms): 3.87, Avg M

### MNASNet-0.5

MNASNet was developed by Google as part of the **AutoML** framework to **optimize inference speed on mobile processors**. Unlike traditional CNNs, MNASNet **co-optimizes both accuracy and latency** using **neural architecture search (NAS)**, making it highly efficient for mobile applications.

- **Paper:** Tan, M., Chen, B., Pang, R., et al. (2019). *MNASNet: Platform-Aware Neural Architecture Search for Mobile*. [CVPR](https://arxiv.org/abs/1807.11626).


In [50]:
ARCH = "mnasnet0_5"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: mnasnet0_5
Initial Backbone Model Size: 9.04 MB
Modified Backbone Model Size: 4.58 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 29.2726, Cross Entropy: 1.418778, Accuracy: 0.1373, Precision: 0.3367, Recall: 0.1373, F1 Score: 0.1232, AUC: 0.4847, R2 Score: -17.083751, MAE: 9.4002, MSE: 94.2648
Training: Fold 2 - Total Loss: 16.6582, Cross Entropy: 1.370149, Accuracy: 0.3539, Precision: 0.5432, Recall: 0.3539, F1 Score: 0.2667, AUC: 0.5336, R2 Score: -9.577161, MAE: 6.8324, MSE: 52.3303
Training: Fold 3 - Total Loss: 7.5058, Cross Entropy: 1.294919, Accuracy: 0.3750, Precision: 0.4589, Recall: 0.3750, F1 Score: 0.2329, AUC: 0.5211, R2 Score: -3.534603, MAE: 4.1719, MSE: 21.9977
Training: Fold 4 - Total Loss: 3.6827, Cross Entropy: 1.252988, Accuracy: 0.3926, Precision: 0.2483, Recall: 0.3926, F1 Score: 0.2724, AUC: 0.4709, R2 Score: -0.779007, MAE: 2.5583, MSE: 9.3519
Validation: Fold 5 - Total Loss: 35.0131, Cross Entropy: 1.381918, Accuracy: 0.3099, Precision: 0.0960

### ConvNeXt-Tiny

ConvNeXt is a **modernized version of ResNet**, integrating **design elements from vision transformers** while maintaining the efficiency of convolutional networks. The **ConvNeXt-Tiny** variant is a **lightweight** version optimized for efficiency while maintaining strong generalization capabilities.

- **Paper:** Liu, Z., Mao, H., Wu, C. Y., et al. (2022). *A ConvNet for the 2020s*. [CVPR](https://arxiv.org/abs/2201.03545).


In [47]:
ARCH = "convnext-tiny"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: convnext-tiny
Initial Backbone Model Size: 114.41 MB
Modified Backbone Model Size: 111.72 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 12.8201, Cross Entropy: 1.400498, Accuracy: 0.3028, Precision: 0.1047, Recall: 0.3028, F1 Score: 0.1556, AUC: 0.4784, R2 Score: -6.702325, MAE: 5.2561, MSE: 39.4657
Training: Fold 2 - Total Loss: 2.8738, Cross Entropy: 1.255329, Accuracy: 0.4085, Precision: 0.3132, Recall: 0.4085, F1 Score: 0.2739, AUC: 0.5114, R2 Score: -0.392105, MAE: 2.0710, MSE: 6.6501
Training: Fold 3 - Total Loss: 2.8100, Cross Entropy: 1.273609, Accuracy: 0.3609, Precision: 0.2941, Recall: 0.3609, F1 Score: 0.3057, AUC: 0.4691, R2 Score: -0.377580, MAE: 2.0140, MSE: 6.3948
Training: Fold 4 - Total Loss: 2.8523, Cross Entropy: 1.246747, Accuracy: 0.3803, Precision: 0.2402, Recall: 0.3803, F1 Score: 0.2695, AUC: 0.5149, R2 Score: -0.283264, MAE: 2.0637, MSE: 6.5988
Validation: Fold 5 - Total Loss: 2.7630, Cross Entropy: 1.237690, Accuracy: 0.3873, Precision: 0.15

Training: Fold 1 - Total Loss: 2.4993, Cross Entropy: 1.259872, Accuracy: 0.3873, Precision: 0.2813, Recall: 0.3873, F1 Score: 0.3228, AUC: 0.4814, R2 Score: -0.024604, MAE: 1.8584, MSE: 5.3913
Training: Fold 2 - Total Loss: 2.4590, Cross Entropy: 1.256395, Accuracy: 0.3680, Precision: 0.2422, Recall: 0.3680, F1 Score: 0.2732, AUC: 0.4518, R2 Score: -0.091377, MAE: 1.8350, MSE: 5.2651
Training: Fold 3 - Total Loss: 2.4123, Cross Entropy: 1.264451, Accuracy: 0.3415, Precision: 0.2657, Recall: 0.3415, F1 Score: 0.2614, AUC: 0.5095, R2 Score: -0.062604, MAE: 1.7875, MSE: 5.0907
Training: Fold 4 - Total Loss: 2.5206, Cross Entropy: 1.250292, Accuracy: 0.4014, Precision: 0.2763, Recall: 0.4014, F1 Score: 0.3152, AUC: 0.4975, R2 Score: -0.038814, MAE: 1.8582, MSE: 5.4847
Validation: Fold 5 - Total Loss: 2.4391, Cross Entropy: 1.238453, Accuracy: 0.3873, Precision: 0.1500, Recall: 0.3873, F1 Score: 0.2163, AUC: 0.5799, R2 Score: -0.014832, MAE: 1.8243, MSE: 5.2406
Avg Latency (ms): 13.09, Avg

### GhostNetV2

GhostNet introduces **"ghost modules"**, which create additional feature maps using cheap operations instead of standard convolutions. This results in **significant reductions in computation and memory usage**, making GhostNet **highly efficient for mobile vision applications**.

- **Paper:** Han, K., Wang, Y., Tian, Q., et al. (2020). *GhostNet: More Features from Cheap Operations*. [CVPR](https://arxiv.org/abs/1911.11907).


In [48]:
ARCH = "ghostnetv2"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: ghostnetv2
Initial Backbone Model Size: 25.11 MB
Modified Backbone Model Size: 20.64 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 28.4155, Cross Entropy: 1.379221, Accuracy: 0.2887, Precision: 0.1470, Recall: 0.2887, F1 Score: 0.1941, AUC: 0.5097, R2 Score: -16.536165, MAE: 9.2418, MSE: 91.5000
Training: Fold 2 - Total Loss: 8.5655, Cross Entropy: 1.381703, Accuracy: 0.3257, Precision: 0.1078, Recall: 0.3257, F1 Score: 0.1620, AUC: 0.5219, R2 Score: -4.081655, MAE: 4.2917, MSE: 25.3277
Training: Fold 3 - Total Loss: 3.0807, Cross Entropy: 1.302108, Accuracy: 0.3504, Precision: 0.2404, Recall: 0.3504, F1 Score: 0.2785, AUC: 0.4806, R2 Score: -0.488305, MAE: 2.1438, MSE: 7.2306
Training: Fold 4 - Total Loss: 2.6303, Cross Entropy: 1.257928, Accuracy: 0.4102, Precision: 0.2740, Recall: 0.4102, F1 Score: 0.2837, AUC: 0.4839, R2 Score: -0.070138, MAE: 1.9468, MSE: 5.8324
Validation: Fold 5 - Total Loss: 2.7508, Cross Entropy: 1.238332, Accuracy: 0.3873, Precision: 0.1500,

### TinyNet-A

TinyNet is designed for **extreme efficiency**, balancing depth, width, and resolution to **optimize model size without sacrificing accuracy**.

- **Paper:** Han, K., Wang, Y., Zhang, Q., et al. (2020). *Model Rubik’s Cube: Twisting Resolution, Depth and Width for TinyNets*. [NeurIPS](https://arxiv.org/abs/2109.15027).

In [49]:
ARCH = "tinynet-a"
BATCH_SIZE = 32
EPOCHS = 150
FOLDS = 5

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss functions
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Multi-class classification loss
mse_loss = torch.nn.MSELoss()  # Regression loss
mae_loss = torch.nn.L1Loss()  # Regression loss

# Set up 5-Fold Cross Validation
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Model Saving Directory
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

metrics_dir = "metrics"
os.makedirs(metrics_dir, exist_ok=True)

print("=" * 100)
print(f"Training Model: {ARCH}")

# === INITIALIZE MODEL ===
model = MultiModel(ARCH).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_acc = -float("inf")  # Track best validation accuracy
train_metrics_list = []
val_metrics_list = []

# === TRAINING LOOP ===
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    fold = 1

    for train_idx, val_idx in kf.split(range(len(image_dataset))):
        train_subset = Subset(image_dataset, train_idx)
        val_subset = Subset(image_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        if fold == FOLDS:
            # === VALIDATION PHASE ===
            val_metrics, val_stats = eval(val_loader, model, cross_entropy_loss, mse_loss, mae_loss)
            print(
                f"Validation: Fold {fold} - Total Loss: {val_metrics[0]:.4f}, Cross Entropy: {val_metrics[1]:4f}, Accuracy: {val_metrics[2]:.4f}, "
                f"Precision: {val_metrics[3]:.4f}, Recall: {val_metrics[4]:.4f}, F1 Score: {val_metrics[5]:.4f}, AUC: {val_metrics[6]:.4f}, "
                f"R2 Score: {val_metrics[7]:4f}, MAE: {val_metrics[8]:.4f}, MSE: {val_metrics[9]:.4f}"
            )
            print(
                f"Avg Latency (ms): {val_stats[0]:.2f}, Avg Memory Before (MB): {val_stats[1]:.2f}, "
                f"Avg Memory After (MB): {val_stats[2]:.2f}, Avg Max Memory (MB): {val_stats[3]:.2f}"
            )

            # Save best model based on validation accuracy
            if val_metrics[2] > best_val_acc:
                best_val_acc = val_metrics[2]
                torch.save(
                    model.state_dict(),
                    f"{weights_dir}/pytorch/model_best_accuracy_{ARCH}_{signature}.pth",
                )
                print(f"Best model saved with Accuracy: {best_val_acc:.4f}")

            # Store validation metrics
            val_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": val_metrics[0],
                    "cross_entropy_loss": val_metrics[1],
                    "accuracy": val_metrics[2],
                    "precision": val_metrics[3],
                    "recall": val_metrics[4],
                    "f1_score": val_metrics[5],
                    "auc": val_metrics[6],
                    "r2_score": val_metrics[7],
                    "mae_loss": val_metrics[8],
                    "mse_loss": val_metrics[9],
                    "latency": val_stats[0],
                    "malloc_before": val_stats[1],
                    "malloc_after": val_stats[2],
                    "max_malloc": val_stats[3],
                }
            )

        else:
            # === TRAINING PHASE ===
            train_metrics = train(train_loader, model, cross_entropy_loss, mse_loss, mae_loss, optimizer)
            print(
                f"Training: Fold {fold} - Total Loss: {train_metrics[0]:.4f}, Cross Entropy: {train_metrics[1]:4f}, Accuracy: {train_metrics[2]:.4f}, "
                f"Precision: {train_metrics[3]:.4f}, Recall: {train_metrics[4]:.4f}, F1 Score: {train_metrics[5]:.4f}, AUC: {train_metrics[6]:.4f}, "
                f"R2 Score: {train_metrics[7]:4f}, MAE: {train_metrics[8]:.4f}, MSE: {train_metrics[9]:.4f}"
            )

            # Store training metrics
            train_metrics_list.append(
                {
                    "epoch": epoch + 1,
                    "fold": fold,
                    "total_loss": train_metrics[0],
                    "cross_entropy_loss": train_metrics[1],
                    "accuracy": train_metrics[2],
                    "precision": train_metrics[3],
                    "recall": train_metrics[4],
                    "f1_score": train_metrics[5],
                    "auc": train_metrics[6],
                    "r2_score": train_metrics[7],
                    "mae_loss": train_metrics[8],
                    "mse_loss": train_metrics[9],
                }
            )

        fold += 1  # Move to next fold
    
    keys = train_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/training_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(train_metrics_list)

    keys = val_metrics_list[0].keys()
    with open(f"{metrics_dir}/pytorch/validation_metrics_{ARCH}_{signature}.csv", 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(val_metrics_list)

print(f"\nFine-tuned {get_model_size(model)}")
print("=" * 100)

Training Model: tinynet-a
Initial Backbone Model Size: 25.08 MB
Modified Backbone Model Size: 20.62 MB


Epoch 1/150
Training: Fold 1 - Total Loss: 31.2773, Cross Entropy: 1.362520, Accuracy: 0.3292, Precision: 0.3371, Recall: 0.3292, F1 Score: 0.2290, AUC: 0.5347, R2 Score: -18.319000, MAE: 9.7653, MSE: 101.0783
Training: Fold 2 - Total Loss: 23.9671, Cross Entropy: 1.352206, Accuracy: 0.2870, Precision: 0.2375, Recall: 0.2870, F1 Score: 0.2137, AUC: 0.5365, R2 Score: -14.345790, MAE: 8.3877, MSE: 76.7351
Training: Fold 3 - Total Loss: 15.7643, Cross Entropy: 1.334208, Accuracy: 0.3451, Precision: 0.1526, Recall: 0.3451, F1 Score: 0.2116, AUC: 0.5480, R2 Score: -9.138172, MAE: 6.6243, MSE: 49.4345
Training: Fold 4 - Total Loss: 9.2992, Cross Entropy: 1.273267, Accuracy: 0.4173, Precision: 0.1741, Recall: 0.4173, F1 Score: 0.2457, AUC: 0.5249, R2 Score: -4.225791, MAE: 4.7668, MSE: 28.0262
Validation: Fold 5 - Total Loss: 5.8403, Cross Entropy: 1.242029, Accuracy: 0.3873, Precision: 0.

## Running Inference on All 12 Models:

In [53]:
for arch in models_list:
  test_metrics_list = []
  print("="*100)
  print(f"{arch}")
  model = MultiModel(arch).to(device)
  model.load_state_dict(torch.load(f"{weights_dir}/pytorch/model_best_accuracy_{arch}_{signature}.pth"))

  # === Testing PHASE ===
  test_metrics, test_stats = eval(test_loader, model, cross_entropy_loss, mse_loss, mae_loss)
  print(
      f"Testing: Total Loss: {test_metrics[0]:.4f}, Cross Entropy: {test_metrics[1]:4f}, Accuracy: {test_metrics[2]:.4f}, "
      f"Precision: {test_metrics[3]:.4f}, Recall: {test_metrics[4]:.4f}, F1 Score: {test_metrics[5]:.4f}, AUC: {test_metrics[6]:.4f}, "
      f"R2 Score: {test_metrics[7]:4f}, MAE: {test_metrics[8]:.4f}, MSE: {test_metrics[9]:.4f}"
  )
  print(
      f"Avg Latency (ms): {test_stats[0]:.2f}, Avg Memory Before (MB): {test_stats[1]:.2f}, "
      f"Avg Memory After (MB): {test_stats[2]:.2f}, Avg Max Memory (MB): {test_stats[3]:.2f}"
  )

  # Store validation metrics
  test_metrics_list.append(
      {
          "total_loss": test_metrics[0],
          "cross_entropy_loss": test_metrics[1],
          "accuracy": test_metrics[2],
          "precision": test_metrics[3],
          "recall": test_metrics[4],
          "f1_score": test_metrics[5],
          "auc": test_metrics[6],
          "r2_score": test_metrics[7],
          "mae_loss": test_metrics[8],
          "mse_loss": test_metrics[9],
          "latency": test_stats[0],
          "malloc_before": test_stats[1],
          "malloc_after": test_stats[2],
          "max_malloc": test_stats[3],
      }
  )

  keys = test_metrics_list[0].keys()
  with open(f"{metrics_dir}/pytorch/testing_metrics_{arch}_{signature}.csv", 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(test_metrics_list)

mobilenetv2
Initial Backbone Model Size: 14.24 MB
Modified Backbone Model Size: 9.78 MB

Testing: Total Loss: 1.7886, Cross Entropy: 0.374430, Accuracy: 0.8239, Precision: 0.8128, Recall: 0.8239, F1 Score: 0.8158, AUC: 0.9603, R2 Score: 0.005458, MAE: 1.8971, MSE: 5.0883
Avg Latency (ms): 3.93, Avg Memory Before (MB): 58.84, Avg Memory After (MB): 58.84, Avg Max Memory (MB): 343.91
resnet18
Initial Backbone Model Size: 46.83 MB
Modified Backbone Model Size: 45.04 MB

Testing: Total Loss: 1.7478, Cross Entropy: 0.304619, Accuracy: 0.8662, Precision: 0.8718, Recall: 0.8662, F1 Score: 0.8673, AUC: 0.9667, R2 Score: 0.007559, MAE: 1.9129, MSE: 5.1151
Avg Latency (ms): 3.20, Avg Memory Before (MB): 92.46, Avg Memory After (MB): 92.46, Avg Max Memory (MB): 266.41
densenet121
Initial Backbone Model Size: 32.47 MB
Modified Backbone Model Size: 28.90 MB

Testing: Total Loss: 1.6863, Cross Entropy: 0.238761, Accuracy: 0.8803, Precision: 0.8844, Recall: 0.8803, F1 Score: 0.8770, AUC: 0.9762, R2 S