# Model Training

In [3]:

import torch
from torch.optim import Optimizer
from torch import optim, nn

from pneumonia_x_ray_images_classifier.modeling.model import PneumoniaClassifierModel
from torchvision import transforms

from pneumonia_x_ray_images_classifier.data.make_dataset import get_latest_pneumonia_dataset
from pneumonia_x_ray_images_classifier.dataset import PneumoniaDataset
from pneumonia_x_ray_images_classifier.config import PROJ_ROOT
from pathlib import Path

# Resolve the dataset directory and echo it so the run records which data was used.
root_dir = get_latest_pneumonia_dataset()
print(root_dir)

# Preprocessing settings shared by both splits.
# NOTE(review): mean/std look like the usual ImageNet channel statistics for a
# pretrained backbone - confirm against the model definition.
_IMAGE_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_BATCH_SIZE = 32

# Training and validation currently use the same deterministic pipeline
# (resize -> tensor -> normalise); they stay separate objects so that one can
# be changed without affecting the other.
train_transforms = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])
val_transforms = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

train_dataset = PneumoniaDataset(root_dir, split='train', transform=train_transforms)
val_dataset = PneumoniaDataset(root_dir, split='val', transform=val_transforms)

# Only the training batches are shuffled; validation order stays fixed.
train_dataset_loader = torch.utils.data.DataLoader(train_dataset, batch_size=_BATCH_SIZE, shuffle=True)
val_dataset_loader = torch.utils.data.DataLoader(val_dataset, batch_size=_BATCH_SIZE, shuffle=False)


def make_model(learning_rate=1e-3, dropout=0.0, freeze_bn=True, unfreeze_last_n: int = 0, weight_decay: float = 0):
    """Build the classifier, move it to the available device and attach an Adam optimiser.

    Args:
        learning_rate: Learning rate passed to Adam.
        dropout: Forwarded to ``PneumoniaClassifierModel`` as ``dropout``.
        freeze_bn: Forwarded to ``PneumoniaClassifierModel`` as ``freeze_backbone``
            (note the differing parameter name).
        unfreeze_last_n: Forwarded to ``PneumoniaClassifierModel`` as ``unfreeze_last_n``.
        weight_decay: Weight decay passed to Adam.

    Returns:
        A ``(model, optimiser, device)`` tuple.
    """
    # Use CUDA when it is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    model = PneumoniaClassifierModel(
        dropout=dropout,
        freeze_backbone=freeze_bn,
        unfreeze_last_n=unfreeze_last_n,
    )
    model.to(device)

    # Only parameters that still require gradients are handed to the optimiser.
    trainable_params = [param for param in model.parameters() if param.requires_grad]
    optimiser = optim.Adam(trainable_params, lr=learning_rate, weight_decay=weight_decay)

    return model, optimiser, device


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when ``denominator`` is zero."""
    if denominator == 0:
        return float("nan")
    return numerator / denominator


def train_and_evaluate(name: str, model: nn.Module, optimizer: Optimizer, criterion,
                       train_loader: torch.utils.data.DataLoader, val_loader: torch.utils.data.DataLoader,
                       device: torch.device, num_epochs: int, enable_checkpointing=False):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Each epoch prints the training loss/accuracy and the validation
    loss/accuracy/recall, where recall is computed for label ``1`` (printed as
    "PNEU"). Losses are the mean of the per-batch ``criterion`` values.

    Any ratio whose denominator is zero (an empty loader, or a validation set
    without label-1 samples) is reported as NaN instead of raising
    ``ZeroDivisionError``; a NaN recall never triggers a checkpoint.

    Args:
        name: Prefix used for checkpoint file names.
        model: Network to train; its parameters are updated through ``optimizer``.
        optimizer: Optimiser that is zeroed and stepped once per training batch.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        train_loader: Iterable of ``(images, labels)`` training batches supporting ``len()``.
        val_loader: Iterable of ``(images, labels)`` validation batches supporting ``len()``.
        device: Device every batch is moved to before the forward pass.
        num_epochs: Number of passes over ``train_loader``.
        enable_checkpointing: When true, save ``model.state_dict()`` under
            ``PROJ_ROOT / "models" / "checkpoints"`` whenever the validation recall
            exceeds the best value seen so far in this call.

    Returns:
        None.
    """
    best_val_recall = 0
    for epoch in range(num_epochs):
        # ---- Training phase ----
        model.train()
        train_running_loss = 0
        train_running_corrects = 0
        train_running_total = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_running_loss += loss.item()
            # argmax yields integer class indices and carries no gradient,
            # so the legacy ``outputs.data`` access is not needed.
            predicted_indices = outputs.argmax(dim=1)
            train_running_total += labels.size(0)
            train_running_corrects += (predicted_indices == labels).sum().item()

        train_loss = _safe_ratio(train_running_loss, len(train_loader))
        train_acc = _safe_ratio(train_running_corrects, train_running_total)

        # ---- Validation phase ----
        model.eval()
        val_running_loss = 0
        val_running_corrects = 0
        val_running_total = 0
        tp = 0
        fn = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)

                loss = criterion(outputs, labels)

                val_running_loss += loss.item()
                predicted_indices = outputs.argmax(dim=1)
                val_running_total += labels.size(0)
                val_running_corrects += (predicted_indices == labels).sum().item()

                pneu = (labels == 1)
                tp += ((predicted_indices == 1) & pneu).sum().item()  # Correctly predicted pneumonia cases
                fn += ((predicted_indices == 0) & pneu).sum().item()  # Missed pneumonia cases (the dangerous ones)

        val_loss = _safe_ratio(val_running_loss, len(val_loader))
        val_acc = _safe_ratio(val_running_corrects, val_running_total)
        # Share of label-1 samples predicted as 1; NaN when there were none.
        val_recall = _safe_ratio(tp, tp + fn)

        print(f'Epoch: {epoch + 1}')
        print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}, Val Recall(PNEU): {val_recall:.4f}')

        # A NaN recall compares as False here, so it never produces a checkpoint.
        if enable_checkpointing and (val_recall > best_val_recall):
            best_val_recall = val_recall
            # NOTE: ``04f`` is a width spec, not a precision, so the file name carries
            # six decimals; kept unchanged so existing checkpoint names stay valid.
            checkpoint_path = PROJ_ROOT / "models" / "checkpoints" / f'{name}_{epoch + 1:02d}_recall{val_recall:04f}.pth'
            # Make sure the target directory exists before saving.
            # Assumes PROJ_ROOT is a pathlib.Path - TODO confirm in the project config.
            checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
            torch.save(model.state_dict(), checkpoint_path)
            print(f'Model saved to {checkpoint_path}')

/Users/talhaakhoon/Documents/Dev/pneumonia_x_ray_images_classifier/data/processed


## Model 1. Training Baseline

In [9]:

# Create the checkpoint directory under PROJ_ROOT, which is where
# train_and_evaluate writes its checkpoints, instead of a path relative to
# the current working directory (the two differ unless the notebook is
# started from the project root).
(PROJ_ROOT / "models" / "checkpoints").mkdir(parents=True, exist_ok=True)

# Baseline settings: backbone frozen, no dropout.
lr, dr = 1e-3, 0.0
freeze_backbone = True

model, optimizer, device = make_model(
    learning_rate=lr,
    dropout=dr,
    freeze_bn=freeze_backbone,
)
criterion = nn.CrossEntropyLoss()

# Checkpoint file names encode the hyper-parameters of this run.
run_name = f"mobilenet_v2_lr{lr}_drop{dr}_{'' if freeze_backbone else 'Un'}frozen_best"

train_and_evaluate(
    name=run_name,
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_loader=train_dataset_loader,
    val_loader=val_dataset_loader,
    device=device,
    num_epochs=10,
    enable_checkpointing=True,
)


Epoch: 1
Train Loss: 0.2361, Train Accuracy: 0.9065
Val Loss: 0.1332, Val Accuracy: 0.9540, Val Recall(PNEU): 0.9755
Model saved to models/checkpoints/mobilenet_v2_lr0.001_drop0.0_frozen_best_01_recall0.975515.pth
Epoch: 2
Train Loss: 0.1608, Train Accuracy: 0.9341
Val Loss: 0.1147, Val Accuracy: 0.9617, Val Recall(PNEU): 0.9742
Epoch: 3
Train Loss: 0.1551, Train Accuracy: 0.9415
Val Loss: 0.1044, Val Accuracy: 0.9626, Val Recall(PNEU): 0.9832
Model saved to models/checkpoints/mobilenet_v2_lr0.001_drop0.0_frozen_best_03_recall0.983247.pth
Epoch: 4
Train Loss: 0.1255, Train Accuracy: 0.9487
Val Loss: 0.0989, Val Accuracy: 0.9655, Val Recall(PNEU): 0.9794
Epoch: 5
Train Loss: 0.1277, Train Accuracy: 0.9485
Val Loss: 0.0962, Val Accuracy: 0.9626, Val Recall(PNEU): 0.9755
Epoch: 6
Train Loss: 0.1342, Train Accuracy: 0.9492
Val Loss: 0.0985, Val Accuracy: 0.9655, Val Recall(PNEU): 0.9729
Epoch: 7
Train Loss: 0.1147, Train Accuracy: 0.9569
Val Loss: 0.1006, Val Accuracy: 0.9579, Val Recall(P

The model converged rapidly within the first few epochs, which is expected when training only a small classifier head on top of a pretrained backbone.

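Training and validation losses trended downward over the epochs (with small epoch-to-epoch fluctuations and a plateau in validation loss after the first few epochs), and there was no sustained divergence between them.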

Validation accuracy remained stable in the mid-90% range throughout training.

Validation recall for pneumonia was consistently high (≈97–99%), indicating strong sensitivity to positive cases.

Even though the baseline generalises well, would explicit regularisation reduce variance and stabilise recall? The next model tests this by adding dropout with a rate of 0.5, a standard strong regulariser.

## Model 2. Training Baseline Model with Regularisation (Dropout 0.5)

In [10]:
# Same frozen-backbone setup as the baseline, now with dropout 0.5.
lr, dr = 1e-3, 0.5
freeze_backbone = True

model, optimizer, device = make_model(
    learning_rate=lr,
    dropout=dr,
    freeze_bn=freeze_backbone,
)
criterion = nn.CrossEntropyLoss()

# Checkpoint file names encode the hyper-parameters of this run.
run_name = f"mobilenet_v2_lr{lr}_drop{dr}_{'' if freeze_backbone else 'Un'}frozen_best"

train_and_evaluate(
    name=run_name,
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_loader=train_dataset_loader,
    val_loader=val_dataset_loader,
    device=device,
    num_epochs=10,
    enable_checkpointing=True,
)

Epoch: 1
Train Loss: 0.2648, Train Accuracy: 0.8871
Val Loss: 0.1388, Val Accuracy: 0.9550, Val Recall(PNEU): 0.9729
Model saved to models/checkpoints/mobilenet_v2_lr0.001_drop0.5_frozen_best_01_recall0.972938.pth
Epoch: 2
Train Loss: 0.1886, Train Accuracy: 0.9240
Val Loss: 0.1294, Val Accuracy: 0.9598, Val Recall(PNEU): 0.9678
Epoch: 3
Train Loss: 0.1739, Train Accuracy: 0.9298
Val Loss: 0.1210, Val Accuracy: 0.9607, Val Recall(PNEU): 0.9704
Epoch: 4
Train Loss: 0.1683, Train Accuracy: 0.9355
Val Loss: 0.1303, Val Accuracy: 0.9540, Val Recall(PNEU): 0.9510
Epoch: 5
Train Loss: 0.2011, Train Accuracy: 0.9190
Val Loss: 0.1023, Val Accuracy: 0.9636, Val Recall(PNEU): 0.9807
Model saved to models/checkpoints/mobilenet_v2_lr0.001_drop0.5_frozen_best_05_recall0.980670.pth
Epoch: 6
Train Loss: 0.1714, Train Accuracy: 0.9350
Val Loss: 0.1399, Val Accuracy: 0.9531, Val Recall(PNEU): 0.9472
Epoch: 7
Train Loss: 0.1668, Train Accuracy: 0.9338
Val Loss: 0.1070, Val Accuracy: 0.9646, Val Recall(P

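Adding dropout (0.5) to the frozen-head baseline did not improve validation recall: the best logged value was 0.9807 versus 0.9832 for the baseline, and recall fluctuated more from epoch to epoch. This suggests the baseline already generalised well and that the extra regularisation reduced sensitivity.

## Model 3. Unfreezing the Last 3 Layers (Learning Rate 1e-4, Dropout 0.5, Weight Decay 1e-4)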

In [4]:
# Fine-tuning settings: lower learning rate, dropout 0.5, weight decay, and the
# last `unfreeze_layers` layers requested as trainable via `unfreeze_last_n`.
lr, dr = 1e-4, 0.5
freeze_backbone = False
unfreeze_layers = 3
decay = 1e-4

model, optimizer, device = make_model(
    learning_rate=lr,
    dropout=dr,
    freeze_bn=freeze_backbone,
    unfreeze_last_n=unfreeze_layers,
    weight_decay=decay,
)
criterion = nn.CrossEntropyLoss()

# Checkpoint file names encode the hyper-parameters of this run.
run_name = f"mobilenet_v2_lr{lr}_drop{dr}_{'' if freeze_backbone else 'Un'}frozen_layers_{unfreeze_layers}_best"

train_and_evaluate(
    name=run_name,
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_loader=train_dataset_loader,
    val_loader=val_dataset_loader,
    device=device,
    num_epochs=10,
    enable_checkpointing=True,
)

Epoch: 1
Train Loss: 0.1639, Train Accuracy: 0.9350
Val Loss: 0.0685, Val Accuracy: 0.9732, Val Recall(PNEU): 0.9871
Model saved to /Users/talhaakhoon/Documents/Dev/pneumonia_x_ray_images_classifier/models/checkpoints/mobilenet_v2_lr0.0001_drop0.5_Unfrozen_layers_3_best_01_recall0.987113.pth
Epoch: 2
Train Loss: 0.0788, Train Accuracy: 0.9720
Val Loss: 0.0555, Val Accuracy: 0.9799, Val Recall(PNEU): 0.9820
Epoch: 3
Train Loss: 0.0439, Train Accuracy: 0.9854
Val Loss: 0.0561, Val Accuracy: 0.9780, Val Recall(PNEU): 0.9794
Epoch: 4
Train Loss: 0.0324, Train Accuracy: 0.9904
Val Loss: 0.0426, Val Accuracy: 0.9856, Val Recall(PNEU): 0.9897
Model saved to /Users/talhaakhoon/Documents/Dev/pneumonia_x_ray_images_classifier/models/checkpoints/mobilenet_v2_lr0.0001_drop0.5_Unfrozen_layers_3_best_04_recall0.989691.pth
Epoch: 5
Train Loss: 0.0166, Train Accuracy: 0.9940
Val Loss: 0.0534, Val Accuracy: 0.9789, Val Recall(PNEU): 0.9948
Model saved to /Users/talhaakhoon/Documents/Dev/pneumonia_x_ray

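The best model so far appears to be `mobilenet_v2_lr0.0001_drop0.5_Unfrozen_layers_3_best_05_recall0.994845.pth`, the epoch-5 checkpoint of the partially unfrozen run, with a validation pneumonia recall of about 0.9948.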