In [1]:
# mount google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [23]:
from pathlib import Path
from torchvision.models import resnet50, ResNet50_Weights
import torch
import torch.nn as nn
from torch.amp import autocast, GradScaler
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm
import pandas as pd
import numpy as np
import os
from PIL import Image
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score, precision_score, recall_score, hamming_loss

In [24]:
# === CONSTANTS === #
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BASE_DIR = Path("/content/drive/MyDrive/Deep Learning Project Group")
TRAIN_CSV = BASE_DIR / "CheXRetriever/data/train/train_metadata_merged.csv"
VAL_CSV = BASE_DIR / "CheXRetriever/data/val/val_metadata_merged.csv"
IMAGE_ROOT = BASE_DIR / "CheXRetriever/data"
LABELS = [
    "Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Enlarged Cardiomediastinum",
    "Fracture", "Lung Lesion", "Lung Opacity", "No Finding", "Pleural Effusion",
    "Pleural Other", "Pneumonia", "Pneumothorax", "Support Devices"
]

In [25]:
# === DATA === #
def load_split(csv_path, split):
    df = pd.read_csv(csv_path)
    df["split"] = split
    df["filename"] = df["path_to_image"].apply(lambda x: "_".join(x.split("/")).replace(".jpg", ".png"))
    df[LABELS] = df[LABELS].fillna(0).replace(-1, 0)
    return df[["filename", "split"] + LABELS]

train_df = load_split(TRAIN_CSV, "train")
val_df = load_split(VAL_CSV, "val")

image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

In [26]:
class CheXpertDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, image_root, transform):
        self.df = dataframe.reset_index(drop=True)
        self.image_root = image_root
        self.transform = transform

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_path = self.image_root / row["split"] / "images" / row["filename"]
        image = Image.open(image_path).convert("RGB")
        image = self.transform(image)
        labels = torch.tensor(row[LABELS].values.astype(np.float32))
        return image, labels

    def __len__(self):
        return len(self.df)

train_ds = CheXpertDataset(train_df, IMAGE_ROOT, image_transform)
val_ds = CheXpertDataset(val_df, IMAGE_ROOT, image_transform)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=os.cpu_count(), pin_memory=True)
val_dl = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=os.cpu_count(), pin_memory=True)

In [27]:
print("Train size:", train_df.shape)
print("Val size:", val_df.shape)

Train size: (20602, 16)
Val size: (4415, 16)


In [28]:
# === MODEL === #
class ResNetClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        base = resnet50(weights=ResNet50_Weights.DEFAULT)
        self.encoder = nn.Sequential(*list(base.children())[:-1])
        for name, param in self.encoder.named_parameters():
            if "layer4" in name:
                param.requires_grad = True  # fine-tune last ResNet block
            else:
                param.requires_grad = False
        self.dropout = nn.Dropout(p=0.3)
        self.classifier = nn.Linear(2048, len(LABELS))

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        return self.classifier(x)

In [29]:
# === EVALUATE === #
def evaluate(model, val_dl):
    model.eval()
    all_labels, all_preds = [], []
    with torch.no_grad():
        for images, labels in val_dl:
            images = images.to(DEVICE)
            outputs = model(images).sigmoid().cpu()
            all_preds.append(outputs)
            all_labels.append(labels)

    all_preds = torch.cat(all_preds).numpy()
    all_labels = torch.cat(all_labels).numpy()
    pred_bin = (all_preds > 0.5).astype(int)

    results = {}
    results['AUROC'] = [roc_auc_score(all_labels[:, i], all_preds[:, i]) for i in range(len(LABELS))]
    results['F1'] = [f1_score(all_labels[:, i], pred_bin[:, i], zero_division=0) for i in range(len(LABELS))]
    results['mAP'] = np.nanmean([average_precision_score(all_labels[:, i], all_preds[:, i]) for i in range(len(LABELS))])
    results['Precision'] = np.nanmean([precision_score(all_labels[:, i], pred_bin[:, i], zero_division=0) for i in range(len(LABELS))])
    results['Recall'] = np.nanmean([recall_score(all_labels[:, i], pred_bin[:, i], zero_division=0) for i in range(len(LABELS))])
    results['Hamming Loss'] = hamming_loss(all_labels, pred_bin)
    results['Mean AUROC'] = np.nanmean(results['AUROC'])
    results['Mean F1'] = np.nanmean(results['F1'])

    return results

In [30]:
# === TRAIN === #
model = ResNetClassifier().to(DEVICE)
model.load_state_dict(torch.load(BASE_DIR / "resnet_model/best_resnet_classifier.pt"))
model.train()

optimizer = torch.optim.Adam([
    {"params": model.encoder.parameters(), "lr": 1e-5},
    {"params": model.classifier.parameters(), "lr": 1e-4}
])
criterion = nn.BCEWithLogitsLoss()
scaler = torch.amp.GradScaler()
best_val_auroc = 0.0
save_path = BASE_DIR / "resnet_model/best_resnet_finetuned.pt"

EPOCHS = 5
for epoch in range(EPOCHS):
    total_loss = 0
    loop = tqdm(train_dl, desc=f"Fine-Tune Epoch {epoch+1}/{EPOCHS}")

    for images, targets in loop:
        images, targets = images.to(DEVICE), targets.to(DEVICE)
        with torch.amp.autocast('cuda'):
            outputs = model(images)
            loss = criterion(outputs, targets)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    print(f"Epoch {epoch+1}: Avg Train Loss = {total_loss / len(train_dl):.4f}")

    metrics = evaluate(model, val_dl)

    print("\n=== Evaluation Results ===")
    print(f"Mean AUROC:     {metrics['Mean AUROC']:.4f}")
    print(f"Mean AP (mAP):  {metrics['mAP']:.4f}")
    print(f"Mean F1 Score:  {metrics['Mean F1']:.4f}")
    print(f"Mean Precision: {metrics['Precision']:.4f}")
    print(f"Mean Recall:    {metrics['Recall']:.4f}")
    print(f"Hamming Loss:   {metrics['Hamming Loss']:.4f}\n")

    for i, lbl in enumerate(LABELS):
        print(f"{lbl:25s}: AUROC={metrics['AUROC'][i]:.3f}, F1={metrics['F1'][i]:.3f}")

    # ✅ Save model if AUROC improved
    if metrics["Mean AUROC"] > best_val_auroc:
        best_val_auroc = metrics["Mean AUROC"]
        torch.save(model.state_dict(), save_path)
        print(f"✅ New best model saved to {save_path}")

Fine-Tune Epoch 1/5: 100%|██████████| 322/322 [05:03<00:00,  1.06it/s, loss=0.238]

Epoch 1: Avg Train Loss = 0.2944






=== Evaluation Results ===
Mean AUROC:     0.6624
Mean AP (mAP):  0.2268
Mean F1 Score:  0.0795
Mean Precision: 0.2529
Mean Recall:    0.0514
Hamming Loss:   0.1148

Atelectasis              : AUROC=0.660, F1=0.000
Cardiomegaly             : AUROC=0.706, F1=0.000
Consolidation            : AUROC=0.615, F1=0.000
Edema                    : AUROC=0.778, F1=0.008
Enlarged Cardiomediastinum: AUROC=0.539, F1=0.000
Fracture                 : AUROC=0.562, F1=0.000
Lung Lesion              : AUROC=0.584, F1=0.000
Lung Opacity             : AUROC=0.660, F1=0.229
No Finding               : AUROC=0.751, F1=0.185
Pleural Effusion         : AUROC=0.792, F1=0.363
Pleural Other            : AUROC=0.637, F1=0.000
Pneumonia                : AUROC=0.568, F1=0.000
Pneumothorax             : AUROC=0.688, F1=0.000
Support Devices          : AUROC=0.734, F1=0.328
✅ New best model saved to /content/drive/MyDrive/Deep Learning Project Group/resnet_model/best_resnet_finetuned.pt


Fine-Tune Epoch 2/5: 100%|██████████| 322/322 [00:31<00:00, 10.14it/s, loss=0.321]

Epoch 2: Avg Train Loss = 0.2911






=== Evaluation Results ===
Mean AUROC:     0.6640
Mean AP (mAP):  0.2277
Mean F1 Score:  0.0824
Mean Precision: 0.2494
Mean Recall:    0.0542
Hamming Loss:   0.1149

Atelectasis              : AUROC=0.661, F1=0.000
Cardiomegaly             : AUROC=0.707, F1=0.000
Consolidation            : AUROC=0.618, F1=0.000
Edema                    : AUROC=0.781, F1=0.008
Enlarged Cardiomediastinum: AUROC=0.540, F1=0.000
Fracture                 : AUROC=0.565, F1=0.000
Lung Lesion              : AUROC=0.584, F1=0.000
Lung Opacity             : AUROC=0.660, F1=0.235
No Finding               : AUROC=0.753, F1=0.193
Pleural Effusion         : AUROC=0.793, F1=0.377
Pleural Other            : AUROC=0.640, F1=0.000
Pneumonia                : AUROC=0.567, F1=0.000
Pneumothorax             : AUROC=0.690, F1=0.000
Support Devices          : AUROC=0.735, F1=0.342
✅ New best model saved to /content/drive/MyDrive/Deep Learning Project Group/resnet_model/best_resnet_finetuned.pt


Fine-Tune Epoch 3/5: 100%|██████████| 322/322 [00:32<00:00, 10.02it/s, loss=0.305]

Epoch 3: Avg Train Loss = 0.2903






=== Evaluation Results ===
Mean AUROC:     0.6653
Mean AP (mAP):  0.2283
Mean F1 Score:  0.0864
Mean Precision: 0.2494
Mean Recall:    0.0579
Hamming Loss:   0.1146

Atelectasis              : AUROC=0.664, F1=0.000
Cardiomegaly             : AUROC=0.708, F1=0.000
Consolidation            : AUROC=0.621, F1=0.000
Edema                    : AUROC=0.783, F1=0.008
Enlarged Cardiomediastinum: AUROC=0.540, F1=0.000
Fracture                 : AUROC=0.566, F1=0.000
Lung Lesion              : AUROC=0.586, F1=0.000
Lung Opacity             : AUROC=0.661, F1=0.275
No Finding               : AUROC=0.753, F1=0.194
Pleural Effusion         : AUROC=0.794, F1=0.404
Pleural Other            : AUROC=0.644, F1=0.000
Pneumonia                : AUROC=0.569, F1=0.000
Pneumothorax             : AUROC=0.691, F1=0.000
Support Devices          : AUROC=0.736, F1=0.330
✅ New best model saved to /content/drive/MyDrive/Deep Learning Project Group/resnet_model/best_resnet_finetuned.pt


Fine-Tune Epoch 4/5: 100%|██████████| 322/322 [00:32<00:00, 10.04it/s, loss=0.273]

Epoch 4: Avg Train Loss = 0.2895






=== Evaluation Results ===
Mean AUROC:     0.6666
Mean AP (mAP):  0.2291
Mean F1 Score:  0.0899
Mean Precision: 0.2450
Mean Recall:    0.0615
Hamming Loss:   0.1147

Atelectasis              : AUROC=0.665, F1=0.000
Cardiomegaly             : AUROC=0.708, F1=0.000
Consolidation            : AUROC=0.623, F1=0.000
Edema                    : AUROC=0.784, F1=0.008
Enlarged Cardiomediastinum: AUROC=0.543, F1=0.000
Fracture                 : AUROC=0.565, F1=0.000
Lung Lesion              : AUROC=0.588, F1=0.000
Lung Opacity             : AUROC=0.661, F1=0.249
No Finding               : AUROC=0.754, F1=0.226
Pleural Effusion         : AUROC=0.795, F1=0.422
Pleural Other            : AUROC=0.644, F1=0.000
Pneumonia                : AUROC=0.570, F1=0.000
Pneumothorax             : AUROC=0.695, F1=0.000
Support Devices          : AUROC=0.738, F1=0.355
✅ New best model saved to /content/drive/MyDrive/Deep Learning Project Group/resnet_model/best_resnet_finetuned.pt


Fine-Tune Epoch 5/5: 100%|██████████| 322/322 [00:31<00:00, 10.10it/s, loss=0.263]

Epoch 5: Avg Train Loss = 0.2887






=== Evaluation Results ===
Mean AUROC:     0.6675
Mean AP (mAP):  0.2298
Mean F1 Score:  0.0879
Mean Precision: 0.2478
Mean Recall:    0.0593
Hamming Loss:   0.1146

Atelectasis              : AUROC=0.664, F1=0.000
Cardiomegaly             : AUROC=0.711, F1=0.000
Consolidation            : AUROC=0.625, F1=0.000
Edema                    : AUROC=0.786, F1=0.008
Enlarged Cardiomediastinum: AUROC=0.541, F1=0.000
Fracture                 : AUROC=0.564, F1=0.000
Lung Lesion              : AUROC=0.589, F1=0.000
Lung Opacity             : AUROC=0.661, F1=0.242
No Finding               : AUROC=0.754, F1=0.215
Pleural Effusion         : AUROC=0.795, F1=0.412
Pleural Other            : AUROC=0.649, F1=0.000
Pneumonia                : AUROC=0.571, F1=0.000
Pneumothorax             : AUROC=0.697, F1=0.000
Support Devices          : AUROC=0.738, F1=0.352
✅ New best model saved to /content/drive/MyDrive/Deep Learning Project Group/resnet_model/best_resnet_finetuned.pt


In [31]:
metrics = evaluate(model, val_dl)

print("\n=== Evaluation Results ===")
print(f"Mean AUROC:     {metrics['Mean AUROC']:.4f}")
print(f"Mean AP (mAP):  {metrics['mAP']:.4f}")
print(f"Mean F1 Score:  {metrics['Mean F1']:.4f}")
print(f"Mean Precision: {metrics['Precision']:.4f}")
print(f"Mean Recall:    {metrics['Recall']:.4f}")
print(f"Hamming Loss:   {metrics['Hamming Loss']:.4f}\n")

for i, lbl in enumerate(LABELS):
    print(f"{lbl:25s}: AUROC={metrics['AUROC'][i]:.3f}, F1={metrics['F1'][i]:.3f}")


=== Evaluation Results ===
Mean AUROC:     0.6675
Mean AP (mAP):  0.2298
Mean F1 Score:  0.0879
Mean Precision: 0.2478
Mean Recall:    0.0593
Hamming Loss:   0.1146

Atelectasis              : AUROC=0.664, F1=0.000
Cardiomegaly             : AUROC=0.711, F1=0.000
Consolidation            : AUROC=0.625, F1=0.000
Edema                    : AUROC=0.786, F1=0.008
Enlarged Cardiomediastinum: AUROC=0.541, F1=0.000
Fracture                 : AUROC=0.564, F1=0.000
Lung Lesion              : AUROC=0.589, F1=0.000
Lung Opacity             : AUROC=0.661, F1=0.242
No Finding               : AUROC=0.754, F1=0.215
Pleural Effusion         : AUROC=0.795, F1=0.412
Pleural Other            : AUROC=0.649, F1=0.000
Pneumonia                : AUROC=0.571, F1=0.000
Pneumothorax             : AUROC=0.697, F1=0.000
Support Devices          : AUROC=0.738, F1=0.352
