In [2]:
import os
import cv2
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch import amp

import timm
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score
import pandas as pd
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

torch.backends.cudnn.benchmark = True


Using device: cuda


In [3]:
# 1. Load the NIH ChestX-ray14 metadata
df = pd.read_csv("Data_Entry_2017.csv")

# 2. Define all 14 disease classes
all_labels = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass',
    'Nodule','Pneumonia','Pneumothorax','Consolidation','Edema',
    'Emphysema','Fibrosis','Pleural_Thickening','Hernia'
]

# 3. Encode labels
def encode_labels(label_str):
    vec = [0] * 14
    if label_str != "No Finding":
        for disease in label_str.split('|'):
            if disease in all_labels:
                vec[all_labels.index(disease)] = 1
    return vec

df['labels'] = df['Finding Labels'].apply(encode_labels)
labels_expanded = pd.DataFrame(df['labels'].tolist(), columns=all_labels)
df_clean = pd.concat([df[['Image Index']], labels_expanded], axis=1)

# Save clean CSV
df_clean.to_csv("labels_clean.csv", index=False)
print("‚úÖ Saved labels_clean.csv with shape:", df_clean.shape)



‚úÖ Saved labels_clean.csv with shape: (112120, 15)


In [4]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Train augmentations
train_transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CLAHE(p=0.3),
    A.CoarseDropout(max_holes=1, hole_height=32, hole_width=32, p=0.3),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

# Validation transforms
val_transform = A.Compose([
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])


  A.CoarseDropout(max_holes=1, hole_height=32, hole_width=32, p=0.3),


In [5]:
class ChestXrayDataset(Dataset):
    def __init__(self, file_list, img_dir, labels_csv, transform=None):
        self.img_paths = []
        self.labels = []

        df = pd.read_csv(labels_csv)
        label_dict = dict(zip(df['Image Index'], df.drop(columns=['Image Index']).values.tolist()))

        with open(file_list, "r") as f:
            for line in f:
                fname = line.strip().split()[0]
                if fname not in label_dict:
                    raise ValueError(f"No labels for {fname}")

                found_path = None
                for subdir in os.listdir(img_dir):
                    subpath = os.path.join(img_dir, subdir, "images", fname)
                    if os.path.exists(subpath):
                        found_path = subpath
                        break

                if found_path is None:
                    raise FileNotFoundError(f"Image {fname} not found in {img_dir}")

                self.img_paths.append(found_path)
                self.labels.append(label_dict[fname])

        self.labels = np.array(self.labels, dtype=np.float32)
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img = cv2.imread(self.img_paths[idx], cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError(f"Failed to load {self.img_paths[idx]}")

        img = cv2.resize(img, (CONFIG["img_size"], CONFIG["img_size"]))
        img = np.stack([img, img, img], axis=-1)

        if self.transform:
            img = self.transform(image=img)["image"]
        else:
            img = torch.tensor(img/255.0, dtype=torch.float32).permute(2,0,1)

        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return img, label


In [6]:
CONFIG = {
    "img_size": 380,        # EfficientNet-B4 input size
    "batch_size": 8,        # reduce if OOM
    "num_workers": 6,
    "epochs": 50,
    "lr": 1e-4,
    "checkpoint_dir": "checkpoints_efficientnet_b4",  # separate dir for B4
}
os.makedirs(CONFIG["checkpoint_dir"], exist_ok=True)

# File names for best models (avoid conflicts with DenseNet/B3)
BEST_MODEL_AUROC = os.path.join(CONFIG["checkpoint_dir"], "best_model_auroc_b4.pth")
BEST_MODEL_F1    = os.path.join(CONFIG["checkpoint_dir"], "best_model_f1_b4.pth")


In [7]:
labels_csv = "labels_clean.csv"

train_dataset = ChestXrayDataset("train_split.txt", "data", labels_csv, transform=train_transform)
val_dataset   = ChestXrayDataset("val_split.txt", "data", labels_csv, transform=val_transform)

# Weighted Sampler
labels = train_dataset.labels
class_counts = labels.sum(axis=0)
class_weights = 1.0 / (class_counts + 1e-6)
sample_weights = (labels * class_weights).sum(axis=1)
sample_weights = np.clip(sample_weights, 1e-6, None)

sampler = WeightedRandomSampler(
    weights=torch.DoubleTensor(sample_weights),
    num_samples=len(sample_weights),
    replacement=True
)

train_loader = DataLoader(train_dataset, batch_size=CONFIG["batch_size"], sampler=sampler,
                          num_workers=CONFIG["num_workers"], pin_memory=True, persistent_workers=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG["batch_size"], shuffle=False,
                        num_workers=CONFIG["num_workers"], pin_memory=True, persistent_workers=True)

print(f"Train size: {len(train_dataset)}, Val size: {len(val_dataset)}")

# pos_weight for BCE loss
pos = labels.sum(axis=0)
neg = len(labels) - pos
pos_weight = torch.tensor(neg / (pos + 1e-6), dtype=torch.float32).to(device)


Train size: 77871, Val size: 8653


In [8]:
model = timm.create_model("efficientnet_b4", pretrained=True, num_classes=14)
model = model.to(device).to(memory_format=torch.channels_last)

# Loss
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# Optimizer
optimizer = optim.AdamW(model.parameters(), lr=CONFIG["lr"], weight_decay=1e-4)

# Scheduler with Warmup
from transformers import get_cosine_schedule_with_warmup
total_steps = len(train_loader) * CONFIG["epochs"]
warmup_steps = int(0.1 * total_steps)

scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps
)

# AMP scaler
scaler = amp.GradScaler("cuda")


In [11]:
def compute_metrics(y_true, y_probs, threshold=0.5, class_names=None):
    C = y_true.shape[1]
    aucs, aps, f1s = [], [], []
    per_class_metrics = {}

    for c in range(C):
        try:
            auc = roc_auc_score(y_true[:, c], y_probs[:, c])
        except:
            auc = np.nan
        try:
            ap = average_precision_score(y_true[:, c], y_probs[:, c])
        except:
            ap = np.nan

        y_pred_bin = (y_probs[:, c] >= threshold).astype(int)
        f1 = f1_score(y_true[:, c], y_pred_bin, zero_division=0)

        aucs.append(auc)
        aps.append(ap)
        f1s.append(f1)

        if class_names:
            per_class_metrics[class_names[c]] = {
                "auroc": auc,
                "auprc": ap,
                "f1": f1
            }
        else:
            per_class_metrics[c] = {"auroc": auc, "auprc": ap, "f1": f1}

    macro_f1 = np.nanmean(f1s)
    micro_f1 = f1_score(y_true, (y_probs >= threshold).astype(int), average='micro', zero_division=0)

    return {
        "auroc_macro": np.nanmean(aucs),
        "ap_macro": np.nanmean(aps),
        "macro_f1": macro_f1,
        "micro_f1": micro_f1,
        "per_class": per_class_metrics
    }


# ========================= Training Loop =========================
best_val_auroc = -1
best_val_f1 = -1
patience = 10
epochs_no_improve = 0

class_names = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass',
    'Nodule','Pneumonia','Pneumothorax','Consolidation','Edema',
    'Emphysema','Fibrosis','Pleural_Thickening','Hernia'
]

for epoch in range(CONFIG["epochs"]):
    print(f"\nEpoch {epoch+1}/{CONFIG['epochs']}")
    print(f"Current LR: {scheduler.get_last_lr()[0]:.6f}")

    # ---- Training ----
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc="Training", leave=False):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        with amp.autocast("cuda"):
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        scheduler.step()  # ‚úÖ step per batch

        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)

    # ---- Validation ----
    model.eval()
    val_loss, all_probs, all_targets = 0.0, [], []
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validation", leave=False):
            images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            with amp.autocast("cuda"):
                outputs = model(images)
                loss = criterion(outputs, labels)

            val_loss += loss.item()
            all_probs.append(torch.sigmoid(outputs).cpu().numpy())
            all_targets.append(labels.cpu().numpy())

    avg_val_loss = val_loss / len(val_loader)
    all_probs = np.vstack(all_probs)
    all_targets = np.vstack(all_targets)

    metrics = compute_metrics(all_targets, all_probs, class_names=class_names)

    print(f"Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | "
          f"AUROC: {metrics['auroc_macro']:.3f} | AUPRC: {metrics['ap_macro']:.3f} | "
          f"Macro-F1: {metrics['macro_f1']:.3f} | Micro-F1: {metrics['micro_f1']:.3f}")

    print("Sample per-class AUROC:")
    for disease in ["Hernia", "Fibrosis", "Edema"]:
        val = metrics["per_class"][disease]["auroc"]
        print(f"  {disease}: {val:.3f}")

    # ---- Save checkpoint for every epoch ----
    torch.save(model.state_dict(), os.path.join(CONFIG["checkpoint_dir"], f"epoch_{epoch+1}.pth"))

    improved = False

    # ‚úÖ Best by AUROC
    if metrics["auroc_macro"] > best_val_auroc:
        best_val_auroc = metrics["auroc_macro"]
        torch.save(model.state_dict(), BEST_MODEL_AUROC)
        print(f"üåü Best model updated (AUROC {best_val_auroc:.3f})")
        improved = True

    # ‚úÖ Best by Macro-F1
    if metrics["macro_f1"] > best_val_f1:
        best_val_f1 = metrics["macro_f1"]
        torch.save(model.state_dict(), BEST_MODEL_F1)
        print(f"üåü Best model updated (Macro-F1 {best_val_f1:.3f})")
        improved = True

    # ---- Early stopping ----
    if improved:
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        print(f"‚ö†Ô∏è No improvement for {epochs_no_improve} epoch(s).")

    if epochs_no_improve >= patience:
        print(f"‚èπ Early stopping triggered after {epoch+1} epochs "
              f"(no AUROC/F1 improvement for {patience} epochs).")
        break



Epoch 1/50
Current LR: 0.000000


                                                               

Train Loss: 2.7577 | Val Loss: 1.6851 | AUROC: 0.689 | AUPRC: 0.097 | Macro-F1: 0.090 | Micro-F1: 0.092
Sample per-class AUROC:
  Hernia: 0.705
  Fibrosis: 0.661
  Edema: 0.844
üåü Best model updated (AUROC 0.689)
üåü Best model updated (Macro-F1 0.090)

Epoch 2/50
Current LR: 0.000020


                                                               

Train Loss: 1.8845 | Val Loss: 1.4816 | AUROC: 0.747 | AUPRC: 0.148 | Macro-F1: 0.104 | Micro-F1: 0.112
Sample per-class AUROC:
  Hernia: 0.819
  Fibrosis: 0.725
  Edema: 0.871
üåü Best model updated (AUROC 0.747)
üåü Best model updated (Macro-F1 0.104)

Epoch 3/50
Current LR: 0.000040


                                                               

Train Loss: 1.5697 | Val Loss: 1.3912 | AUROC: 0.762 | AUPRC: 0.169 | Macro-F1: 0.119 | Micro-F1: 0.131
Sample per-class AUROC:
  Hernia: 0.824
  Fibrosis: 0.718
  Edema: 0.875
üåü Best model updated (AUROC 0.762)
üåü Best model updated (Macro-F1 0.119)

Epoch 4/50
Current LR: 0.000060


                                                               

Train Loss: 1.2839 | Val Loss: 1.5666 | AUROC: 0.763 | AUPRC: 0.181 | Macro-F1: 0.132 | Micro-F1: 0.148
Sample per-class AUROC:
  Hernia: 0.829
  Fibrosis: 0.738
  Edema: 0.873
üåü Best model updated (AUROC 0.763)
üåü Best model updated (Macro-F1 0.132)

Epoch 5/50
Current LR: 0.000080


                                                               

Train Loss: 1.0405 | Val Loss: 1.7160 | AUROC: 0.762 | AUPRC: 0.178 | Macro-F1: 0.153 | Micro-F1: 0.171
Sample per-class AUROC:
  Hernia: 0.863
  Fibrosis: 0.725
  Edema: 0.870
üåü Best model updated (Macro-F1 0.153)

Epoch 6/50
Current LR: 0.000100


                                                               

Train Loss: 0.8493 | Val Loss: 1.9804 | AUROC: 0.750 | AUPRC: 0.175 | Macro-F1: 0.165 | Micro-F1: 0.192
Sample per-class AUROC:
  Hernia: 0.767
  Fibrosis: 0.705
  Edema: 0.851
üåü Best model updated (Macro-F1 0.165)

Epoch 7/50
Current LR: 0.000100


                                                               

Train Loss: 0.6985 | Val Loss: 2.2255 | AUROC: 0.753 | AUPRC: 0.167 | Macro-F1: 0.161 | Micro-F1: 0.194
Sample per-class AUROC:
  Hernia: 0.848
  Fibrosis: 0.734
  Edema: 0.829
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 8/50
Current LR: 0.000100


                                                               

Train Loss: 0.5793 | Val Loss: 2.2566 | AUROC: 0.750 | AUPRC: 0.170 | Macro-F1: 0.174 | Micro-F1: 0.206
Sample per-class AUROC:
  Hernia: 0.782
  Fibrosis: 0.725
  Edema: 0.838
üåü Best model updated (Macro-F1 0.174)

Epoch 9/50
Current LR: 0.000099


                                                               

Train Loss: 0.4959 | Val Loss: 2.5751 | AUROC: 0.754 | AUPRC: 0.170 | Macro-F1: 0.189 | Micro-F1: 0.214
Sample per-class AUROC:
  Hernia: 0.821
  Fibrosis: 0.707
  Edema: 0.819
üåü Best model updated (Macro-F1 0.189)

Epoch 10/50
Current LR: 0.000098


                                                               

Train Loss: 0.4332 | Val Loss: 2.5772 | AUROC: 0.750 | AUPRC: 0.169 | Macro-F1: 0.187 | Micro-F1: 0.222
Sample per-class AUROC:
  Hernia: 0.792
  Fibrosis: 0.724
  Edema: 0.834
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 11/50
Current LR: 0.000097


                                                               

Train Loss: 0.3766 | Val Loss: 2.7994 | AUROC: 0.740 | AUPRC: 0.160 | Macro-F1: 0.181 | Micro-F1: 0.220
Sample per-class AUROC:
  Hernia: 0.796
  Fibrosis: 0.725
  Edema: 0.826
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 12/50
Current LR: 0.000096


                                                               

Train Loss: 0.3240 | Val Loss: 3.5994 | AUROC: 0.743 | AUPRC: 0.167 | Macro-F1: 0.208 | Micro-F1: 0.237
Sample per-class AUROC:
  Hernia: 0.812
  Fibrosis: 0.721
  Edema: 0.848
üåü Best model updated (Macro-F1 0.208)

Epoch 13/50
Current LR: 0.000094


                                                               

Train Loss: 0.2925 | Val Loss: 3.4676 | AUROC: 0.747 | AUPRC: 0.170 | Macro-F1: 0.200 | Micro-F1: 0.240
Sample per-class AUROC:
  Hernia: 0.787
  Fibrosis: 0.720
  Edema: 0.853
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 14/50
Current LR: 0.000092


                                                               

Train Loss: 0.2626 | Val Loss: 3.3692 | AUROC: 0.746 | AUPRC: 0.173 | Macro-F1: 0.206 | Micro-F1: 0.239
Sample per-class AUROC:
  Hernia: 0.823
  Fibrosis: 0.704
  Edema: 0.818
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 15/50
Current LR: 0.000090


                                                               

Train Loss: 0.2374 | Val Loss: 3.9972 | AUROC: 0.738 | AUPRC: 0.182 | Macro-F1: 0.217 | Micro-F1: 0.247
Sample per-class AUROC:
  Hernia: 0.697
  Fibrosis: 0.720
  Edema: 0.819
üåü Best model updated (Macro-F1 0.217)

Epoch 16/50
Current LR: 0.000088


                                                               

Train Loss: 0.2102 | Val Loss: 4.1098 | AUROC: 0.735 | AUPRC: 0.175 | Macro-F1: 0.216 | Micro-F1: 0.255
Sample per-class AUROC:
  Hernia: 0.742
  Fibrosis: 0.714
  Edema: 0.807
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 17/50
Current LR: 0.000086


                                                               

Train Loss: 0.1956 | Val Loss: 4.3500 | AUROC: 0.746 | AUPRC: 0.174 | Macro-F1: 0.219 | Micro-F1: 0.263
Sample per-class AUROC:
  Hernia: 0.797
  Fibrosis: 0.707
  Edema: 0.843
üåü Best model updated (Macro-F1 0.219)

Epoch 18/50
Current LR: 0.000083


                                                               

Train Loss: 0.1792 | Val Loss: 4.1821 | AUROC: 0.747 | AUPRC: 0.182 | Macro-F1: 0.221 | Micro-F1: 0.256
Sample per-class AUROC:
  Hernia: 0.828
  Fibrosis: 0.721
  Edema: 0.832
üåü Best model updated (Macro-F1 0.221)

Epoch 19/50
Current LR: 0.000081


                                                               

Train Loss: 0.1576 | Val Loss: 4.6736 | AUROC: 0.737 | AUPRC: 0.178 | Macro-F1: 0.229 | Micro-F1: 0.259
Sample per-class AUROC:
  Hernia: 0.769
  Fibrosis: 0.694
  Edema: 0.843
üåü Best model updated (Macro-F1 0.229)

Epoch 20/50
Current LR: 0.000078


                                                               

Train Loss: 0.1459 | Val Loss: 4.6880 | AUROC: 0.743 | AUPRC: 0.171 | Macro-F1: 0.218 | Micro-F1: 0.255
Sample per-class AUROC:
  Hernia: 0.757
  Fibrosis: 0.705
  Edema: 0.798
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 21/50
Current LR: 0.000075


                                                               

Train Loss: 0.1345 | Val Loss: 4.8355 | AUROC: 0.736 | AUPRC: 0.169 | Macro-F1: 0.213 | Micro-F1: 0.263
Sample per-class AUROC:
  Hernia: 0.742
  Fibrosis: 0.703
  Edema: 0.807
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 22/50
Current LR: 0.000072


                                                               

Train Loss: 0.1215 | Val Loss: 5.5652 | AUROC: 0.730 | AUPRC: 0.168 | Macro-F1: 0.231 | Micro-F1: 0.272
Sample per-class AUROC:
  Hernia: 0.630
  Fibrosis: 0.707
  Edema: 0.812
üåü Best model updated (Macro-F1 0.231)

Epoch 23/50
Current LR: 0.000069


                                                               

Train Loss: 0.1135 | Val Loss: 5.4489 | AUROC: 0.742 | AUPRC: 0.177 | Macro-F1: 0.235 | Micro-F1: 0.268
Sample per-class AUROC:
  Hernia: 0.739
  Fibrosis: 0.708
  Edema: 0.815
üåü Best model updated (Macro-F1 0.235)

Epoch 24/50
Current LR: 0.000065


                                                               

Train Loss: 0.1002 | Val Loss: 5.4314 | AUROC: 0.734 | AUPRC: 0.175 | Macro-F1: 0.231 | Micro-F1: 0.269
Sample per-class AUROC:
  Hernia: 0.697
  Fibrosis: 0.704
  Edema: 0.797
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 25/50
Current LR: 0.000062


                                                               

Train Loss: 0.0913 | Val Loss: 5.8799 | AUROC: 0.735 | AUPRC: 0.171 | Macro-F1: 0.226 | Micro-F1: 0.268
Sample per-class AUROC:
  Hernia: 0.721
  Fibrosis: 0.698
  Edema: 0.805
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 26/50
Current LR: 0.000059


                                                               

Train Loss: 0.0874 | Val Loss: 5.5215 | AUROC: 0.740 | AUPRC: 0.171 | Macro-F1: 0.226 | Micro-F1: 0.274
Sample per-class AUROC:
  Hernia: 0.754
  Fibrosis: 0.690
  Edema: 0.813
‚ö†Ô∏è No improvement for 3 epoch(s).

Epoch 27/50
Current LR: 0.000055


                                                               

Train Loss: 0.0794 | Val Loss: 5.7101 | AUROC: 0.735 | AUPRC: 0.179 | Macro-F1: 0.239 | Micro-F1: 0.278
Sample per-class AUROC:
  Hernia: 0.679
  Fibrosis: 0.725
  Edema: 0.825
üåü Best model updated (Macro-F1 0.239)

Epoch 28/50
Current LR: 0.000052


                                                               

Train Loss: 0.0724 | Val Loss: 6.0185 | AUROC: 0.730 | AUPRC: 0.172 | Macro-F1: 0.240 | Micro-F1: 0.280
Sample per-class AUROC:
  Hernia: 0.668
  Fibrosis: 0.709
  Edema: 0.796
üåü Best model updated (Macro-F1 0.240)

Epoch 29/50
Current LR: 0.000048


                                                               

Train Loss: 0.0642 | Val Loss: 6.5931 | AUROC: 0.736 | AUPRC: 0.172 | Macro-F1: 0.238 | Micro-F1: 0.282
Sample per-class AUROC:
  Hernia: 0.663
  Fibrosis: 0.718
  Edema: 0.809
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 30/50
Current LR: 0.000045


                                                               

Train Loss: 0.0612 | Val Loss: 6.4762 | AUROC: 0.739 | AUPRC: 0.174 | Macro-F1: 0.232 | Micro-F1: 0.284
Sample per-class AUROC:
  Hernia: 0.726
  Fibrosis: 0.715
  Edema: 0.791
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 31/50
Current LR: 0.000041


                                                               

Train Loss: 0.0574 | Val Loss: 6.6328 | AUROC: 0.734 | AUPRC: 0.179 | Macro-F1: 0.240 | Micro-F1: 0.286
Sample per-class AUROC:
  Hernia: 0.659
  Fibrosis: 0.695
  Edema: 0.799
‚ö†Ô∏è No improvement for 3 epoch(s).

Epoch 32/50
Current LR: 0.000038


                                                               

Train Loss: 0.0508 | Val Loss: 6.7393 | AUROC: 0.726 | AUPRC: 0.177 | Macro-F1: 0.244 | Micro-F1: 0.285
Sample per-class AUROC:
  Hernia: 0.581
  Fibrosis: 0.703
  Edema: 0.778
üåü Best model updated (Macro-F1 0.244)

Epoch 33/50
Current LR: 0.000035


                                                               

Train Loss: 0.0481 | Val Loss: 7.1267 | AUROC: 0.729 | AUPRC: 0.178 | Macro-F1: 0.239 | Micro-F1: 0.288
Sample per-class AUROC:
  Hernia: 0.664
  Fibrosis: 0.695
  Edema: 0.781
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 34/50
Current LR: 0.000031


                                                               

Train Loss: 0.0432 | Val Loss: 7.4207 | AUROC: 0.726 | AUPRC: 0.174 | Macro-F1: 0.239 | Micro-F1: 0.292
Sample per-class AUROC:
  Hernia: 0.682
  Fibrosis: 0.682
  Edema: 0.756
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 35/50
Current LR: 0.000028


                                                               

Train Loss: 0.0409 | Val Loss: 7.2922 | AUROC: 0.732 | AUPRC: 0.176 | Macro-F1: 0.241 | Micro-F1: 0.292
Sample per-class AUROC:
  Hernia: 0.682
  Fibrosis: 0.716
  Edema: 0.773
‚ö†Ô∏è No improvement for 3 epoch(s).

Epoch 36/50
Current LR: 0.000025


                                                               

Train Loss: 0.0369 | Val Loss: 7.6927 | AUROC: 0.722 | AUPRC: 0.174 | Macro-F1: 0.253 | Micro-F1: 0.294
Sample per-class AUROC:
  Hernia: 0.580
  Fibrosis: 0.690
  Edema: 0.774
üåü Best model updated (Macro-F1 0.253)

Epoch 37/50
Current LR: 0.000022


                                                               

Train Loss: 0.0345 | Val Loss: 8.1768 | AUROC: 0.723 | AUPRC: 0.177 | Macro-F1: 0.256 | Micro-F1: 0.296
Sample per-class AUROC:
  Hernia: 0.599
  Fibrosis: 0.693
  Edema: 0.778
üåü Best model updated (Macro-F1 0.256)

Epoch 38/50
Current LR: 0.000019


                                                               

Train Loss: 0.0329 | Val Loss: 7.6749 | AUROC: 0.722 | AUPRC: 0.179 | Macro-F1: 0.255 | Micro-F1: 0.296
Sample per-class AUROC:
  Hernia: 0.591
  Fibrosis: 0.697
  Edema: 0.769
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 39/50
Current LR: 0.000017


                                                               

Train Loss: 0.0303 | Val Loss: 8.0120 | AUROC: 0.720 | AUPRC: 0.176 | Macro-F1: 0.251 | Micro-F1: 0.298
Sample per-class AUROC:
  Hernia: 0.590
  Fibrosis: 0.691
  Edema: 0.772
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 40/50
Current LR: 0.000014


                                                               

Train Loss: 0.0279 | Val Loss: 8.0227 | AUROC: 0.725 | AUPRC: 0.178 | Macro-F1: 0.250 | Micro-F1: 0.297
Sample per-class AUROC:
  Hernia: 0.621
  Fibrosis: 0.699
  Edema: 0.773
‚ö†Ô∏è No improvement for 3 epoch(s).

Epoch 41/50
Current LR: 0.000012


                                                               

Train Loss: 0.0282 | Val Loss: 8.7014 | AUROC: 0.720 | AUPRC: 0.179 | Macro-F1: 0.251 | Micro-F1: 0.300
Sample per-class AUROC:
  Hernia: 0.599
  Fibrosis: 0.676
  Edema: 0.769
‚ö†Ô∏è No improvement for 4 epoch(s).

Epoch 42/50
Current LR: 0.000010


                                                               

Train Loss: 0.0287 | Val Loss: 8.5280 | AUROC: 0.726 | AUPRC: 0.180 | Macro-F1: 0.261 | Micro-F1: 0.305
Sample per-class AUROC:
  Hernia: 0.601
  Fibrosis: 0.697
  Edema: 0.771
üåü Best model updated (Macro-F1 0.261)

Epoch 43/50
Current LR: 0.000008


                                                               

Train Loss: 0.0243 | Val Loss: 8.2982 | AUROC: 0.724 | AUPRC: 0.180 | Macro-F1: 0.255 | Micro-F1: 0.298
Sample per-class AUROC:
  Hernia: 0.590
  Fibrosis: 0.694
  Edema: 0.780
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 44/50
Current LR: 0.000006


                                                               

Train Loss: 0.0230 | Val Loss: 8.1179 | AUROC: 0.729 | AUPRC: 0.180 | Macro-F1: 0.251 | Micro-F1: 0.300
Sample per-class AUROC:
  Hernia: 0.628
  Fibrosis: 0.699
  Edema: 0.780
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 45/50
Current LR: 0.000004


                                                               

Train Loss: 0.0227 | Val Loss: 8.3449 | AUROC: 0.727 | AUPRC: 0.182 | Macro-F1: 0.261 | Micro-F1: 0.306
Sample per-class AUROC:
  Hernia: 0.600
  Fibrosis: 0.699
  Edema: 0.782
üåü Best model updated (Macro-F1 0.261)

Epoch 46/50
Current LR: 0.000003


                                                               

Train Loss: 0.0204 | Val Loss: 8.3849 | AUROC: 0.726 | AUPRC: 0.180 | Macro-F1: 0.256 | Micro-F1: 0.303
Sample per-class AUROC:
  Hernia: 0.597
  Fibrosis: 0.698
  Edema: 0.771
‚ö†Ô∏è No improvement for 1 epoch(s).

Epoch 47/50
Current LR: 0.000002


                                                               

Train Loss: 0.0213 | Val Loss: 8.6329 | AUROC: 0.724 | AUPRC: 0.179 | Macro-F1: 0.256 | Micro-F1: 0.302
Sample per-class AUROC:
  Hernia: 0.602
  Fibrosis: 0.696
  Edema: 0.770
‚ö†Ô∏è No improvement for 2 epoch(s).

Epoch 48/50
Current LR: 0.000001


                                                             

KeyboardInterrupt: 

In [10]:
class_names = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass',
    'Nodule','Pneumonia','Pneumothorax','Consolidation','Edema',
    'Emphysema','Fibrosis','Pleural_Thickening','Hernia'
]

BEST_MODEL_AUROC = "checkpoints_efficientnet_b4/best_model_auroc_b4.pth"
BEST_MODEL_F1    = "checkpoints_efficientnet_b4/best_model_f1_b4.pth"

def evaluate_model(model_path, model_name):
    """
    Load a model, run inference, tune thresholds, and compute metrics.
    Returns dict with overall + per-class metrics.
    """
    # Load model
    model = timm.create_model("efficientnet_b4", pretrained=False, num_classes=14)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    all_probs, all_targets = [], []
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Eval {model_name}", leave=False):
            images = images.to(device, non_blocking=True)
            with amp.autocast("cuda"):
                outputs = model(images)
            all_probs.append(torch.sigmoid(outputs).cpu().numpy())
            all_targets.append(labels.numpy())

    all_probs = np.vstack(all_probs)
    all_targets = np.vstack(all_targets)

    # ---- Per-class threshold tuning ----
    best_thresh = []
    per_class_results = {}
    for c, disease in enumerate(class_names):
        best_f, best_t = -1, 0.5
        for t in np.linspace(0.05, 0.95, 19):
            f = f1_score(all_targets[:,c], (all_probs[:,c] >= t).astype(int), zero_division=0)
            if f > best_f:
                best_f, best_t = f, t
        best_thresh.append(best_t)

        # compute per-class metrics with tuned threshold
        y_true_c = all_targets[:,c]
        y_prob_c = all_probs[:,c]
        y_pred_c = (y_prob_c >= best_t).astype(int)

        try:
            auc = roc_auc_score(y_true_c, y_prob_c)
        except:
            auc = np.nan
        try:
            ap = average_precision_score(y_true_c, y_prob_c)
        except:
            ap = np.nan
        f1c = f1_score(y_true_c, y_pred_c, zero_division=0)

        per_class_results[disease] = {
            "best_thresh": best_t,
            "auroc": auc,
            "auprc": ap,
            "f1": f1c
        }

    # ---- Overall metrics ----
    final_preds = (all_probs >= np.array(best_thresh)).astype(int)
    macro_f1 = f1_score(all_targets, final_preds, average='macro', zero_division=0)
    micro_f1 = f1_score(all_targets, final_preds, average='micro', zero_division=0)
    auroc_macro = roc_auc_score(all_targets, all_probs, average='macro')

    results = {
        "model": model_name,
        "macro_auroc": auroc_macro,
        "macro_f1": macro_f1,
        "micro_f1": micro_f1,
        "per_class": per_class_results,
    }
    return results


# ========================= Evaluate both best models =========================
results_auroc = evaluate_model(BEST_MODEL_AUROC, "EfficientNet-B4 Best AUROC")
results_f1    = evaluate_model(BEST_MODEL_F1, "EfficientNet-B4 Best F1")

# ---- Print Summary ----
def print_summary(results):
    print(f"\nüìä {results['model']}")
    print(f"Macro AUROC: {results['macro_auroc']:.3f}")
    print(f"Macro-F1 (tuned): {results['macro_f1']:.3f}")
    print(f"Micro-F1 (tuned): {results['micro_f1']:.3f}")
    print("\nPer-class metrics:")
    for disease, metrics in results["per_class"].items():
        print(f"  {disease:<20} "
              f"Thresh={metrics['best_thresh']:.2f} | "
              f"AUROC={metrics['auroc']:.3f} | "
              f"AUPRC={metrics['auprc']:.3f} | "
              f"F1={metrics['f1']:.3f}")

print_summary(results_auroc)
print_summary(results_f1)

# ---- Comparison table ----
import pandas as pd

df_auroc = pd.DataFrame(results_auroc["per_class"]).T
df_f1    = pd.DataFrame(results_f1["per_class"]).T

comparison_df = df_auroc[["auroc","auprc","f1"]].rename(
    columns={"auroc":"AUROC (best AUROC model)",
             "auprc":"AUPRC (best AUROC model)",
             "f1":"F1 (best AUROC model)"})

comparison_df["AUROC (best F1 model)"] = df_f1["auroc"]
comparison_df["AUPRC (best F1 model)"] = df_f1["auprc"]
comparison_df["F1 (best F1 model)"] = df_f1["f1"]

print("\n================ Performance Comparison per class ================\n")
display(comparison_df.round(3))


  model.load_state_dict(torch.load(model_path, map_location=device))
  model.load_state_dict(torch.load(model_path, map_location=device))
                                                                                 


üìä EfficientNet-B4 Best AUROC
Macro AUROC: 0.763
Macro-F1 (tuned): 0.230
Micro-F1 (tuned): 0.267

Per-class metrics:
  Atelectasis          Thresh=0.85 | AUROC=0.747 | AUPRC=0.256 | F1=0.331
  Cardiomegaly         Thresh=0.95 | AUROC=0.861 | AUPRC=0.183 | F1=0.217
  Effusion             Thresh=0.85 | AUROC=0.860 | AUPRC=0.462 | F1=0.486
  Infiltration         Thresh=0.60 | AUROC=0.616 | AUPRC=0.243 | F1=0.300
  Mass                 Thresh=0.95 | AUROC=0.760 | AUPRC=0.192 | F1=0.259
  Nodule               Thresh=0.90 | AUROC=0.646 | AUPRC=0.136 | F1=0.197
  Pneumonia            Thresh=0.95 | AUROC=0.603 | AUPRC=0.019 | F1=0.044
  Pneumothorax         Thresh=0.95 | AUROC=0.858 | AUPRC=0.233 | F1=0.302
  Consolidation        Thresh=0.95 | AUROC=0.677 | AUPRC=0.090 | F1=0.138
  Edema                Thresh=0.95 | AUROC=0.873 | AUPRC=0.118 | F1=0.183
  Emphysema            Thresh=0.95 | AUROC=0.899 | AUPRC=0.276 | F1=0.256
  Fibrosis             Thresh=0.95 | AUROC=0.738 | AUPRC=0.081 | F

Unnamed: 0,AUROC (best AUROC model),AUPRC (best AUROC model),F1 (best AUROC model),AUROC (best F1 model),AUPRC (best F1 model),F1 (best F1 model)
Atelectasis,0.747,0.256,0.331,0.738,0.262,0.341
Cardiomegaly,0.861,0.183,0.217,0.838,0.18,0.303
Effusion,0.86,0.462,0.486,0.842,0.401,0.472
Infiltration,0.616,0.243,0.3,0.585,0.21,0.288
Mass,0.76,0.192,0.259,0.758,0.239,0.34
Nodule,0.646,0.136,0.197,0.672,0.155,0.235
Pneumonia,0.603,0.019,0.044,0.598,0.021,0.058
Pneumothorax,0.858,0.233,0.302,0.867,0.309,0.398
Consolidation,0.677,0.09,0.138,0.651,0.093,0.17
Edema,0.873,0.118,0.183,0.782,0.107,0.199
