In [1]:
# Run this single shell cell
!pip install -q ultralytics==8.0.114 grad-cam pydicom timm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m595.4/595.4 kB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m98.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m76.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m38.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━

In [None]:


import os, random
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
import pydicom

# =====================
# CONFIG
# =====================
OUT_IMG_SIZE = (512, 512)   # None -> keep original resolution
MAX_IMAGES_PER_SPLIT = None  # set to an int for debugging
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Paths (update these according to Kaggle dataset mount)
TRAIN_CSV = Path("/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train.csv")
SAMPLE_SUB = Path("/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/sample_submission.csv")
TRAIN_DICOM_DIR = Path("/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train")
TEST_DICOM_DIR = Path("/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/test")


# Create folders
for split in ["train", "val", "test"]:
    (IMG_DIR / split).mkdir(parents=True, exist_ok=True)
    (LAB_DIR / split).mkdir(parents=True, exist_ok=True)

# =====================
# Load train.csv
# =====================
df = pd.read_csv(TRAIN_CSV)
print("Total annotation rows:", len(df))
unique_train_image_ids = df['image_id'].unique().tolist()
print("Unique images with annotations:", len(unique_train_image_ids))

# Collect all dicom files in train dir
all_train_files = [f.stem for f in TRAIN_DICOM_DIR.iterdir() if f.suffix.lower() == '.dicom']
image_ids = sorted(list(set(all_train_files)))
print("Available dicom images in train folder:", len(image_ids))

# Train/val split (90/10)
random.shuffle(image_ids)
val_frac = 0.1
n_val = int(len(image_ids) * val_frac)
val_ids = image_ids[:n_val]
train_ids = image_ids[n_val:]
print("Train ids:", len(train_ids), "Val ids:", len(val_ids))

# =====================
# Conversion helpers
# =====================
def dicom_to_pil(dicom_path):
    ds = pydicom.dcmread(str(dicom_path))
    img = ds.pixel_array.astype(np.float32)
    lo, hi = np.percentile(img, (0.5, 99.5))
    img = np.clip(img, lo, hi)
    img = img - img.min()
    if img.max() > 0:
        img = img / img.max()
    img = (img * 255).astype(np.uint8)
    pil = Image.fromarray(img).convert("RGB")
    return pil

def write_yolo_label(image_id, boxes_for_image, out_label_path, img_w, img_h):
    lines = []
    for box in boxes_for_image:
        x_min, y_min, x_max, y_max = box['x_min'], box['y_min'], box['x_max'], box['y_max']
        class_id = int(box['class_id'])
        x_min = max(0, x_min); y_min = max(0, y_min)
        x_max = min(img_w-1, x_max); y_max = min(img_h-1, y_max)
        width = x_max - x_min
        height = y_max - y_min
        if width <= 0 or height <= 0: 
            continue
        cx = x_min + width/2.0
        cy = y_min + height/2.0
        cx_norm = cx / img_w
        cy_norm = cy / img_h
        w_norm = width / img_w
        h_norm = height / img_h
        lines.append(f"{class_id} {cx_norm:.6f} {cy_norm:.6f} {w_norm:.6f} {h_norm:.6f}")
    if len(lines) == 0:
        open(out_label_path, 'w').close()
        return
    with open(out_label_path, 'w') as f:
        f.write("\n".join(lines))

# =====================
# Converters with SKIP logic
# =====================
def convert_split(ids_list, split_name, max_images=None):
    pbar = ids_list if max_images is None else ids_list[:max_images]
    count, skipped = 0, 0
    for img_id in pbar:
        out_img_path = IMG_DIR / split_name / f"{img_id}.jpg"
        out_lbl_path = LAB_DIR / split_name / f"{img_id}.txt"

        # ✅ Skip if already exists
        if out_img_path.exists() and out_lbl_path.exists():
            skipped += 1
            continue

        dicom_path = TRAIN_DICOM_DIR / f"{img_id}.dicom"
        if not dicom_path.exists():
            continue
        try:
            pil = dicom_to_pil(dicom_path)
        except Exception as e:
            print("Failed to read", dicom_path, e)
            continue

        if OUT_IMG_SIZE is not None:
            pil = pil.resize(OUT_IMG_SIZE)

        out_img_path.parent.mkdir(parents=True, exist_ok=True)
        pil.save(out_img_path, quality=95)

        boxes = df[df['image_id'] == img_id]
        write_yolo_label(img_id, boxes.to_dict('records'), out_lbl_path, pil.width, pil.height)

        count += 1
        if count % 500 == 0:
            print(f"{split_name}: converted {count} new images (skipped {skipped})...")
    print(f"Finished {split_name} -> converted {count}, skipped {skipped}.")

def convert_test(ids_list, max_images=None):
    pbar = ids_list if max_images is None else ids_list[:max_images]
    count, skipped = 0, 0
    for img_id in pbar:
        out_img_path = IMG_DIR / "test" / f"{img_id}.jpg"
        out_lbl_path = LAB_DIR / "test" / f"{img_id}.txt"

        # ✅ Skip if already exists
        if out_img_path.exists() and out_lbl_path.exists():
            skipped += 1
            continue

        dicom_path = TEST_DICOM_DIR / f"{img_id}.dicom"
        if not dicom_path.exists():
            continue
        try:
            pil = dicom_to_pil(dicom_path)
        except Exception as e:
            print("Failed to read", dicom_path, e)
            continue

        if OUT_IMG_SIZE is not None:
            pil = pil.resize(OUT_IMG_SIZE)

        out_img_path.parent.mkdir(parents=True, exist_ok=True)
        pil.save(out_img_path, quality=95)

        # empty label
        open(out_lbl_path, 'w').close()

        count += 1
        if count % 500 == 0:
            print(f"test: converted {count} new images (skipped {skipped})...")
    print(f"Finished test -> converted {count}, skipped {skipped}.")

# =====================
# Run conversions
# =====================
print("Converting TRAIN split ...")
convert_split(train_ids, "train", max_images=MAX_IMAGES_PER_SPLIT)

print("Converting VAL split ...")
convert_split(val_ids, "val", max_images=MAX_IMAGES_PER_SPLIT)

print("Converting TEST split ...")
sample_sub = pd.read_csv(SAMPLE_SUB)
test_ids = sample_sub['image_id'].tolist()
convert_test(test_ids, max_images=MAX_IMAGES_PER_SPLIT)



In [4]:
# Python cell
from pathlib import Path
import os, random, time, json, math
import numpy as np, pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torchvision.transforms.functional as TF

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, confusion_matrix

# ultralytics
from ultralytics import YOLO

# global paths (update if needed)
WORK_DIR = Path("/kaggle/working/vindr")
IMG_DIR = WORK_DIR / "images"
LAB_DIR = WORK_DIR / "labels"
MODEL_DIR = Path("/kaggle/working/models")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

TRAIN_CSV = Path("/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train.csv")
SAMPLE_SUB = Path("/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/sample_submission.csv")
TEST_DICOM_DIR = Path("/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/test")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


Device: cuda


In [5]:
# Build class list (match earlier)
class_names = [
    "Aortic_enlargement","Atelectasis","Calcification","Cardiomegaly",
    "Consolidation","ILD","Infiltration","Lung_Opacity","Nodule_Mass",
    "Other_lesion","Pleural_effusion","Pleural_thickening",
    "Pneumothorax","Pulmonary_fibrosis","No_finding"
]
NUM_CLASSES = len(class_names)

# Read train.csv and build multi-hot label dict
df = pd.read_csv(TRAIN_CSV)
# group by image_id
targets = {}
for img_id, g in df.groupby("image_id"):
    vec = np.zeros(NUM_CLASSES, dtype=np.float32)
    for cid in g['class_id'].values:
        vec[int(cid)] = 1.0
    targets[img_id] = vec

# Some images may be missing in df (no annotation) -> treat as No_finding
# Ensure that for converted images, we have label vectors
train_img_dir = IMG_DIR / "train"
val_img_dir   = IMG_DIR / "val"

train_ids = [p.stem for p in train_img_dir.glob("*.jpg")]
val_ids   = [p.stem for p in val_img_dir.glob("*.jpg")]

# if an image not in targets -> treat as no finding (class 14 = No_finding)
for img in train_ids + val_ids:
    if img not in targets:
        vec = np.zeros(NUM_CLASSES, dtype=np.float32)
        vec[14] = 1.0
        targets[img] = vec

# Dataset class
class MultiLabelCXRDataset(Dataset):
    def __init__(self, image_dir, img_ids, targets_dict, transform=None):
        self.image_dir = Path(image_dir)
        self.img_ids = img_ids
        self.targets = targets_dict
        self.transform = transform

    def __len__(self): return len(self.img_ids)

    def __getitem__(self, idx):
        img_id = self.img_ids[idx]
        img_path = self.image_dir / f"{img_id}.jpg"
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        label = torch.tensor(self.targets[img_id], dtype=torch.float32)
        return img, label, img_id


In [6]:
from torchvision import transforms

BATCH = 32

train_tfms = transforms.Compose([
    # Randomly crop & resize while keeping aspect ratio
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    
    # Geometric augmentations
    transforms.RandomHorizontalFlip(p=0.5),       # Chest X-rays: left/right flip is valid
    transforms.RandomRotation(degrees=7),         # Small rotations, <= 7°
    transforms.RandomAffine(
        degrees=0, 
        translate=(0.02, 0.02), 
        scale=(0.95, 1.05), 
        shear=2
    ),

    # Photometric augmentations (mild)
    transforms.ColorJitter(brightness=0.15, contrast=0.15),
    transforms.RandomGrayscale(p=0.1),
    transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)),

    # Convert & normalize
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])

val_tfms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])


train_ds = MultiLabelCXRDataset(train_img_dir, train_ids, targets, transform=train_tfms)
val_ds   = MultiLabelCXRDataset(val_img_dir, val_ids, targets, transform=val_tfms)

train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH, shuffle=False, num_workers=4, pin_memory=True)
print("Train / Val sizes:", len(train_ds), len(val_ds))


Train / Val sizes: 13500 1500


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet50, ResNet50_Weights
from torch.amp import autocast, GradScaler

# Model
NUM_CLASSES = 15  # change as needed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_cls = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
model_cls.fc = nn.Linear(model_cls.fc.in_features, NUM_CLASSES)
model_cls = model_cls.to(device)

# Loss, optimizer, scheduler
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model_cls.parameters(), lr=1e-4, weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)


scaler = GradScaler()



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 198MB/s]


In [None]:
from tqdm.auto import tqdm
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score


best_val_loss = float("inf")
best_path = MODEL_DIR / "resnet50_multilabel_best.pth"
# =========================
# Hyperparameters / Config
# =========================
BATCH = 32
NUM_CLASSES = 15
EPOCHS = 20            # <-- define it here, before the loop
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-5

for epoch in range(1, EPOCHS + 1):
    print(f"\n===== Epoch {epoch}/{EPOCHS} =====")
    # ---------------- TRAIN ----------------
    model_cls.train()
    running_loss = 0.0
    n_samples = 0

    train_preds_cpu = []
    train_targets_cpu = []

    pbar = tqdm(train_loader, desc=f"Train Epoch {epoch}", leave=False)
    for batch in pbar:
        # dataset returns (img, label) or (img, label, id)
        if len(batch) == 3:
            imgs, labels, _ = batch
        else:
            imgs, labels = batch

        imgs = imgs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True).float()

        optimizer.zero_grad()

        with autocast(device_type="cuda"):   # correct autocast usage
            outputs = model_cls(imgs)         # logits (B, C)
            loss = criterion(outputs, labels)

        # backward + step with scaler
        scaler.scale(loss).backward()
        # optional: gradient clipping (example)
        # scaler.unscale_(optimizer)
        # torch.nn.utils.clip_grad_norm_(model_cls.parameters(), max_norm=1.0)

        scaler.step(optimizer)
        scaler.update()

        batch_size = imgs.size(0)
        running_loss += loss.item() * batch_size
        n_samples += batch_size

        # collect predictions & labels on CPU (avoid storing GPU tensors)
        with torch.no_grad():
            probs = torch.sigmoid(outputs).detach().cpu().numpy()   # (B, C)
            tlabels = labels.detach().cpu().numpy()
        train_preds_cpu.append(probs)
        train_targets_cpu.append(tlabels)

        pbar.set_postfix({"loss": f"{running_loss / n_samples:.4f}"})

    train_loss = running_loss / max(1, n_samples)
    train_preds = np.vstack(train_preds_cpu)
    train_targets = np.vstack(train_targets_cpu)

    # threshold at 0.5 for binary predictions; can tune per-class thresholds later
    train_pred_bin = (train_preds >= 0.5).astype(int)

    # metrics: macro F1, micro F1, samples F1 (samples average better for multi-label)
    train_f1_macro = f1_score(train_targets, train_pred_bin, average="macro", zero_division=0)
    train_f1_micro = f1_score(train_targets, train_pred_bin, average="micro", zero_division=0)
    train_f1_samples = f1_score(train_targets, train_pred_bin, average="samples", zero_division=0)

    # ---------------- VALIDATION ----------------
    model_cls.eval()
    val_running_loss = 0.0
    val_n = 0
    val_preds_cpu = []
    val_targets_cpu = []

    with torch.no_grad():
        pbar = tqdm(val_loader, desc=f"Val Epoch {epoch}", leave=False)
        for batch in pbar:
            if len(batch) == 3:
                imgs, labels, _ = batch
            else:
                imgs, labels = batch

            imgs = imgs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True).float()

            with autocast(device_type="cuda"):
                outputs = model_cls(imgs)
                loss = criterion(outputs, labels)

            b = imgs.size(0)
            val_running_loss += loss.item() * b
            val_n += b

            probs = torch.sigmoid(outputs).detach().cpu().numpy()
            tlabels = labels.detach().cpu().numpy()
            val_preds_cpu.append(probs)
            val_targets_cpu.append(tlabels)

            pbar.set_postfix({"val_loss": f"{val_running_loss / val_n:.4f}"})

    val_loss = val_running_loss / max(1, val_n)
    val_preds = np.vstack(val_preds_cpu)
    val_targets = np.vstack(val_targets_cpu)
    val_pred_bin = (val_preds >= 0.5).astype(int)

    val_f1_macro = f1_score(val_targets, val_pred_bin, average="macro", zero_division=0)
    val_f1_micro = f1_score(val_targets, val_pred_bin, average="micro", zero_division=0)
    val_f1_samples = f1_score(val_targets, val_pred_bin, average="samples", zero_division=0)

    # optional: per-class F1
    try:
        per_class_f1 = f1_score(val_targets, val_pred_bin, average=None, zero_division=0)
    except Exception:
        per_class_f1 = None

    # optional: per-class AUROC (requires at least one pos & neg per class)
    try:
        per_class_auroc = []
        for c in range(NUM_CLASSES):
            if len(np.unique(val_targets[:, c])) > 1:
                per_class_auroc.append(roc_auc_score(val_targets[:, c], val_preds[:, c]))
            else:
                per_class_auroc.append(np.nan)
        per_class_auroc = np.array(per_class_auroc)
    except Exception:
        per_class_auroc = None

    # scheduler step (ReduceLROnPlateau expects validation metric)
    scheduler.step(val_loss)

    # Logging
    print(
        f"Epoch {epoch}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}\n"
        f"  Train F1 (macro/micro/samples): {train_f1_macro:.4f}/{train_f1_micro:.4f}/{train_f1_samples:.4f}\n"
        f"  Val   F1 (macro/micro/samples): {val_f1_macro:.4f}/{val_f1_micro:.4f}/{val_f1_samples:.4f}"
    )
    if per_class_f1 is not None:
        print("  Per-class F1 (first 5):", np.round(per_class_f1[:5], 3))

    if per_class_auroc is not None:
        print("  Per-class AUROC (first 5):", np.round(per_class_auroc[:5], 3))

    # save best model by val_loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save({
            "epoch": epoch,
            "model_state": model_cls.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "scaler_state": scaler.state_dict(),
            "val_loss": val_loss,
        }, best_path)
        print("✅ Saved best model ->", best_path)

# Save final weights
torch.save(model_cls.state_dict(), MODEL_DIR / "resnet50_multilabel_final.pth")
print("🏁 Done. Best val loss:", best_val_loss)


In [8]:
# === Testing & Visualization for your ResNet50 multilabel model ===
import os
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
from tqdm.auto import tqdm
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, classification_report
try:
    from torchmetrics.classification import MultilabelAUROC
    _HAS_TORCHMETRICS = True
except Exception:
    _HAS_TORCHMETRICS = False

# grad-cam imports (pytorch-grad-cam)
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image

# -----------------------
# Helpers: load checkpoint
# -----------------------
def load_model_from_checkpoint(model, ckpt_path, device):
    ckpt = torch.load(ckpt_path, map_location=device)
    # ckpt could be either state_dict or wrapped dict
    if 'model_state' in ckpt:
        state = ckpt['model_state']
    else:
        state = ckpt
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    return model

# -----------------------
# Inference on a loader
# -----------------------
@torch.no_grad()
def infer_loader(model, loader, device):
    model.eval()
    all_probs = []
    all_targets = []
    for batch in tqdm(loader, desc="Infer"):
        # batch might contain id
        if len(batch) == 3:
            imgs, labels, _ = batch
        else:
            imgs, labels = batch
        imgs = imgs.to(device, non_blocking=True)
        labels = labels.cpu().numpy()
        outputs = model(imgs)                  # logits
        probs = torch.sigmoid(outputs).cpu().numpy()
        all_probs.append(probs)
        all_targets.append(labels)
    all_probs = np.vstack(all_probs)
    all_targets = np.vstack(all_targets)
    return all_probs, all_targets


In [9]:
from torch.utils.data import DataLoader

# Recreate val_loader for inference (no multiprocessing)
val_loader = DataLoader(
    val_ds,
    batch_size=BATCH,
    shuffle=False,
    num_workers=0,    # <-- important for notebook stability
    pin_memory=True
)


In [10]:
import numpy as np
import torch

def multilabel_metrics(y_true, y_probs, threshold=0.5, verbose=True):
    """
    y_true: numpy array shape (N, C) float or int (0/1)
    y_probs: numpy array shape (N, C) of probabilities
    Returns dict of metrics. Uses torchmetrics if available and inputs converted correctly.
    """
    y_true_np = (y_true >= 0.5).astype(int)  # ensure 0/1 ints for sklearn
    y_pred_np = (y_probs >= threshold).astype(int)

    # sklearn metrics
    from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score

    f1_macro = f1_score(y_true_np, y_pred_np, average='macro', zero_division=0)
    f1_micro = f1_score(y_true_np, y_pred_np, average='micro', zero_division=0)
    f1_samples = f1_score(y_true_np, y_pred_np, average='samples', zero_division=0)
    precision_macro = precision_score(y_true_np, y_pred_np, average='macro', zero_division=0)
    recall_macro = recall_score(y_true_np, y_pred_np, average='macro', zero_division=0)

    # per-class AUROC with sklearn (guard classes with single label)
    per_class_auroc = []
    for c in range(y_true_np.shape[1]):
        if len(np.unique(y_true_np[:, c])) > 1:
            per_class_auroc.append(roc_auc_score(y_true_np[:, c], y_probs[:, c]))
        else:
            per_class_auroc.append(np.nan)
    per_class_auroc = np.array(per_class_auroc)

    # torchmetrics multilabel AUROC (optional)
    auroc_tm = None
    if _HAS_TORCHMETRICS:
        try:
            import torch as _torch
            # torchmetrics wants preds (float) and target (int/long)
            preds_t = _torch.tensor(y_probs, dtype=_torch.float32, device='cpu')
            target_t = _torch.tensor(y_true_np, dtype=_torch.long, device='cpu')  # IMPORTANT: long/int
            m_auroc = MultilabelAUROC(num_labels=y_true_np.shape[1])
            auroc_tm = m_auroc(preds_t, target_t).numpy()
        except Exception as e:
            # fallback silently to sklearn results
            if verbose:
                print("torchmetrics AUROC failed:", e)
            auroc_tm = None

    if verbose:
        print("F1 (macro/micro/samples):", f1_macro, f1_micro, f1_samples)
        print("Precision (macro):", precision_macro, "Recall (macro):", recall_macro)
        if auroc_tm is not None:
            print("Torchmetrics Multilabel AUROC:", auroc_tm)
        print("Per-class AUROC (first 8):", np.round(per_class_auroc[:8], 4))

    return {
        "f1_macro": f1_macro,
        "f1_micro": f1_micro,
        "f1_samples": f1_samples,
        "precision_macro": precision_macro,
        "recall_macro": recall_macro,
        "per_class_auroc": per_class_auroc,
        "torchmetrics_auroc": auroc_tm,
        "y_pred": y_pred_np
    }


In [11]:
# -----------------------
# Grad-CAM visualization
# -----------------------
# For ResNet50, choose last conv layer: model.layer4[-1].conv3 (commonly used)
def get_resnet_target_layer(model):
    try:
        return model.layer4[-1].conv3
    except Exception:
        # fallback to last module with 4 dims
        for m in reversed(list(model.modules())):
            if isinstance(m, torch.nn.Conv2d):
                return m
    raise RuntimeError("Couldn't find conv target layer")

def gradcam_on_image(model, img_path, target_class, transform_for_model, target_layer=None, device='cuda'):
    """
    img_path: Path to .jpg
    target_class: int class index to visualize (0..NUM_CLASSES-1)
    transform_for_model: torchvision transform used for model input (e.g., val_tfms)
    Returns: cam_image (RGB numpy array), overlay (RGB numpy array)
    """
    # load and preprocess
    img = Image.open(img_path).convert("RGB")
    # we need a normalized tensor for model, and an unnormalized float image for overlay
    input_tensor = transform_for_model(img).unsqueeze(0)  # C,H,W -> 1,C,H,W
    # build image for overlay: normalized to [0,1] with same H,W
    img_for_overlay = np.array(img.resize((input_tensor.shape[-1], input_tensor.shape[-2]))) / 255.0
    input_tensor = input_tensor.to(device)

    # choose target layer
    if target_layer is None:
        target_layer = get_resnet_target_layer(model)

    cam = GradCAM(model=model, target_layers=[target_layer], use_cuda=(device == 'cuda'))
    targets = [ClassifierOutputTarget(target_class)]
    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0]   # H x W, values 0..1

    # overlay
    visualization = show_cam_on_image(img_for_overlay, grayscale_cam, use_rgb=True)
    return visualization, grayscale_cam


In [12]:
# 1) Ensure val_loader recreated with num_workers=0 (run block A)
# 2) Load checkpoint if not loaded already (reuse load_model_from_checkpoint)
ckpt_to_load = MODEL_DIR / "resnet50_multilabel_best (1).pth"
if not ckpt_to_load.exists():
    ckpt_to_load = MODEL_DIR / "resnet50_multilabel_final.pth"
print("Loading:", ckpt_to_load)
model = load_model_from_checkpoint(model_cls, ckpt_to_load, device)

# 3) Infer
probs, targets = infer_loader(model, val_loader, device)  # returns numpy arrays

# 4) Metrics
metrics = multilabel_metrics(targets, probs, threshold=0.5)


# 3) Per-class report (top/bottom by AUROC)
per_class_auroc = metrics["per_class_auroc"]
for i, cname in enumerate(class_names):
    print(f"{i:02d} {cname:20s} AUROC: {per_class_auroc[i]:.4f}  (pos fraction: {targets[:,i].mean():.4f})")

# 4) Confusion-like summary: per-class F1 (using threshold 0.5)
y_pred = metrics["y_pred"]
per_class_f1 = []
for c in range(NUM_CLASSES):
    try:
        per_class_f1.append(f1_score(targets[:,c], y_pred[:,c], zero_division=0))
    except Exception:
        per_class_f1.append(np.nan)
print("Per-class F1 (first 8):", np.round(per_class_f1[:8], 4))



Loading: /kaggle/working/models/resnet50_multilabel_best (1).pth


Infer:   0%|          | 0/47 [00:00<?, ?it/s]

F1 (macro/micro/samples): 0.4557527218433765 0.7765800042274361 0.8634333402980462
Precision (macro): 0.6049614860134169 Recall (macro): 0.40100188805834924
Torchmetrics Multilabel AUROC: 0.94955
Per-class AUROC (first 8): [     0.9816      0.9504      0.9133      0.9809      0.9784       0.941      0.9649      0.9398]
00 Aortic_enlargement   AUROC: 0.9816  (pos fraction: 0.2060)
01 Atelectasis          AUROC: 0.9504  (pos fraction: 0.0140)
02 Calcification        AUROC: 0.9133  (pos fraction: 0.0267)
03 Cardiomegaly         AUROC: 0.9809  (pos fraction: 0.1447)
04 Consolidation        AUROC: 0.9784  (pos fraction: 0.0213)
05 ILD                  AUROC: 0.9410  (pos fraction: 0.0280)
06 Infiltration         AUROC: 0.9649  (pos fraction: 0.0467)
07 Lung_Opacity         AUROC: 0.9398  (pos fraction: 0.0860)
08 Nodule_Mass          AUROC: 0.9216  (pos fraction: 0.0567)
09 Other_lesion         AUROC: 0.9059  (pos fraction: 0.0727)
10 Pleural_effusion     AUROC: 0.9701  (pos fraction: 0.069

In [13]:
# === Safe Inference + Grad-CAM overlays (robust) ===
import random, traceback
from pathlib import Path
import numpy as np
import torch
from tqdm.auto import tqdm
from PIL import Image
import matplotlib.pyplot as plt

# grad-cam imports
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image

# Ensure val_loader uses num_workers=0 for safety in notebook
from torch.utils.data import DataLoader
val_loader = DataLoader(val_ds, batch_size=BATCH, shuffle=False, num_workers=0, pin_memory=True)

# checkpoint selection (reuse)
MODEL_DIR = Path("/kaggle/working/models")
best_ckpt = MODEL_DIR / "resnet50_multilabel_best (1).pth"
final_ckpt = MODEL_DIR / "resnet50_multilabel_final.pth"
ckpt_to_load = best_ckpt if best_ckpt.exists() else final_ckpt
print("Loading checkpoint:", ckpt_to_load)

# reload model architecture and weights
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = load_model_from_checkpoint(model, ckpt_to_load, device)

# ---------- Inference (already worked before) ----------
probs, targets = infer_loader(model, val_loader, device)
metrics = multilabel_metrics(targets, probs, threshold=0.5)

# ---------- Grad-CAM helpers ----------
def get_resnet_target_layer(model):
    # try most common: last conv of layer4
    try:
        return model.layer4[-1].conv3
    except Exception:
        # fallback: return last Conv2d
        for m in reversed(list(model.modules())):
            if isinstance(m, torch.nn.Conv2d):
                return m
    raise RuntimeError("No Conv2d layer found in model")

def make_overlay_and_save(model, img_path: Path, class_idx: int, transform_for_model, save_path: Path, device_str: str):
    """
    Creates Grad-CAM overlay for img_path and saves to save_path.
    Returns True on success, False on failure.
    """
    try:
        # load raw PIL
        pil = Image.open(img_path).convert("RGB")
        # create model input tensor (1,C,H,W)
        inp_tensor = transform_for_model(pil).unsqueeze(0).to(device)

        # create float image for overlay in range [0,1] with same H,W as model input
        # transform_for_model may include normalization and ToTensor; we want raw resized image
        # So resize pil to target HxW of the model input
        # Determine H,W from inp_tensor shape
        _, C, H, W = inp_tensor.shape
        img_for_overlay = np.array(pil.resize((W, H))).astype(np.float32) / 255.0

        # pick target layer
        target_layer = get_resnet_target_layer(model)

        # instantiate GradCAM (do not pass use_cuda kw - versions differ)
        # GradCAM will automatically use CUDA if model is on CUDA
        cam = GradCAM(model=model, target_layers=[target_layer])

        targets = [ClassifierOutputTarget(class_idx)]
        # compute cam (H x W)
        grayscale_cam = cam(input_tensor=inp_tensor, targets=targets)[0]
        # overlay
        visualization = show_cam_on_image(img_for_overlay, grayscale_cam, use_rgb=True)

        # save
        save_path.parent.mkdir(parents=True, exist_ok=True)
        plt.imsave(save_path, visualization)
        # try to cleanup internal hooks to avoid destructor warnings if available
        if hasattr(cam, "clear_hooks"):
            try:
                cam.clear_hooks()
            except Exception:
                pass
        return True
    except Exception as e:
        print("Grad-CAM failed for", img_path, "err:", str(e))
        traceback.print_exc()
        return False

# ---------- Generate Grad-CAMs for a few sample val images ----------
out_dir = Path("gradcam_outputs")
out_dir.mkdir(exist_ok=True, parents=True)

random.seed(42)
sample_ids = random.sample(val_ids, min(12, len(val_ids)))

print("Creating Grad-CAMs for", len(sample_ids), "images ...")
for img_id in sample_ids:
    img_path = val_img_dir / f"{img_id}.jpg"
    # compute probs for single image (fast)
    with torch.no_grad():
        img_tensor = val_tfms(Image.open(img_path).convert("RGB")).unsqueeze(0).to(device)
        logits = model(img_tensor)
        probs_img = torch.sigmoid(logits).cpu().numpy()[0]
    # pick top predicted class
    top_cls = int(np.argmax(probs_img))
    save_path = out_dir / f"{img_id}_top{top_cls}_{class_names[top_cls]}.png"
    ok = make_overlay_and_save(model, img_path, top_cls, val_tfms, save_path, device_str='cuda' if torch.cuda.is_available() else 'cpu')
    if ok:
        print("Saved:", save_path, " top_prob:", probs_img[top_cls])
    else:
        print("Skipped:", img_id)

print("Done. Grad-CAM outputs in:", out_dir.resolve())


Loading checkpoint: /kaggle/working/models/resnet50_multilabel_best (1).pth


Infer:   0%|          | 0/47 [00:00<?, ?it/s]

F1 (macro/micro/samples): 0.4557527218433765 0.7765800042274361 0.8634333402980462
Precision (macro): 0.6049614860134169 Recall (macro): 0.40100188805834924
Torchmetrics Multilabel AUROC: 0.94955
Per-class AUROC (first 8): [     0.9816      0.9504      0.9133      0.9809      0.9784       0.941      0.9649      0.9398]
Creating Grad-CAMs for 12 images ...
Saved: gradcam_outputs/a7ae31b4052444b8f576b657b1753445_top13_Pulmonary_fibrosis.png  top_prob: 0.8020872
Saved: gradcam_outputs/f7d12f21dfedde3c815096d60fb5df44_top14_No_finding.png  top_prob: 0.9999883
Saved: gradcam_outputs/01815f05a4a4e424181a6d3be101dcd0_top14_No_finding.png  top_prob: 0.9885522
Saved: gradcam_outputs/902b03191f0a6bc2e7e47be16b63c38c_top0_Aortic_enlargement.png  top_prob: 0.99022794
Saved: gradcam_outputs/a838e79ba2e9716bc790a76f7ae1c94e_top3_Cardiomegaly.png  top_prob: 0.84627056
Saved: gradcam_outputs/c1253501ece20b237303679fb3b4310a_top14_No_finding.png  top_prob: 0.9999958
Saved: gradcam_outputs/6c4e3ecdd8533