# Pneumonia Detection: Anchor-Free vs. Anchor-Based Object Detection

**NAML Course Project — Politecnico di Milano**

This notebook runs the full training and evaluation pipeline on Google Colab with a CUDA GPU.

Three models are compared:
1. **FCOS** — anchor-free (paper's method)
2. **RetinaNet** — anchor-based, one-stage
3. **Faster R-CNN** — anchor-based, two-stage

> Make sure to set **Runtime → Change runtime type → T4 GPU** before running.

## 1. Setup

In [None]:
# Report the PyTorch build and the CUDA device attached to this runtime, if any
import torch

print(f"PyTorch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available:  {cuda_ready}")

if not cuda_ready:
    # Nothing to query without a GPU; point the user at the runtime settings.
    print("WARNING: No GPU detected! Go to Runtime > Change runtime type > T4 GPU")
else:
    print(f"GPU:             {torch.cuda.get_device_name(0)}")
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    # Reported in decimal gigabytes (bytes / 1e9).
    print(f"VRAM:            {total_bytes / 1e9:.1f} GB")

In [6]:
# Upload project files
# Option A: Upload ZIP (run this cell, then upload your Project_Pneumonia_Detection.zip)
import os
from google.colab import files

if not os.path.exists("src"):
    print("Upload your project ZIP file...")
    uploaded = files.upload()
    zip_name = list(uploaded.keys())[0]
    !unzip -q "{zip_name}"
    # If extracted into a subdirectory, move into it
    if os.path.exists("Project_Pneumonia_Detection/src"):
        os.chdir("Project_Pneumonia_Detection")
    print(f"Working directory: {os.getcwd()}")
else:
    print(f"Project already loaded. Working directory: {os.getcwd()}")

Upload your project ZIP file...


KeyboardInterrupt: 

In [None]:
# Option B: Mount Google Drive (if project is stored there)
# Uncomment the lines below if using Google Drive instead of ZIP upload.
# `os` is otherwise only imported by the Option A cell, so the import is
# repeated here for the case where that cell was skipped.

# import os
# from google.colab import drive
# drive.mount('/content/drive')
# os.chdir('/content/drive/MyDrive/<path-to>/Project_Pneumonia_Detection')

In [None]:
# Install dependencies
!pip install -q pydicom seaborn

In [None]:
# Verify project structure
import os

# Files the rest of the notebook expects relative to the working directory.
# NOTE(review): a later cell runs `python -m src.preprocess`, which this list
# does not cover — confirm where that module lives and add it here.
required = ["main.py", "src/config.py", "src/engine.py", "src/evaluate.py",
            "src/dataset.py", "src/transforms.py", "src/visualize.py",
            "src/models/__init__.py", "src/models/fcos.py",
            "src/models/retinanet.py", "src/models/faster_rcnn.py"]
missing = [f for f in required if not os.path.exists(f)]
if missing:
    print(f"ERROR: Missing files: {missing}")
    print(f"Current directory: {os.getcwd()}")
    print(f"Contents: {os.listdir('.')}")
    # Raise so that "Run all" stops here instead of going on to download the
    # dataset and launch training from a directory without the project code.
    raise FileNotFoundError(
        f"Project files missing from {os.getcwd()}: {missing}"
    )
else:
    print("All project files found.")

## 2. Download RSNA Dataset

In [None]:
# Upload your Kaggle API key (kaggle.json)
# Get it from: https://www.kaggle.com/settings > API > Create New Token

import os

kaggle_dir = os.path.expanduser("~/.kaggle")
kaggle_key_path = os.path.join(kaggle_dir, "kaggle.json")

if not os.path.exists(kaggle_key_path):
    from google.colab import files
    print("Upload your kaggle.json file...")
    uploaded = files.upload()
    # A cancelled upload dialog yields no entries.
    if not uploaded:
        raise RuntimeError("No file was uploaded. Re-run this cell and select kaggle.json.")

    # Use whatever name the file arrived under: repeated downloads are often
    # saved as e.g. "kaggle (1).json", which a fixed `mv kaggle.json` misses.
    uploaded_name, key_bytes = next(iter(uploaded.items()))

    os.makedirs(kaggle_dir, exist_ok=True)
    # Create the key file owner-readable only (0o600) from the start.
    key_fd = os.open(kaggle_key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(key_fd, "wb") as key_file:
        key_file.write(key_bytes)
    os.chmod(kaggle_key_path, 0o600)

    # Do not leave a second copy of the credential in the working directory.
    if os.path.exists(uploaded_name):
        os.remove(uploaded_name)
    print("Kaggle API key configured.")
else:
    print("Kaggle API key already configured.")

In [None]:
# Download the RSNA Pneumonia Detection Challenge dataset
import os
if not os.path.exists("data/stage_2_train_labels.csv"):
    !pip install -q kaggle
    !kaggle competitions download -c rsna-pneumonia-detection-challenge
    !mkdir -p data
    !unzip -q rsna-pneumonia-detection-challenge.zip -d data/
    !rm -f rsna-pneumonia-detection-challenge.zip
    print("Dataset downloaded and extracted.")
else:
    print("Dataset already present.")

# Show dataset stats
import pandas as pd
df = pd.read_csv("data/stage_2_train_labels.csv")
n_patients = df["patientId"].nunique()
n_positive = df[df["Target"] == 1]["patientId"].nunique()
print(f"Total patients: {n_patients}")
print(f"Positive (pneumonia): {n_positive}")
print(f"Negative: {n_patients - n_positive}")

In [None]:
# Preprocess DICOM → PNG (10-50x faster data loading)
import os
png_dir = "data/stage_2_train_images_png"
if not os.path.exists(png_dir) or len(os.listdir(png_dir)) < 100:
    !PYTHONUNBUFFERED=1 python -m src.preprocess --data-dir data/ --compress 1
else:
    print(f"PNG images already exist ({len(os.listdir(png_dir))} files).")

## 3. Training Configuration

Adjust these settings based on your Colab GPU and desired training duration.

In [None]:
# ============================================================
# TRAINING SETTINGS — adjust these as needed
# ============================================================

EPOCHS = 20           # Paper uses 20+ epochs
BATCH_SIZE = 8        # 8 works well on T4 (16 GB); use 4 if OOM
MAX_SAMPLES = None    # None = full dataset; set to 500 for quick test
LEARNING_RATE = 1e-4  # Stable LR for Adam + detection models
IMAGE_SIZE = 512      # Input image size
VAL_FREQUENCY = 2     # Validate every N epochs (2 = 50% faster)
EARLY_STOPPING = 5    # Stop after N validations without improvement
RESUME = False        # Set True to resume after a crash/timeout

# ============================================================

# Build the shell command that the "Run Full Pipeline" cell executes via `!{cmd}`.
resume_flag = " --resume" if RESUME else ""

cmd = (
    f"PYTHONUNBUFFERED=1 python main.py --mode full --device cuda"
    f" --epochs {EPOCHS} --batch-size {BATCH_SIZE}"
    f" --lr {LEARNING_RATE} --image-size {IMAGE_SIZE}"
    f" --val-frequency {VAL_FREQUENCY}"
    f" --early-stopping {EARLY_STOPPING}"
    f" --prefetch-factor 4"
    f"{resume_flag}"
)
# Subset runs are detected with `is not None` both here and in the estimate
# below, so the printed estimate always describes the command actually built.
using_subset = MAX_SAMPLES is not None
if using_subset:
    cmd += f" --max-samples {MAX_SAMPLES}"

print(f"Command: {cmd}")
print(f"\nEstimated time on T4 GPU:")
if using_subset:
    # Rough rule of thumb: one minute per 100 patients per epoch. Floored at
    # one minute so small subsets do not print "~0 minutes".
    subset_minutes = max(1, MAX_SAMPLES * EPOCHS // 100)
    print(f"  ~{subset_minutes} minutes (subset of {MAX_SAMPLES} patients)")
else:
    print(f"  ~{EPOCHS * 8} minutes for full dataset ({EPOCHS} epochs)")

## 4. Run Full Pipeline

This trains all 3 models, evaluates them, and generates all comparison plots.

In [None]:
# Run the full pipeline (train + evaluate + compare + visualize)
# `cmd` is the shell command string assembled in the Training Configuration
# cell; run that cell first so the settings chosen there are the ones used here.
!{cmd}

## 5. Results

In [None]:
# Load and display metrics
import json

RULE_WIDTH = 70

# Each entry: (column heading, key in results/all_metrics.json, field width)
DETECTION_COLUMNS = [
    ("AP@0.5", "AP@0.5", 8),
    ("AP@.5:.95", "AP@0.5:0.95", 10),
    ("AP_M", "AP_M", 8),
    ("AP_L", "AP_L", 8),
    ("AR@10", "AR@10", 8),
    ("AR_L", "AR_L", 8),
]
PATIENT_COLUMNS = [
    ("Accuracy", "patient_accuracy", 10),
    ("Precision", "patient_precision", 10),
    ("Recall", "patient_recall", 10),
    ("F1", "patient_f1", 10),
]


def _print_metric_table(title, columns, per_model):
    """Print one fixed-width table with a row per model.

    Args:
        title: Heading printed between the two "=" rules.
        columns: Sequence of (heading, metric key, width) tuples.
        per_model: Mapping of model name to its metric dict; each stored value
            is multiplied by 100 and shown with one decimal place.

    Raises:
        KeyError: If a model's metric dict lacks one of the requested keys.
    """
    print("=" * RULE_WIDTH)
    print(f"  {title}")
    print("=" * RULE_WIDTH)
    headings = [f"{heading:>{width}}" for heading, _, width in columns]
    print(" ".join([f"{'Model':<16}"] + headings))
    print("-" * RULE_WIDTH)
    for model_name, scores in per_model.items():
        cells = [f"{scores[key] * 100:>{width}.1f}" for _, key, width in columns]
        print(" ".join([f"{model_name:<16}"] + cells))


with open("results/all_metrics.json") as metrics_file:
    metrics = json.load(metrics_file)

_print_metric_table("DETECTION PERFORMANCE (%)", DETECTION_COLUMNS, metrics)
print()
_print_metric_table("PATIENT-LEVEL CLASSIFICATION (%)", PATIENT_COLUMNS, metrics)

### Training Loss

In [None]:
# Show the training-loss figure stored under results/ (the file must already exist).
# The Image and display names imported here are reused by the plot cells below.
from IPython.display import Image, display
display(Image(filename="results/training_loss.png", width=800))

### Validation AP@0.5 Over Training

In [None]:
# Show the validation AP@0.5 figure from results/.
# Relies on Image/display imported in the Training Loss cell.
display(Image(filename="results/val_ap_over_epochs.png", width=800))

### AP & AR Comparison

In [None]:
# Show the AP comparison figure followed by the AR comparison figure.
# Relies on Image/display imported in the Training Loss cell.
for comparison_plot in ("results/ap_comparison.png", "results/ar_comparison.png"):
    display(Image(filename=comparison_plot, width=800))

### Precision-Recall Curve

In [None]:
# Show the precision-recall curve figure from results/.
# Relies on Image/display imported in the Training Loss cell.
display(Image(filename="results/pr_curve.png", width=600))

### AP vs IoU Threshold

In [None]:
# Show the AP-vs-IoU-threshold figure from results/.
# Relies on Image/display imported in the Training Loss cell.
display(Image(filename="results/ap_vs_iou.png", width=800))

### Patient-Level Classification

In [None]:
# Show the patient-level classification figure from results/.
# Relies on Image/display imported in the Training Loss cell.
display(Image(filename="results/classification_metrics.png", width=800))

### Training Speed

In [None]:
# Show the epoch-times figure from results/.
# Relies on Image/display imported in the Training Loss cell.
display(Image(filename="results/epoch_times.png", width=600))

### Detection Samples

In [None]:
# Show the detection-samples figure from results/.
# Relies on Image/display imported in the Training Loss cell.
display(Image(filename="results/detection_samples.png", width=900))

## 6. Download Results

In [None]:
# Package all results and checkpoints for download
!zip -r pneumonia_results.zip results/ checkpoints/

from google.colab import files
files.download("pneumonia_results.zip")
print("Download started. Extract the ZIP to update your local project.")

---

After downloading, extract `pneumonia_results.zip` into your local `Project_Pneumonia_Detection/` folder to update the report and presentation with the new GPU-trained results.