# Run Camelyon17 (WILDS)

This notebook is a thin orchestrator for running ERM, DANN, and ME-IIS on Camelyon17, using the official `wilds` loader together with `dataset.eval(...)`.

It is designed to be re-runnable: existing datasets/checkpoints are reused, and training is skipped unless `FORCE_RERUN=True`.


## 1) Drive mount + paths


In [None]:
import os, sys

# Treat the session as Colab when the `google.colab` package is already loaded.
IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
    # Imported here because the module is only needed on the Colab path.
    from google.colab import drive

    drive.mount("/content/drive")

# Root for outputs/checkpoints. The MEIIS_DRIVE_ROOT environment variable wins;
# otherwise use the ME-IIS folder on Drive (Colab) or the current directory.
if IN_COLAB:
    DRIVE_ROOT = os.environ.get("MEIIS_DRIVE_ROOT", "/content/drive/MyDrive/ME-IIS")
else:
    DRIVE_ROOT = os.environ.get("MEIIS_DRIVE_ROOT", os.getcwd())

# Where the WILDS dataset cache lives:
# - on Drive: survives runtime resets (no re-download) at the cost of slower I/O;
# - on local scratch: faster I/O, but lost whenever the runtime resets.
# Only consulted in Colab; set to False there to use local scratch instead.
USE_DRIVE_WILDS_CACHE = IN_COLAB
if not IN_COLAB:
    DATA_ROOT_DEFAULT = os.path.join(os.getcwd(), "datasets", "wilds")
elif USE_DRIVE_WILDS_CACHE:
    DATA_ROOT_DEFAULT = os.path.join(DRIVE_ROOT, "data", "wilds")
else:
    DATA_ROOT_DEFAULT = "/content/data/wilds"

# An existing WILDS_DATA_ROOT takes precedence; otherwise the default is used
# and written back to the environment so the variable is always set afterwards.
DATA_ROOT = os.environ.setdefault("WILDS_DATA_ROOT", DATA_ROOT_DEFAULT)

# Checkpoints and outputs sit side by side under DRIVE_ROOT.
OUT_ROOT = os.path.join(DRIVE_ROOT, "outputs_camelyon17")
CKPT_ROOT = os.path.join(DRIVE_ROOT, "checkpoints_camelyon17")

print("DATA_ROOT:", DATA_ROOT)
print("CKPT_ROOT:", CKPT_ROOT)
print("OUT_ROOT:", OUT_ROOT)


## 2) Repo + commit


In [None]:
import os, subprocess

def get_git_commit(cwd=None):
    """Return the HEAD commit hash of the git repo containing *cwd*, or None.

    Git itself is asked to resolve the repository instead of probing for a
    ``.git`` directory, so this also works from a subdirectory of the repo and
    in worktrees/submodules (where ``.git`` is a file, not a directory).

    Args:
        cwd: Directory to run git in; ``None`` means the current working directory.

    Returns:
        The stripped output of ``git rev-parse HEAD``, or ``None`` when the
        command cannot be run or fails (git not installed, *cwd* missing,
        not inside a repo, repo without commits).
    """
    try:
        raw = subprocess.check_output(
            ["git", "rev-parse", "HEAD"],
            cwd=cwd,
            # Keep git's "fatal: not a git repository" noise out of the cell output.
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError):
        return None
    return raw.decode().strip() or None


# Record which code version produced the results of this run.
commit = get_git_commit()
if commit is not None:
    print("commit:", commit)
else:
    # Also reached when git is unavailable or HEAD cannot be resolved.
    print("NOTE: current working directory is not a git repo; skipping commit print.")


## 3) Install deps


In [None]:
!pip -q install -r requirements.txt


## 4) Configure experiment


In [None]:
# --- Run control -----------------------------------------------------------
# Leave FORCE_RERUN off to skip training where possible; one run is launched
# per entry in SEEDS.
SEEDS = [0]
FORCE_RERUN = False

# --- Evaluation protocol ---------------------------------------------------
# "uda_target": adapt on test_unlabeled (unlabeled target), select on val,
#               report on test.
# "align_val":  adapt on val_unlabeled and evaluate on val; meant for
#               debugging/ablations only.
SPLIT_MODE = "uda_target"  # uda_target | align_val

# --- Compute budget --------------------------------------------------------
# Shared by every method so the comparison stays fair; only change these on
# purpose, and then for all methods alike.
BACKBONE = "densenet121"  # densenet121 | resnet50
PRETRAINED = False
EPOCHS = 5
BATCH_SIZE = 64


## 5) Run ERM + DANN + ME-IIS


In [None]:
from dataclasses import replace

from src.run_experiments import default_camelyon17_configs, run_experiments

# Budget/backbone settings applied on top of every default config, so all
# methods run with the same values chosen in the configuration cell.
shared_overrides = dict(
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    backbone=BACKBONE,
    pretrained=PRETRAINED,
)

# One config per (seed, default config) pair; `replace` returns a modified copy
# and leaves the default config object untouched.
configs = [
    replace(base_cfg, **shared_overrides)
    for seed in SEEDS
    for base_cfg in default_camelyon17_configs(
        seed=seed,
        split_mode=SPLIT_MODE,
        force_rerun=FORCE_RERUN,
    )
]

# Run everything; the returned path is read as a CSV by the plotting cell below.
summary_path = run_experiments(configs, data_root=DATA_ROOT, ckpt_root=CKPT_ROOT, out_root=OUT_ROOT)
print("summary_path:", summary_path)


## 6) Aggregate metrics + plot


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Show the raw per-run summary table first (`display` is provided by IPython).
df = pd.read_csv(summary_path)
display(df)

# Collapse to one value per algorithm before plotting. With several seeds the
# summary holds repeated algorithm labels, and plotting the raw rows would draw
# those bars on top of each other at the same x position.
# sort=False keeps algorithms in their order of first appearance.
acc_by_algo = (
    df.assign(
        algorithm=df["algorithm"].astype(str),
        eval_acc=df["eval_acc"].astype(float),
    )
    .groupby("algorithm", sort=False)["eval_acc"]
    .agg(["mean", "std"])
)

# std is NaN when an algorithm has a single run; only draw error bars when at
# least one algorithm actually has spread across runs.
acc_std = acc_by_algo["std"].fillna(0.0)
yerr = acc_std.to_numpy() if (acc_std > 0).any() else None

fig, ax = plt.subplots(figsize=(6, 3))
ax.bar(acc_by_algo.index.tolist(), acc_by_algo["mean"].to_numpy(), yerr=yerr, capsize=3)
ax.set_title("Camelyon17 (WILDS)")
ax.set_ylabel("Eval accuracy")
ax.grid(True, axis="y", alpha=0.3)
plt.show()


## 7) Save outputs to Drive

All artifacts are already written under `OUT_ROOT` and `CKPT_ROOT`, which default to Drive in Colab (set the `MEIIS_DRIVE_ROOT` environment variable to use a different location).