### Download & extract PACS dataset (optional)

In [1]:
# Install deps (safe to re-run)
!pip -q install datasets pillow tqdm

import os, io
from PIL import Image
from tqdm import tqdm
from datasets import load_dataset

# Local directory that will receive the exported image tree.
root = "/content/data/PACS"
os.makedirs(root, exist_ok=True)

ds = load_dataset("flwrlabs/pacs", split="train")

# The label column name is probed rather than assumed: the first candidate
# (in this priority order) that is present in the schema wins.
label_field = next(
    (name for name in ("category", "class", "label") if name in ds.features),
    None,
)
if label_field is None:
    raise RuntimeError(f"Unexpected schema: {ds.features}")

def class_name(row):
    """Return the class label of a dataset row as a string.

    Integer labels are looked up in the ``names`` list of the label feature
    (``ds.features[label_field]``); any other value is passed through ``str``.
    """
    raw = row[label_field]
    if not isinstance(raw, int):
        return str(raw)
    return ds.features[label_field].names[raw]

# Normalize domains → art_painting/cartoon/photo/sketch
def norm_domain(v: str):
    """Canonicalize a raw domain label.

    The value is stringified, stripped, lower-cased, and spaces/hyphens are
    turned into underscores. Known spelling variants of ``art_painting`` are
    mapped onto it; anything else (including the four canonical names) is
    returned in its cleaned form, so callers must filter unknown domains.
    """
    separators = str.maketrans({" ": "_", "-": "_"})
    cleaned = str(v).strip().lower().translate(separators)
    # map common variants just in case
    variants = {
        "artpainting": "art_painting",
        "art_paintings": "art_painting",
    }
    return variants.get(cleaned, cleaned)

# Write images to /content/data/PACS/<domain>/<class>/<i>.jpg
# The set of accepted domains is loop-invariant, so build it once.
_VALID_DOMAINS = frozenset({"art_painting", "cartoon", "photo", "sketch"})

for i, row in tqdm(enumerate(ds), total=len(ds)):
    dom = norm_domain(row["domain"])
    if dom not in _VALID_DOMAINS:
        continue  # skip anything weird
    cls = class_name(row)
    out_dir = os.path.join(root, dom, cls)
    os.makedirs(out_dir, exist_ok=True)

    img = row["image"]
    if not isinstance(img, Image.Image):
        # Some datasets provide bytes; convert to PIL.Image
        img = Image.open(io.BytesIO(img["bytes"]))
    # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...), so convert
    # on *both* paths, not only when the image arrived as raw bytes.
    if img.mode != "RGB":
        img = img.convert("RGB")
    # The row index keeps file names unique and makes re-runs overwrite in place.
    img.save(os.path.join(out_dir, f"{i}.jpg"), quality=95)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/191M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9991 [00:00<?, ? examples/s]

100%|██████████| 9991/9991 [00:13<00:00, 728.68it/s]


### Imports, constants, and seed

In [2]:
import torch, random, os, numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, ConcatDataset
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm
import numpy as np
import pandas as pd
import os

# Root of the on-disk image tree; each domain is a sub-folder of this path.
DATA_ROOT = "/content/data/PACS"
# Domains whose images are concatenated into the training set.
SOURCES   = ["art_painting", "cartoon", "photo"]
# Domain that is loaded for evaluation only.
TARGET    = "sketch"
# Side length (in pixels) of the crops fed to the network.
IMG_SIZE  = 224
BATCH_SIZE = 64
# Worker processes used by each DataLoader.
NUM_WORKERS = 2

# seed set for reproducibility
def set_seed(seed=1337):
    """Seed Python, NumPy and PyTorch (CPU + all CUDA devices) with ``seed``.

    Also switches cuDNN to deterministic mode and disables its autotuner.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(1337)
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)


Using device: cuda


In [3]:
# First step of both pipelines: force every image to 3-channel RGB so the
# 3-value Normalize that follows always matches the channel count.
_to_rgb = transforms.Lambda(lambda pil_img: pil_img.convert("RGB"))

def make_loaders(data_root, img_size=224, batch_size=64, num_workers=2, sources=None, target=None):
    """Build the training loader and the per-domain evaluation loaders.

    Args:
        data_root: Directory containing one ``ImageFolder``-style sub-folder
            per domain (``<data_root>/<domain>/<class>/<image>``).
        img_size: Side length of the crops produced by both pipelines.
        batch_size: Batch size used by every loader.
        num_workers: Worker processes per loader; workers are kept alive
            between epochs whenever this is greater than zero.
        sources: Name(s) of the training domains. A single string is accepted
            and treated as a one-element list.
        target: Name of the domain that is only evaluated on.

    Returns:
        ``(train_loader, eval_loaders, num_classes, classes)`` where
        ``train_loader`` iterates the concatenation of all source domains with
        augmentation, ``eval_loaders`` maps every source domain and the target
        to a non-shuffled loader using the deterministic eval transform, and
        ``classes`` is the class-name list of the target domain.

    Raises:
        ValueError: If ``sources`` is empty/``None`` or ``target`` is missing.
        AssertionError: If a domain folder does not exist or the domains do
            not share the same class-to-index mapping.

    Note:
        The evaluation loaders for the source domains read the *same* image
        folders as the training set, so source accuracies computed from them
        are not held-out numbers.
    """
    # The ``None`` defaults are placeholders; fail fast with a clear message
    # instead of an opaque TypeError further down.
    if isinstance(sources, str):
        sources = [sources]  # a bare string would otherwise be iterated per character
    if not sources:
        raise ValueError("`sources` must name at least one training domain.")
    if not target:
        raise ValueError("`target` must name the evaluation-only domain.")
    sources = list(sources)

    tfm_train = transforms.Compose([
        _to_rgb,
        transforms.RandomResizedCrop(img_size, scale=(0.7, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.1, 0.1, 0.1, 0.05),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
    ])
    tfm_eval = transforms.Compose([
        _to_rgb,
        transforms.Resize(256),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
    ])

    def load_domain(name, tfm):
        p = Path(data_root)/name
        assert p.exists(), f"Missing domain folder: {p}"
        return datasets.ImageFolder(str(p), transform=tfm)

    # Build datasets: every source domain is loaded twice, once per transform.
    src_train, per_domain_eval = [], {}
    class_to_idx = None

    for d in sources:
        ds_tr = load_domain(d, tfm_train)
        ds_ev = load_domain(d, tfm_eval)
        if class_to_idx is None:
            # The first source domain defines the reference label mapping.
            class_to_idx = ds_tr.class_to_idx
        else:
            assert ds_tr.class_to_idx == class_to_idx, "Class mapping differs across domains."
        assert ds_ev.class_to_idx == class_to_idx
        src_train.append(ds_tr)
        per_domain_eval[d] = ds_ev

    target_eval = load_domain(target, tfm_eval)
    assert target_eval.class_to_idx == class_to_idx
    per_domain_eval[target] = target_eval

    # Loaders
    train_ds = ConcatDataset(src_train)
    train_loader = DataLoader(
        train_ds, batch_size=batch_size, shuffle=True,
        num_workers=num_workers, pin_memory=True,
        persistent_workers=(num_workers > 0), drop_last=True
    )
    eval_loaders = {
        d: DataLoader(ds, batch_size=batch_size, shuffle=False,
                      num_workers=num_workers, pin_memory=True,
                      persistent_workers=(num_workers > 0))
        for d, ds in per_domain_eval.items()
    }

    num_classes = len(target_eval.classes)
    return train_loader, eval_loaders, num_classes, target_eval.classes

# Build every loader once from the notebook-level configuration.
train_loader, eval_loaders, num_classes, classes = make_loaders(
    data_root=DATA_ROOT,
    img_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    sources=SOURCES,
    target=TARGET,
)
print(f"Train size: {len(train_loader.dataset)} | Num classes: {num_classes}")
print("Domains loaded:", list(eval_loaders))
print("Classes:", classes)

Train size: 6062 | Num classes: 7
Domains loaded: ['art_painting', 'cartoon', 'photo', 'sketch']
Classes: ['dog', 'elephant', 'giraffe', 'guitar', 'horse', 'house', 'person']


In [4]:
def build_resnet50(num_classes: int):
    """Return an ImageNet-pretrained ResNet-50 with a fresh classifier head.

    The final fully connected layer is replaced by a newly initialised
    ``nn.Linear`` with ``num_classes`` outputs; all other weights come from
    the ``IMAGENET1K_V2`` checkpoint.
    """
    pretrained = models.ResNet50_Weights.IMAGENET1K_V2
    net = models.resnet50(weights=pretrained)
    feature_dim = net.fc.in_features
    net.fc = nn.Linear(feature_dim, num_classes)
    return net

@torch.no_grad()
def evaluate(model, loaders, device):
    """Compute top-1 accuracy of ``model`` on each loader.

    Args:
        model: Module mapping a batch of inputs to per-class logits.
        loaders: Mapping from a domain name to an iterable of ``(x, y)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Dict mapping each domain name to ``correct / total`` (``0.0`` for an
        empty loader). The model is left in eval mode.
    """
    model.eval()

    def _domain_accuracy(batches):
        hits, n_seen = 0, 0
        for inputs, labels in batches:
            inputs = inputs.to(device)
            labels = labels.to(device)
            predicted = model(inputs).argmax(1)
            hits += (predicted == labels).sum().item()
            n_seen += labels.numel()
        # max(..., 1) guards the empty-loader case against division by zero.
        return hits / max(n_seen, 1)

    return {dname: _domain_accuracy(loader) for dname, loader in loaders.items()}


def train_erm(
    sources,
    target,
    train_loader,
    eval_loaders,
    num_classes,
    epochs=20,
    lr=3e-4,
    wd=0.05,
    out_dir="outputs_erm",
    seed=1337,
    use_amp=True,
):
    """Fine-tune a pretrained ResNet-50 with plain ERM and log per-domain accuracy.

    Args:
        sources: List of source-domain names; each must be a key of ``eval_loaders``.
        target: Name of the target domain; must be a key of ``eval_loaders``.
        train_loader: Loader yielding ``(x, y)`` training batches.
        eval_loaders: Mapping from domain name to an evaluation loader.
        num_classes: Number of output classes of the classifier head.
        epochs: Number of training epochs (also the cosine schedule length).
        lr: AdamW learning rate.
        wd: AdamW weight decay.
        out_dir: Directory receiving ``best_model.pt`` and ``training_log.csv``.
        seed: Seed applied to Python, NumPy and PyTorch before the model is built.
        use_amp: Enable mixed precision; only takes effect on CUDA.

    Returns:
        ``pandas.DataFrame`` with one row per epoch (loss, target accuracy,
        average/worst source accuracy and one ``acc_<domain>`` column each).

    Note:
        The checkpoint is selected by accuracy on the *target* domain, i.e.
        this is oracle model selection; the reported best target accuracy is
        therefore optimistic.
    """
    # Seed Python's RNG as well, matching what the notebook's set_seed does.
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = build_resnet50(num_classes).to(device)

    opt   = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    sched = optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)
    loss_fn = nn.CrossEntropyLoss()

    amp_on = bool(use_amp and device.type == "cuda")
    # torch.cuda.amp.GradScaler is deprecated; use the device-agnostic API when
    # the installed PyTorch provides it and fall back to the old one otherwise.
    if hasattr(torch.amp, "GradScaler"):
        scaler = torch.amp.GradScaler("cuda", enabled=amp_on)
    else:
        scaler = torch.cuda.amp.GradScaler(enabled=amp_on)

    best_target = 0.0
    logs = []
    os.makedirs(out_dir, exist_ok=True)
    log_path = os.path.join(out_dir, "training_log.csv")
    ckpt_path = os.path.join(out_dir, "best_model.pt")

    for ep in range(1, epochs+1):
        model.train()
        running_loss = 0.0
        seen = 0

        pbar = tqdm(train_loader, desc=f"Epoch {ep}/{epochs}", leave=False)
        for x, y in pbar:
            # non_blocking lets the copy overlap with compute when the loader
            # pins memory; it is a no-op otherwise.
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            opt.zero_grad(set_to_none=True)

            with torch.autocast(device_type=device.type, enabled=amp_on):
                logits = model(x)
                loss = loss_fn(logits, y)

            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()

            # Weight by batch size so the epoch loss is a per-sample mean.
            running_loss += loss.item() * y.size(0)
            seen += y.size(0)

        sched.step()

        acc = evaluate(model, eval_loaders, device)
        src_accs = [acc[d] for d in sources]
        avg_src = float(np.mean(src_accs))
        worst_src = float(min(src_accs))

        # Log
        row = {
            "epoch": ep,
            "train_loss": running_loss / max(seen, 1),
            "target_acc": acc[target],
            "avg_source_acc": avg_src,
            "worst_source_acc": worst_src,
        }
        for d in sources + [target]:
            row[f"acc_{d}"] = acc[d]
        logs.append(row)

        print(f"[Ep {ep:02d}] loss={row['train_loss']:.4f} | "
              f"tgt({target})={acc[target]:.3f} | src_avg={avg_src:.3f} | worst_src={worst_src:.3f}")

        # Save best-by-target. The first epoch always saves so a checkpoint
        # exists even if target accuracy never rises above 0.
        if ep == 1 or acc[target] > best_target:
            best_target = acc[target]
            torch.save(model.state_dict(), ckpt_path)

        # Flush the log every epoch so an interrupted run still leaves a
        # readable training_log.csv behind.
        pd.DataFrame(logs).to_csv(log_path, index=False)

    df = pd.DataFrame(logs)
    # Final write also covers epochs == 0, where the loop body never runs.
    df.to_csv(log_path, index=False)
    print(f"\nBest target ({target}) accuracy: {best_target:.3f}")
    return df


In [None]:
# Training reuses the objects returned by make_loaders(...) earlier in the
# notebook: train_loader, eval_loaders and num_classes.
df_logs = train_erm(
    SOURCES,
    TARGET,
    train_loader,
    eval_loaders,
    num_classes,
    epochs=20,
    lr=3e-4,
    wd=0.05,
    out_dir="outputs_erm",
    seed=1337,
    use_amp=True,   # set False if you hit AMP issues
)

# Peek last few rows
df_logs.tail()


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 143MB/s]
  scaler = torch.cuda.amp.GradScaler(enabled=(use_amp and device.type=="cuda"))
  with torch.cuda.amp.autocast(enabled=(use_amp and device.type=="cuda")):


[Ep 01] loss=0.4052 | tgt(sketch)=0.711 | src_avg=0.967 | worst_src=0.958




[Ep 02] loss=0.1278 | tgt(sketch)=0.582 | src_avg=0.976 | worst_src=0.965




[Ep 03] loss=0.0820 | tgt(sketch)=0.663 | src_avg=0.985 | worst_src=0.980




[Ep 04] loss=0.0750 | tgt(sketch)=0.601 | src_avg=0.990 | worst_src=0.985




In [None]:
import pandas as pd

log_path = "outputs_erm/training_log.csv"
df = pd.read_csv(log_path)

# idxmax() returns an index *label*, so the row must be fetched with .loc;
# .iloc only happens to agree while the index is the default 0..n-1 range.
best_idx = df["target_acc"].idxmax()
best = df.loc[best_idx]

summary = {
    "best_epoch": int(best["epoch"]),
    # Key follows the configured target domain instead of hard-coding "sketch".
    f"target_{TARGET}_acc": round(float(best["target_acc"]), 4),
    "avg_source_acc": round(float(best["avg_source_acc"]), 4),
    "worst_source_acc": round(float(best["worst_source_acc"]), 4),
}
# Per-domain accuracies at the selected epoch.
for d in SOURCES + [TARGET]:
    summary[f"{d}_acc"] = round(float(best[f"acc_{d}"]), 4)

summary
