In [1]:
# Cell 1 ── imports & global config
import time, random, torch, torch.nn as nn, torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import numpy as np

# Pick the best available accelerator: Apple-silicon MPS first, then CUDA.
# Fall back to the CPU so the notebook still runs on machines without a GPU
# (previously "cuda" was selected even when no CUDA device was present).
if torch.backends.mps.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
BS      = 128    # mini-batch size used by every DataLoader
LR      = 0.05   # SGD learning rate
# Seed every RNG the notebook touches (torch, stdlib random, numpy).
torch.manual_seed(0); random.seed(0); np.random.seed(0)
# Let cuDNN auto-tune convolution algorithms; only relevant on CUDA.
torch.backends.cudnn.benchmark = True
print("Device:", DEVICE)


Device: mps


In [2]:
# Cell 2 ── single-worker loaders for every resolution
def make_loader(size, train):
    """Build a single-process STL10 DataLoader at the given resolution.

    Args:
        size:  target image size passed to the resize transform.
        train: True  -> "train" split, random-resized-crop + horizontal flip,
                        shuffled batches.
               False -> "test" split, plain resize, no shuffling.

    Returns:
        A DataLoader with batch size BS and num_workers=0.
    """
    if train:
        split = "train"
        steps = [
            transforms.RandomResizedCrop(size, scale=(0.6, 1.0)),
            transforms.RandomHorizontalFlip(),
        ]
    else:
        split = "test"
        steps = [
            transforms.Resize(size),
            transforms.Lambda(lambda x: x),   # identity placeholder, no augmentation
        ]
    steps.append(transforms.ToTensor())

    dataset = datasets.STL10(
        "data", split=split, download=True, transform=transforms.Compose(steps)
    )
    return DataLoader(dataset, batch_size=BS, shuffle=train, num_workers=0)

# Resolutions used by the progressive-resize curriculum; one train loader
# and one validation loader is built per resolution, keyed by size.
SIZES = [32, 40, 56, 72, 96]
train_ld = dict(zip(SIZES, (make_loader(size, True) for size in SIZES)))
val_ld = dict(zip(SIZES, (make_loader(size, False) for size in SIZES)))
print("Loaders ready.")

Loaders ready.


In [3]:
# Cell 3 ── helpers
# Shared training criterion: cross-entropy with label smoothing of 0.1.
loss_CE = nn.CrossEntropyLoss(label_smoothing=0.1)

def accuracy(net, loader):
    """Return the top-1 accuracy of `net` over `loader`, in percent.

    Switches `net` to eval mode (and leaves it there) and runs every batch
    under `torch.no_grad()` on DEVICE.
    """
    net.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, targets in loader:
            predicted = net(inputs.to(DEVICE)).argmax(1)
            hits = predicted == targets.to(DEVICE)
            n_correct += hits.sum().item()
            n_seen += targets.size(0)
    return 100 * n_correct / n_seen

def train_one_epoch(net, loader, opt):
    """Run one optimisation pass over `loader`.

    Puts `net` in training mode, then for every batch: moves it to DEVICE,
    clears gradients, back-propagates the `loss_CE` loss and steps `opt`.
    Returns nothing.
    """
    net.train()
    for inputs, targets in loader:
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)
        opt.zero_grad(set_to_none=True)
        batch_loss = loss_CE(net(inputs), targets)
        batch_loss.backward()
        opt.step()

def resnet18():
    """Create a 10-class torchvision ResNet-18 with a small-image stem.

    The default first convolution is replaced by a 3x3, stride-1, padding-1
    convolution without bias, and the stem max-pool is replaced by an
    identity. The model is moved to DEVICE before being returned.
    """
    model = models.resnet18(num_classes=10)
    model.conv1 = nn.Conv2d(
        in_channels=3,
        out_channels=64,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
    )  # 3×3 stem
    model.maxpool = nn.Identity()
    model = model.to(DEVICE)
    return model

In [5]:
# Cell X – seeds loop WITH epoch-level prints
import time, math, numpy as np, torch, random

# Experiment configuration.
SEEDS = list(range(41, 51))           # ten paired runs: seeds 41..50
ladder = [32, 40, 56, 72, 96]         # resolutions visited in order
epochs_step = [5, 5, 3, 2, 20]        # same curriculum: epochs per resolution
# Wall-clock minutes per seed, filled in by the seed loop.
baseline_times = []
ladder_times = []

def run_baseline(seed):
    """Train a fresh ResNet-18 at 96 px only and time it.

    Seeds torch / random / numpy with `seed`, then trains for at most
    40 epochs, evaluating on the 96-px validation loader after each one
    and stopping as soon as validation accuracy reaches 60 %.

    Returns:
        Elapsed wall-clock time in minutes, measured from just after the
        optimiser is created.
    """
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    net = resnet18()
    opt = torch.optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=1e-4)
    started = time.perf_counter()

    def elapsed_minutes():
        return (time.perf_counter() - started) / 60

    for epoch in range(40):
        train_one_epoch(net, train_ld[96], opt)
        val_acc = accuracy(net, val_ld[96])
        so_far = elapsed_minutes()
        print(f"[seed {seed}] BASE e{epoch:02d} | val {val_acc:5.1f}% | {so_far:5.2f} m")
        if val_acc >= 60:
            break
    return elapsed_minutes()

# ⬇️  Paste this as a NEW cell  ⬇️
# ---------------------------------------------------
def run_ladder(seed):
    """
    Progressive-resize training with a single-epoch BatchNorm freeze
    on the very first 96-px epoch.

    Walks through `ladder` / `epochs_step` in lock-step, training
    `n_ep` epochs at each resolution and validating at that same
    resolution after every epoch.  Training stops early once the 96-px
    validation accuracy reaches 60 %.

    BatchNorm freeze: `train_one_epoch` calls `net.train()`, which puts
    every sub-module (BatchNorm included) back into training mode, so
    calling `m.eval()` on the BN layers beforehand has no effect.  The
    freeze is therefore enforced with a forward pre-hook that re-applies
    `eval()` to the BN layers before every forward pass of that epoch;
    the hook is removed and the layers are returned to train mode as soon
    as the epoch ends.

    Args:
        seed: value used to seed torch, random and numpy.

    Returns:
        Elapsed wall-clock time in minutes.
    """
    torch.manual_seed(seed); random.seed(seed); np.random.seed(seed)

    net = resnet18()
    opt = torch.optim.SGD(net.parameters(), lr=LR,
                          momentum=0.9, weight_decay=1e-4)

    bn_layers = [m for m in net.modules()
                 if isinstance(m, torch.nn.BatchNorm2d)]

    def _keep_bn_in_eval(_module, _inputs):
        # Runs right before each forward pass, i.e. after net.train().
        for bn in bn_layers:
            bn.eval()                 # stop running-stat updates

    t0 = time.perf_counter()

    bn_frozen = False          # True after we've done the one-epoch freeze
    reached_target = False     # True once 96-px val accuracy hits 60 %

    for sz, n_ep in zip(ladder, epochs_step):
        for ep in range(n_ep):

            # ---- ❶ Freeze BN running-stats on FIRST 96-px epoch ----------
            freeze_now = (sz == 96 and ep == 0 and not bn_frozen)
            hook = (net.register_forward_pre_hook(_keep_bn_in_eval)
                    if freeze_now else None)

            # ---- ❷ Standard training step -------------------------------
            try:
                train_one_epoch(net, train_ld[sz], opt)
            finally:
                # ---- ❸ Restore BN behaviour after the frozen epoch ------
                if hook is not None:
                    hook.remove()
                    for bn in bn_layers:
                        bn.train()    # resume running-stat updates
                    bn_frozen = True

            # ---- Logging & early-exit -----------------------------------
            acc  = accuracy(net, val_ld[sz])
            mins = (time.perf_counter() - t0) / 60
            print(f"[seed {seed}] LAD {sz} e{ep:02d} | "
                  f"val {acc:5.1f}% | {mins:5.2f} m")

            if sz == 96 and acc >= 60:        # stop at target accuracy
                reached_target = True
                break
        if reached_target:
            break

    return (time.perf_counter() - t0) / 60
# ---------------------------------------------------
# End of new cell; re-run the seed loop cell below it


def paired_t(delta):
    """Paired-difference summary statistics.

    Args:
        delta: 1-D sequence/array of per-pair differences.

    Returns:
        (mean, ci, p, t) where
          mean: sample mean of `delta`;
          ci:   half-width of the two-sided 95 % confidence interval of the
                mean.  Uses the Student-t critical value for n-1 degrees of
                freedom when it is tabulated (df <= 30) and the normal value
                1.96 otherwise;
          p:    two-sided p-value from the normal approximation when n > 30,
                otherwise None (no small-sample p-value is computed);
          t:    the paired t statistic mean / (std / sqrt(n)).
        With fewer than two observations the spread is undefined, so `ci`
        and `t` are NaN and `p` is None.
    """
    # 0.975 quantiles of Student's t for df = 1..30 (standard table values).
    t975 = (12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262,
            2.228, 2.201, 2.179, 2.160, 2.145, 2.131, 2.120, 2.110, 2.101,
            2.093, 2.086, 2.080, 2.074, 2.069, 2.064, 2.060, 2.056, 2.052,
            2.048, 2.045, 2.042)

    delta = np.asarray(delta, dtype=float)
    n = len(delta)
    nan = float("nan")
    if n < 2:
        # A sample standard deviation needs at least two observations.
        mean = delta.mean() if n else nan
        return mean, nan, None, nan

    mean = delta.mean()
    std = delta.std(ddof=1)
    sem = std / math.sqrt(n)              # standard error of the mean
    t = mean / sem

    df = n - 1
    crit = t975[df - 1] if df <= len(t975) else 1.96
    ci = crit * sem

    p = 2*(1-0.5*(1+math.erf(abs(t)/math.sqrt(2)))) if n > 30 else None
    return mean, ci, p, t

# Run baseline and ladder back-to-back for every seed; a pair of timings is
# recorded only after both runs of that seed have finished.
for s in SEEDS:
    print(f"\n========= Seed {s} baseline ===========================")
    base_minutes = run_baseline(s)
    print(f"→ baseline done in {base_minutes:.2f} min\n")

    print(f"========= Seed {s} ladder =============================")
    ladder_minutes = run_ladder(s)
    print(f"→ ladder   done in {ladder_minutes:.2f} min\n")

    baseline_times.append(base_minutes)
    ladder_times.append(ladder_minutes)

# -------- paired statistics ------------------------------------------
# delta > 0 means the ladder run was faster than the baseline for that seed.
base = np.array(baseline_times); lad = np.array(ladder_times); delta=base-lad
m,ci,p,t = paired_t(delta)
print("-----------------------------------------------------------")
print("Seeds:", SEEDS)
for s,b,l in zip(SEEDS, base, lad):
    print(f"seed {s}: Δ = {b-l:4.2f} min   (base {b:5.2f} | lad {l:5.2f})")
print(f"\nMean Δ-time : {m:4.2f} ±{ci:4.2f} (95% CI)")
# paired_t returns p=None when it did not compute a p-value; test for None
# explicitly so a genuine p-value of 0.0 is still reported.
p_text = p if p is not None else 'n<30'
print(f"Paired t-stat {t:4.2f}   p ≈ {p_text}")
print(f"Speed-up    : {base.mean()/lad.mean():4.1f}×")
print("-----------------------------------------------------------")





[seed 41] BASE e00 | val  24.2% |  0.30 m
[seed 41] BASE e01 | val  27.3% |  0.58 m
[seed 41] BASE e02 | val  31.8% |  0.85 m
[seed 41] BASE e03 | val  35.1% |  1.13 m
[seed 41] BASE e04 | val  33.7% |  1.41 m
[seed 41] BASE e05 | val  33.8% |  1.69 m
[seed 41] BASE e06 | val  45.4% |  1.97 m
[seed 41] BASE e07 | val  45.2% |  2.25 m
[seed 41] BASE e08 | val  39.4% |  2.54 m
[seed 41] BASE e09 | val  46.2% |  2.82 m
[seed 41] BASE e10 | val  41.5% |  3.10 m
[seed 41] BASE e11 | val  42.0% |  3.38 m
[seed 41] BASE e12 | val  38.7% |  3.66 m
[seed 41] BASE e13 | val  50.4% |  3.94 m
[seed 41] BASE e14 | val  52.5% |  4.22 m
[seed 41] BASE e15 | val  51.4% |  4.50 m
[seed 41] BASE e16 | val  53.3% |  4.79 m
[seed 41] BASE e17 | val  55.9% |  5.07 m
[seed 41] BASE e18 | val  58.1% |  5.35 m
[seed 41] BASE e19 | val  63.8% |  5.63 m
→ baseline done in 5.63 min

[seed 41] LAD 32 e00 | val  22.9% |  0.10 m
[seed 41] LAD 32 e01 | val  30.1% |  0.16 m
[seed 41] LAD 32 e02 | val  41.4% |  0.22 