In [1]:
# Cell 1 ── imports & global config
import time, random, torch, torch.nn as nn, torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import numpy as np

# Pick the best available accelerator and fall back to the CPU, so the
# notebook still runs on machines that have neither MPS nor CUDA
# (previously "cuda" was assumed whenever MPS was missing).
if torch.backends.mps.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
BS      = 128     # mini-batch size
LR      = 0.05    # learning rate
# Seed every RNG the notebook relies on (torch, Python's random, NumPy).
torch.manual_seed(0); random.seed(0); np.random.seed(0)
# cuDNN autotuning flag; it is a cuDNN (CUDA) setting and is harmless elsewhere.
torch.backends.cudnn.benchmark = True
print("Device:", DEVICE)


Device: mps


In [2]:
# Cell 2 ── single-worker loaders for every resolution
def make_loader(size, train):
    """Build an STL-10 ``DataLoader`` whose images are produced at ``size`` px.

    Args:
        size: Target resolution handed to the crop/resize transform.
        train: If true, load the ``"train"`` split with random-resized-crop
            and horizontal-flip augmentation and shuffle every epoch.
            Otherwise load the ``"test"`` split with a plain resize and no
            shuffling.

    Returns:
        A ``DataLoader`` with batch size ``BS`` and ``num_workers=0``.
    """
    if train:
        steps = [
            transforms.RandomResizedCrop(size, scale=(0.6, 1.0)),
            transforms.RandomHorizontalFlip(),
        ]
    else:
        # No identity ``Lambda`` placeholder here: a lambda cannot be pickled,
        # which would break the loader as soon as worker processes are used.
        steps = [transforms.Resize(size)]
    steps.append(transforms.ToTensor())
    tf = transforms.Compose(steps)

    split = "train" if train else "test"
    ds = datasets.STL10("data", split=split, download=True, transform=tf)
    return DataLoader(ds, batch_size=BS, shuffle=train, num_workers=0)

# One training loader and one evaluation loader per resolution, keyed by size.
SIZES    = [32, 40, 56, 72, 96]
train_ld = dict((res, make_loader(res, True)) for res in SIZES)
val_ld   = dict((res, make_loader(res, False)) for res in SIZES)
print("Loaders ready.")


Loaders ready.


In [3]:
# Cell 3 ── helpers
# Shared training criterion: cross-entropy with label smoothing of 0.1.
loss_CE = nn.CrossEntropyLoss(label_smoothing=0.1)

def accuracy(net, loader):
    """Return the top-1 accuracy of ``net`` over ``loader`` as a percentage.

    The model is switched to eval mode and left there; callers that continue
    training have to call ``net.train()`` again (``train_one_epoch`` does).

    Args:
        net: Classifier producing one row of class scores per sample.
        loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        Percentage (0-100) of correctly classified samples, or ``0.0`` when
        the loader yields no samples (instead of dividing by zero).
    """
    net.eval()
    # Evaluate on the device the model actually lives on; only fall back to
    # the notebook-wide DEVICE for a module without parameters.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device(DEVICE)

    correct = total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            correct += (net(x).argmax(1) == y).sum().item()
            total += y.size(0)

    if total == 0:
        return 0.0
    return 100 * correct / total

def train_one_epoch(net, loader, opt):
    """Run a single optimisation pass over every batch in ``loader``.

    Puts ``net`` in training mode, then for each batch moves the data to
    ``DEVICE``, clears the gradients, back-propagates ``loss_CE`` and takes
    one optimiser step. Returns nothing.
    """
    net.train()
    for inputs, targets in loader:
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)
        opt.zero_grad(set_to_none=True)
        batch_loss = loss_CE(net(inputs), targets)
        batch_loss.backward()
        opt.step()

def resnet18():
    """Create a 10-class ResNet-18 with a small-image stem, placed on ``DEVICE``.

    ``conv1`` is replaced by a 3×3, stride-1, padding-1 convolution without
    bias, and ``maxpool`` is replaced by an identity layer.
    """
    model = models.resnet18(num_classes=10)
    model.conv1 = nn.Conv2d(
        in_channels=3,
        out_channels=64,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
    )
    model.maxpool = nn.Identity()
    model = model.to(DEVICE)
    return model


In [4]:
# Cell 4 ── baseline 96 px
# Fresh model and optimiser for the fixed-resolution reference run.
net = resnet18()
opt = torch.optim.SGD(
    net.parameters(),
    lr=LR,
    momentum=0.9,
    weight_decay=1e-4,
)
t0 = time.perf_counter()

# At most 40 epochs at 96 px; stop at the first epoch reaching 60 % accuracy.
for e in range(40):
    train_one_epoch(net, train_ld[96], opt)
    acc  = accuracy(net, val_ld[96])
    mins = (time.perf_counter() - t0) / 60      # elapsed minutes, evaluation included
    print(f"[BASE 96] e{e:02d} | val {acc:5.1f}% | {mins:5.2f} min")
    if acc >= 60:
        break

# Keep the last epoch's numbers for the summary printed later.
baseline_time = mins
baseline_acc  = acc


[BASE 96] e00 | val  21.9% |  0.32 min
[BASE 96] e01 | val  27.5% |  0.59 min
[BASE 96] e02 | val  25.2% |  0.87 min
[BASE 96] e03 | val  31.1% |  1.14 min
[BASE 96] e04 | val  36.3% |  1.42 min
[BASE 96] e05 | val  34.6% |  1.69 min
[BASE 96] e06 | val  35.9% |  1.97 min
[BASE 96] e07 | val  39.2% |  2.24 min
[BASE 96] e08 | val  35.5% |  2.52 min
[BASE 96] e09 | val  37.3% |  2.79 min
[BASE 96] e10 | val  27.1% |  3.07 min
[BASE 96] e11 | val  37.8% |  3.34 min
[BASE 96] e12 | val  50.0% |  3.62 min
[BASE 96] e13 | val  53.1% |  3.89 min
[BASE 96] e14 | val  53.2% |  4.16 min
[BASE 96] e15 | val  57.3% |  4.44 min
[BASE 96] e16 | val  52.5% |  4.71 min
[BASE 96] e17 | val  52.2% |  4.99 min
[BASE 96] e18 | val  46.7% |  5.27 min
[BASE 96] e19 | val  52.6% |  5.54 min
[BASE 96] e20 | val  56.6% |  5.82 min
[BASE 96] e21 | val  59.7% |  6.09 min
[BASE 96] e22 | val  58.5% |  6.37 min
[BASE 96] e23 | val  57.6% |  6.64 min
[BASE 96] e24 | val  57.5% |  6.92 min
[BASE 96] e25 | val  52.8

In [5]:
# Cell 5 ── smooth ladder 32→40→56→72→96 and summary
# Resolution of each stage and the number of epochs spent at it.
ladder      = [32, 40, 56, 72, 96]
epochs_step = [3, 3, 2, 2, 30]                # tune freely

net = resnet18()
opt = torch.optim.SGD(
    net.parameters(),
    lr=LR,
    momentum=0.9,
    weight_decay=1e-4,
)
t0 = time.perf_counter()

for s, n in zip(ladder, epochs_step):
    for ep in range(n):
        train_one_epoch(net, train_ld[s], opt)
        acc  = accuracy(net, val_ld[s])
        mins = (time.perf_counter() - t0) / 60
        print(f"[LAD {s}] e{ep:02d} | val {acc:5.1f}% | {mins:5.2f} min")
        # The 60 % target only counts once the 96-px stage is reached.
        if s == 96 and acc >= 60:
            break
    # Same condition again to leave the outer (stage) loop as well.
    if s == 96 and acc >= 60:
        break

ladder_time = mins
ladder_acc  = acc

print("\nSummary:")
print(f"baseline-96 : {baseline_acc:5.2f}% | {baseline_time:5.2f} min")
print(f"ladder-prog : {ladder_acc:5.2f}% | {ladder_time:5.2f} min")
print(f"\nSpeed-up ► {baseline_time/ladder_time:5.1f}× faster to ≥60 % top-1")


[LAD 32] e00 | val  21.4% |  0.10 min
[LAD 32] e01 | val  33.4% |  0.16 min
[LAD 32] e02 | val  29.7% |  0.22 min
[LAD 40] e00 | val  37.4% |  0.34 min
[LAD 40] e01 | val  38.3% |  0.42 min
[LAD 40] e02 | val  37.4% |  0.49 min
[LAD 56] e00 | val  43.8% |  0.64 min
[LAD 56] e01 | val  47.3% |  0.75 min
[LAD 72] e00 | val  41.1% |  0.96 min
[LAD 72] e01 | val  35.9% |  1.15 min
[LAD 96] e00 | val  34.1% |  1.42 min
[LAD 96] e01 | val  48.4% |  1.69 min
[LAD 96] e02 | val  50.6% |  1.97 min
[LAD 96] e03 | val  48.2% |  2.24 min
[LAD 96] e04 | val  56.8% |  2.51 min
[LAD 96] e05 | val  54.2% |  2.79 min
[LAD 96] e06 | val  58.0% |  3.06 min
[LAD 96] e07 | val  43.9% |  3.34 min
[LAD 96] e08 | val  56.6% |  3.61 min
[LAD 96] e09 | val  60.6% |  3.88 min

Summary:
baseline-96 : 61.33% |  7.74 min
ladder-prog : 60.59% |  3.88 min

Speed-up ►   2.0× faster to ≥60 % top-1


In [9]:
# Cell X – seeds loop WITH epoch-level prints
import time, math, numpy as np, torch, random

# Configuration of the multi-seed comparison.
SEEDS          = list(range(20, 31))      # eleven seeds: 20 … 30
ladder         = [32, 40, 56, 72, 96]     # resolution of each curriculum stage
epochs_step    = [5, 5, 3, 2, 20]         # epochs per stage, paired with `ladder`
baseline_times = []                       # per-seed minutes, appended by the seed loop
ladder_times   = []

def run_baseline(seed):
    """Time a fixed-resolution (96 px) training run for one seed.

    Seeds torch, ``random`` and NumPy with ``seed``, builds a fresh model and
    SGD optimiser, then trains for at most 40 epochs, evaluating after each
    one and stopping at the first epoch with validation accuracy >= 60 %.

    Returns:
        Elapsed wall-clock minutes, evaluation included. The elapsed time is
        returned whether or not the 60 % target was reached.
    """
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    model = resnet18()
    sgd = torch.optim.SGD(
        model.parameters(), lr=LR, momentum=0.9, weight_decay=1e-4
    )
    start = time.perf_counter()

    for epoch in range(40):
        train_one_epoch(model, train_ld[96], sgd)
        val_acc = accuracy(model, val_ld[96])
        elapsed = (time.perf_counter() - start) / 60
        print(f"[seed {seed}] BASE e{epoch:02d} | val {val_acc:5.1f}% | {elapsed:5.2f} m")
        if val_acc >= 60:
            break

    total_minutes = (time.perf_counter() - start) / 60
    return total_minutes

def run_ladder(seed):
    """Time a progressive-resolution training run for one seed.

    Seeds torch, ``random`` and NumPy with ``seed``, builds a fresh model and
    SGD optimiser, then walks through the ``ladder`` resolutions, training
    ``epochs_step[i]`` epochs at each and evaluating after every epoch at the
    current resolution. Training stops as soon as the 96-px stage reaches
    validation accuracy >= 60 %.

    Returns:
        Elapsed wall-clock minutes, evaluation included.
    """
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    model = resnet18()
    sgd = torch.optim.SGD(
        model.parameters(), lr=LR, momentum=0.9, weight_decay=1e-4
    )
    start = time.perf_counter()

    for res, stage_epochs in zip(ladder, epochs_step):
        for epoch in range(stage_epochs):
            train_one_epoch(model, train_ld[res], sgd)
            val_acc = accuracy(model, val_ld[res])
            elapsed = (time.perf_counter() - start) / 60
            print(f"[seed {seed}] LAD {res} e{epoch:02d} | val {val_acc:5.1f}% | {elapsed:5.2f} m")
            if res == 96 and val_acc >= 60:
                break
        # Repeat the check to leave the stage loop too.
        if res == 96 and val_acc >= 60:
            break

    total_minutes = (time.perf_counter() - start) / 60
    return total_minutes

def paired_t(delta):
    """Summarise paired differences with a two-sided paired t-test.

    The confidence interval and p-value use Student's t distribution with
    ``n - 1`` degrees of freedom, which matters for the small seed counts
    used here (the normal 1.96 multiplier is too narrow for n around 10).
    If SciPy is not installed, the normal approximation is used for both.

    Args:
        delta: 1-D array-like of paired differences.

    Returns:
        Tuple ``(mean, ci, p, t)``: the mean difference, the half-width of
        the 95 % confidence interval, the two-sided p-value and the
        t-statistic. With fewer than two samples the spread is undefined, so
        ``ci`` and ``t`` are NaN and ``p`` is ``None``.
    """
    delta = np.asarray(delta, dtype=float)
    n = delta.size
    if n < 2:
        mean = float(delta.mean()) if n else float("nan")
        return mean, float("nan"), None, float("nan")

    mean = delta.mean()
    std = delta.std(ddof=1)
    sem = std / math.sqrt(n)
    # Identical differences give sem == 0; let that yield inf/nan quietly.
    with np.errstate(divide="ignore", invalid="ignore"):
        t = mean / sem

    try:
        from scipy import stats
    except ImportError:
        # Normal approximation: only accurate for large n.
        crit = 1.96
        p = math.erfc(abs(t) / math.sqrt(2))
    else:
        crit = float(stats.t.ppf(0.975, n - 1))
        p = float(2 * stats.t.sf(abs(t), n - 1))

    ci = crit * sem
    return mean, ci, p, t

# Run both training schemes for every seed and collect the elapsed minutes.
for s in SEEDS:
    print(f"\n========= Seed {s} baseline ===========================")
    b = run_baseline(s)
    print(f"→ baseline done in {b:.2f} min\n")

    print(f"========= Seed {s} ladder =============================")
    l = run_ladder(s)
    print(f"→ ladder   done in {l:.2f} min\n")
    baseline_times.append(b); ladder_times.append(l)

# -------- paired statistics ------------------------------------------
base = np.array(baseline_times); lad = np.array(ladder_times); delta=base-lad
m,ci,p,t = paired_t(delta)
print("-----------------------------------------------------------")
print("Seeds:", SEEDS)
for s,b,l in zip(SEEDS, base, lad):
    print(f"seed {s}: Δ = {b-l:4.2f} min   (base {b:5.2f} | lad {l:5.2f})")
print(f"\nMean Δ-time : {m:4.2f} ±{ci:4.2f} (95% CI)")
# Compare against None, not truthiness: a p-value of 0.0 is a real result
# and must not be reported as missing.
print(f"Paired t-stat {t:4.2f}   p ≈ {p if p is not None else 'n/a'}")
print(f"Speed-up    : {base.mean()/lad.mean():4.1f}×")
print("-----------------------------------------------------------")



[seed 20] BASE e00 | val  18.1% |  0.27 m
[seed 20] BASE e01 | val  30.0% |  0.55 m
[seed 20] BASE e02 | val  33.7% |  0.83 m
[seed 20] BASE e03 | val  34.4% |  1.10 m
[seed 20] BASE e04 | val  29.0% |  1.38 m
[seed 20] BASE e05 | val  34.7% |  1.66 m
[seed 20] BASE e06 | val  37.1% |  1.93 m
[seed 20] BASE e07 | val  41.5% |  2.21 m
[seed 20] BASE e08 | val  44.1% |  2.49 m
[seed 20] BASE e09 | val  38.7% |  2.76 m
[seed 20] BASE e10 | val  49.0% |  3.04 m
[seed 20] BASE e11 | val  52.3% |  3.32 m
[seed 20] BASE e12 | val  51.0% |  3.60 m
[seed 20] BASE e13 | val  44.5% |  3.87 m
[seed 20] BASE e14 | val  40.6% |  4.15 m
[seed 20] BASE e15 | val  59.3% |  4.42 m
[seed 20] BASE e16 | val  49.3% |  4.70 m
[seed 20] BASE e17 | val  56.6% |  4.98 m
[seed 20] BASE e18 | val  51.8% |  5.25 m
[seed 20] BASE e19 | val  53.4% |  5.53 m
[seed 20] BASE e20 | val  62.0% |  5.80 m
→ baseline done in 5.80 min

[seed 20] LAD 32 e00 | val  20.9% |  0.06 m
[seed 20] LAD 32 e01 | val  23.2% |  0.11 m


In [10]:
# ⬇️  Paste this as a NEW cell  ⬇️
# ---------------------------------------------------
def _train_one_epoch_bn_frozen(net, loader, opt):
    """One training epoch with every BatchNorm2d layer held in eval mode."""
    net.train()
    # Must happen AFTER net.train(): Module.train() recurses into every child
    # and would put the BatchNorm layers straight back into training mode.
    for m in net.modules():
        if isinstance(m, torch.nn.BatchNorm2d):
            m.eval()              # use and keep the stored running stats
    for x, y in loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        opt.zero_grad(set_to_none=True)
        loss_CE(net(x), y).backward()
        opt.step()


def run_ladder(seed):
    """
    Progressive-resize training with a single-epoch BatchNorm freeze
    on the very first 96-px epoch.  All other logic unchanged.

    The freeze is applied inside a dedicated epoch helper. Switching the BN
    layers to eval mode and then calling ``train_one_epoch`` has no effect,
    because ``train_one_epoch`` starts with ``net.train()``, which resets
    every sub-module to training mode. From the second 96-px epoch onward
    ``train_one_epoch`` is used again, which restores normal BN behaviour.

    Args:
        seed: Value used to seed torch, ``random`` and NumPy.

    Returns:
        Elapsed wall-clock minutes, evaluation included.
    """
    torch.manual_seed(seed); random.seed(seed); np.random.seed(seed)

    net = resnet18()
    opt = torch.optim.SGD(net.parameters(), lr=LR,
                          momentum=0.9, weight_decay=1e-4)
    t0  = time.perf_counter()

    for sz, n_ep in zip(ladder, epochs_step):
        for ep in range(n_ep):

            # ---- Training step: BN frozen only on the FIRST 96-px epoch --
            if sz == 96 and ep == 0:
                _train_one_epoch_bn_frozen(net, train_ld[sz], opt)
            else:
                train_one_epoch(net, train_ld[sz], opt)

            # ---- Logging & early-exit -----------------------------------
            acc  = accuracy(net, val_ld[sz])
            mins = (time.perf_counter() - t0) / 60
            print(f"[seed {seed}] LAD {sz} e{ep:02d} | "
                  f"val {acc:5.1f}% | {mins:5.2f} m")

            if sz == 96 and acc >= 60:        # stop at target accuracy
                break
        if sz == 96 and acc >= 60:
            break

    return (time.perf_counter() - t0) / 60
# ---------------------------------------------------
# End of new cell; re-run the seed loop cell below it



In [11]:
# Configuration of the second multi-seed comparison.
SEEDS          = list(range(31, 41))      # ten seeds: 31 … 40
ladder         = [32, 40, 56, 72, 96]     # resolution of each curriculum stage
epochs_step    = [5, 5, 3, 2, 20]         # epochs per stage, paired with `ladder`
baseline_times = []                       # per-seed minutes, appended by the seed loop
ladder_times   = []

In [13]:
# Start from empty result lists so that re-running this cell does not append
# to an earlier run and leave the timings out of step with SEEDS.
baseline_times, ladder_times = [], []

# Run both training schemes for every seed and collect the elapsed minutes.
for s in SEEDS:
    print(f"\n========= Seed {s} baseline ===========================")
    b = run_baseline(s)
    print(f"→ baseline done in {b:.2f} min\n")

    print(f"========= Seed {s} ladder =============================")
    l = run_ladder(s)
    print(f"→ ladder   done in {l:.2f} min\n")
    baseline_times.append(b); ladder_times.append(l)

# -------- paired statistics ------------------------------------------
base = np.array(baseline_times); lad = np.array(ladder_times); delta=base-lad
m,ci,p,t = paired_t(delta)
print("-----------------------------------------------------------")
print("Seeds:", SEEDS)
for s,b,l in zip(SEEDS, base, lad):
    print(f"seed {s}: Δ = {b-l:4.2f} min   (base {b:5.2f} | lad {l:5.2f})")
print(f"\nMean Δ-time : {m:4.2f} ±{ci:4.2f} (95% CI)")
# Compare against None, not truthiness: a p-value of 0.0 is a real result
# and must not be reported as missing.
print(f"Paired t-stat {t:4.2f}   p ≈ {p if p is not None else 'n/a'}")
print(f"Speed-up    : {base.mean()/lad.mean():4.1f}×")
print("-----------------------------------------------------------")


[seed 31] BASE e00 | val  27.2% |  0.27 m
[seed 31] BASE e01 | val  29.6% |  0.55 m
[seed 31] BASE e02 | val  31.8% |  0.82 m
[seed 31] BASE e03 | val  29.6% |  1.09 m
[seed 31] BASE e04 | val  29.2% |  1.37 m
[seed 31] BASE e05 | val  33.3% |  1.64 m
[seed 31] BASE e06 | val  29.8% |  1.91 m
[seed 31] BASE e07 | val  40.9% |  2.19 m
[seed 31] BASE e08 | val  46.0% |  2.46 m
[seed 31] BASE e09 | val  36.2% |  2.73 m
[seed 31] BASE e10 | val  48.0% |  3.01 m
[seed 31] BASE e11 | val  47.1% |  3.29 m
[seed 31] BASE e12 | val  47.1% |  3.56 m
[seed 31] BASE e13 | val  44.6% |  3.84 m
[seed 31] BASE e14 | val  48.5% |  4.11 m
[seed 31] BASE e15 | val  53.4% |  4.39 m
[seed 31] BASE e16 | val  47.2% |  4.66 m
[seed 31] BASE e17 | val  52.4% |  4.93 m
[seed 31] BASE e18 | val  56.0% |  5.21 m
[seed 31] BASE e19 | val  55.6% |  5.48 m
[seed 31] BASE e20 | val  44.4% |  5.76 m
[seed 31] BASE e21 | val  56.3% |  6.04 m
[seed 31] BASE e22 | val  59.1% |  6.31 m
[seed 31] BASE e23 | val  59.4% |