## 1) Imports and seeds

In [2]:
import os, random, json, math, time
from pathlib import Path
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

import pandas as pd
import matplotlib.pyplot as plt

# --- Device & AMP (new API; no deprecation warnings) ---
HAS_CUDA = torch.cuda.is_available()
DEVICE   = torch.device('cuda' if HAS_CUDA else 'cpu')
from torch.amp import GradScaler, autocast
SCALER = GradScaler('cuda') if HAS_CUDA else None
def amp_on():  # use: with amp_on(): ...
    return autocast('cuda', enabled=HAS_CUDA)

# --- CuDNN knobs (throughput > determinism) ---
torch.backends.cudnn.benchmark = True   # speed up convs with autotune
# If you need strict reproducibility later, set benchmark=False and deterministic flags.

# --- Paths ---
ROOT  = Path('/content/ATML_A3')
CKPTS = ROOT / 'ckpts'
RES   = ROOT / 'results'
FIGS  = ROOT / 'figures'
for p in [CKPTS, RES, FIGS]:
    p.mkdir(parents=True, exist_ok=True)

# --- Seeding ---
def set_seed(seed=1337):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if HAS_CUDA:
        torch.cuda.manual_seed_all(seed)

set_seed(1337)
print(f"CUDA available: {HAS_CUDA} | Device: {DEVICE}")


CUDA available: True | Device: cuda


## 2) Data: CIFAR-100

In [3]:
def get_cifar100(batch_size=128, num_workers=2):
    mean = (0.5071, 0.4867, 0.4408)
    std  = (0.2675, 0.2565, 0.2761)

    train_tf = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    test_tf = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    train_ds = datasets.CIFAR100(
        root=str(ROOT / "data"),
        train=True,
        download=True,
        transform=train_tf
    )

    val_ds = datasets.CIFAR100(
        root=str(ROOT / "data"),
        train=False,
        download=True,
        transform=test_tf
    )

    train_loader = DataLoader(
        train_ds,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available(),
        drop_last=True   # helps with BatchNorm stability
    )

    val_loader = DataLoader(
        val_ds,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available()
    )

    return train_loader, val_loader

train_loader, val_loader = get_cifar100()
print(f"Train batches: {len(train_loader)} | Val batches: {len(val_loader)}")


Train batches: 390 | Val batches: 79


## 3) Models: VGG-16/19 (teacher), VGG-11 (student)

In [4]:
def make_vgg(name='vgg16', num_classes=100, pretrained=False):
    if name == 'vgg16':
        net = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1 if pretrained else None)
    elif name == 'vgg19':
        net = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1 if pretrained else None)
    elif name == 'vgg11':
        net = models.vgg11(weights=None)  # student always scratch
    else:
        raise ValueError(f"Unsupported VGG: {name}")

    in_feats = net.classifier[-1].in_features
    net.classifier[-1] = nn.Linear(in_feats, num_classes)

    # safer init (VGG uses small std for FC)
    nn.init.normal_(net.classifier[-1].weight, mean=0.0, std=0.01)
    nn.init.zeros_(net.classifier[-1].bias)


    return net.to(DEVICE)

def new_teacher():
    # ImageNet-pretrained VGG-16; fine-tune with CE; later freeze for KD
    t = make_vgg('vgg16', pretrained=True)
    return t

def new_student():
    # VGG-11 from scratch (no pretrained ever)
    s = make_vgg('vgg11', pretrained=False)
    return s

def freeze_teacher(model):
    model.eval()
    for p in model.parameters():
        p.requires_grad = False
    return model

# Build once here if you want, or build inside run blocks
teacher16 = new_teacher()
student11 = new_student()
print("Built VGG-16 (teacher, pretrained) & VGG-11 (student, scratch).")


Built VGG-16 (teacher, pretrained) & VGG-11 (student, scratch).


4) Losses: CE, Label Smoothing, KD-LM, DKD

In [5]:
# --- Losses ---

class LabelSmoothingCE(nn.Module):
    def __init__(self, eps=0.1):
        super().__init__()
        self.eps = eps
        self.logsoft = nn.LogSoftmax(dim=1)
    def forward(self, logits, targets):
        n = logits.size(1)
        logp = self.logsoft(logits)
        with torch.no_grad():
            dist = torch.zeros_like(logp)
            dist.fill_(self.eps / (n - 1))
            dist.scatter_(1, targets.unsqueeze(1), 1 - self.eps)
        return torch.mean(torch.sum(-dist * logp, dim=1))


def kd_loss_logits(student_logits, teacher_logits, T=4.0, eps=1e-8):
    # KL( teacher || student ) at temperature T
    log_p = F.log_softmax(student_logits / T, dim=1)
    q     = F.softmax(teacher_logits / T, dim=1)
    q     = q.clamp(min=eps)  # numeric safety
    return F.kl_div(log_p, q, reduction='batchmean') * (T * T)


class DKDLoss(nn.Module):
    """
    Decoupled KD (Zhang et al.): TCKD + NCKD
      - TCKD: KL between [p(y), 1-p(y)] and [q(y), 1-q(y)]
      - NCKD: KL over non-target classes, renormalized
    """
    def __init__(self, alpha=1.0, beta=8.0, T=4.0, eps=1e-8):
        super().__init__()
        self.alpha = alpha
        self.beta  = beta
        self.T     = T
        self.eps   = eps

    def forward(self, s_logits, t_logits, targets):
        T, eps = self.T, self.eps

        # Full softmax distributions
        p_s = F.softmax(s_logits / T, dim=1)
        q_t = F.softmax(t_logits / T, dim=1)

        # ----- TCKD (2-class distributions: target vs. non-target) -----
        idx = targets.unsqueeze(1)                                # [B,1]
        p_y = p_s.gather(1, idx)                                  # [B,1]
        q_y = q_t.gather(1, idx)                                  # [B,1]

        p2 = torch.cat([p_y, 1.0 - p_y], dim=1).clamp(min=eps)    # [B,2]
        q2 = torch.cat([q_y, 1.0 - q_y], dim=1).clamp(min=eps)    # [B,2]
        tckd = F.kl_div(p2.log(), q2, reduction='batchmean')      # KL(p||q) over 2-way

        # ----- NCKD (normalize over non-target classes only) -----
        mask = torch.ones_like(p_s).scatter(1, idx, 0.0)          # zero-out target
        p_nt = (p_s * mask); q_nt = (q_t * mask)
        p_nt = p_nt / (p_nt.sum(dim=1, keepdim=True) + eps)       # renorm
        q_nt = q_nt / (q_nt.sum(dim=1, keepdim=True) + eps)
        nckd = F.kl_div((p_nt + eps).log(), q_nt, reduction='batchmean')

        return (self.alpha * tckd + self.beta * nckd) * (T * T)


## 5) Eval + generic CE training loop

In [6]:
def accuracy_topk(logits, targets, topk=(1,)):
    maxk = max(topk); b = targets.size(0)
    _, pred = logits.topk(maxk, 1, True, True); pred = pred.t()
    correct = pred.eq(targets.view(1, -1).expand_as(pred))
    out = []
    for k in topk:
        out.append(correct[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / b))
    return out

def evaluate(model, loader):
    model.eval()
    ce = nn.CrossEntropyLoss()
    loss_sum = 0.0; n = 0; top1 = 0.0; top5 = 0.0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(DEVICE, non_blocking=True); y = y.to(DEVICE, non_blocking=True)
            logits = model(x)
            loss = ce(logits, y)
            a1, a5 = accuracy_topk(logits, y, topk=(1, 5))
            bs = x.size(0)
            loss_sum += loss.item() * bs; n += bs
            top1 += a1.item() * bs / 100.0; top5 += a5.item() * bs / 100.0
    return loss_sum / n, 100 * top1 / n, 100 * top5 / n

def train_ce(model, train_loader, val_loader, epochs=60, lr=0.1, weight_decay=5e-4, use_ls=False, ls_eps=0.1, clip=1.0):
    model.to(DEVICE)
    opt  = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)
    ce   = LabelSmoothingCE(ls_eps) if use_ls else nn.CrossEntropyLoss()
    best = (1e9, 0, 0)

    for ep in range(1, epochs+1):
        model.train()
        for x,y in train_loader:
            x = x.to(DEVICE, non_blocking=True); y = y.to(DEVICE, non_blocking=True)
            opt.zero_grad(set_to_none=True)

            # NO AMP: keep it simple/stable
            logits = model(x)
            loss = ce(logits, y)

            if not torch.isfinite(loss):
                # skip toxic batch
                continue

            loss.backward()
            if clip is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip)
            opt.step()

        sched.step()
        vl, a1, a5 = evaluate(model, val_loader)
        if a1 > best[1]: best = (vl, a1, a5)
        if ep % 10 == 0 or ep == 1:
            print(f"[CE-safe{'-LS' if use_ls else ''}] {ep}/{epochs} | val_loss={vl:.3f} top1={a1:.2f} top5={a5:.2f}")
    return best


## 6) KD-LM training loop

In [7]:
def train_kd_lm(student, teacher, train_loader, val_loader,
                epochs=60, lr=0.1, alpha=0.6, T=3.5,
                warmup_epochs=5, clip=1.0):
    """
    Stable Logit Matching:
      - teacher: eval + frozen (no grads)
      - student: grads ON
      - CE warm-up for first `warmup_epochs`, then mix in KD with weight `alpha`
      - No AMP for KD (avoids overflow); optional grad clipping
    """
    # --- freeze teacher, enable student ---
    teacher.eval().to(DEVICE)
    for p in teacher.parameters(): p.requires_grad = False
    student.to(DEVICE)
    for p in student.parameters(): p.requires_grad = True

    ce   = nn.CrossEntropyLoss()
    opt  = torch.optim.SGD(student.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)

    best = (1e9, 0, 0)

    for ep in range(1, epochs + 1):
        student.train()
        use_kd = ep > warmup_epochs  # CE-only warm-up

        for x, y in train_loader:
            x = x.to(DEVICE, non_blocking=True); y = y.to(DEVICE, non_blocking=True)

            with torch.no_grad():                 # teacher forward has NO grads
                t_logits = teacher(x)

            opt.zero_grad(set_to_none=True)

            # IMPORTANT: no AMP here for stability
            s_logits = student(x)
            loss = ce(s_logits, y)
            if use_kd and alpha > 0.0:
                kd = kd_loss_logits(s_logits, t_logits, T=T)
                if torch.isfinite(kd):
                    loss = (1 - alpha) * loss + alpha * kd

            if not torch.isfinite(loss):
                continue  # skip pathological batch

            loss.backward()
            if clip is not None:
                torch.nn.utils.clip_grad_norm_(student.parameters(), max_norm=clip)
            opt.step()

        sched.step()

        vl, a1, a5 = evaluate(student, val_loader)
        if a1 > best[1]: best = (vl, a1, a5)
        if ep % 10 == 0 or ep == 1:
            print(f"[KD-LM (safe)] {ep}/{epochs} | use_kd={use_kd} alpha={alpha:.2f} T={T} "
                  f"| val_loss={vl:.3f} top1={a1:.2f} top5={a5:.2f}")

    return best


## 7) DKD training loop

In [8]:
def train_dkd(student, teacher, train_loader, val_loader,
              epochs=60, lr=0.1, alpha=1.0, beta=8.0, T=3.5,
              warmup_epochs=5, clip=1.0):
    """
    Stable DKD:
      - Teacher frozen in eval
      - CE-only warm-up, then add DKD
      - No AMP (KD is numerically touchy)
      - Grad clipping + finite-loss guard
    """
    # freeze teacher, enable student
    teacher.eval().to(DEVICE)
    for p in teacher.parameters(): p.requires_grad = False
    student.to(DEVICE)
    for p in student.parameters(): p.requires_grad = True

    dkd = DKDLoss(alpha=alpha, beta=beta, T=T)
    ce  = nn.CrossEntropyLoss()
    opt = torch.optim.SGD(student.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)

    best = (1e9, 0, 0)

    for ep in range(1, epochs+1):
        student.train()
        use_dkd = ep > warmup_epochs  # CE warm-up

        for x, y in train_loader:
            x = x.to(DEVICE, non_blocking=True); y = y.to(DEVICE, non_blocking=True)
            with torch.no_grad():
                t_logits = teacher(x)

            opt.zero_grad(set_to_none=True)

            # NO AMP for KD (stability first)
            s_logits = student(x)
            loss = ce(s_logits, y)
            if use_dkd:
                dk = dkd(s_logits, t_logits, y)
                if torch.isfinite(dk):
                    loss = loss + dk

            if not torch.isfinite(loss):
                continue  # skip pathological batch

            loss.backward()
            if clip is not None:
                torch.nn.utils.clip_grad_norm_(student.parameters(), max_norm=clip)
            opt.step()

        sched.step()
        vl, a1, a5 = evaluate(student, val_loader)
        if a1 > best[1]: best = (vl, a1, a5)
        if ep % 10 == 0 or ep == 1:
            print(f"[DKD (safe)] {ep}/{epochs} | use_dkd={use_dkd} T={T} "
                  f"| val_loss={vl:.3f} top1={a1:.2f} top5={a5:.2f}")

    return best


### 8) Save/Load helpers

In [9]:
def save_ckpt(model, path):
    torch.save({'state_dict': model.state_dict()}, path)
    print(f"✅ Saved checkpoint to: {path}")

def load_ckpt(model, path, strict=True):
    ckpt = torch.load(path, map_location=DEVICE)
    model.load_state_dict(ckpt['state_dict'], strict=strict)
    print(f"✅ Loaded checkpoint from: {path}")
    return model


# 9) Orchestrate Task 3.1 (Teacher → SI → LS → LM → DKD)

In [46]:
EPOCHS = 60  # use 30-40 for a fast pass if needed

RUN_TRAIN_TEACHER = True
RUN_TRAIN_SI      = True
RUN_TRAIN_LS      = True
RUN_TRAIN_LM      = True
RUN_TRAIN_DKD     = True

# ---------- 1) Teacher (VGG-16, pretrained ImageNet) ----------

teacher_ckpt = CKPTS/'teacher_vgg16.pt'
teacher16 = new_teacher()  # builds with safe init now
best_T = train_ce(teacher16, train_loader, val_loader, epochs=60, lr=0.01)  # << lower LR
save_ckpt(teacher16, CKPTS/'teacher_vgg16.pt')
print("Teacher saved:", best_T)

# Freeze teacher for KD
teacher16.eval()
for p in teacher16.parameters():
    p.requires_grad = False

# ---------- 2) Student Independent (VGG-11, CE only) ----------
si_ckpt = CKPTS/'student_SI.pt'
if si_ckpt.exists() and not RUN_TRAIN_SI:
    si = make_vgg('vgg11', pretrained=False)
    load_ckpt(si, si_ckpt)
    print("Loaded SI from ckpt.")
else:
    si = make_vgg('vgg11', pretrained=False)
    if RUN_TRAIN_SI:
        best_SI = train_ce(si, train_loader, val_loader, epochs=EPOCHS, lr=0.1, tag="SI")
        save_ckpt(si, si_ckpt)
        print("SI saved:", best_SI)

# ---------- 3) Label Smoothing baseline (VGG-11) ----------
ls_ckpt = CKPTS/'student_LS.pt'
if ls_ckpt.exists() and not RUN_TRAIN_LS:
    ls = make_vgg('vgg11', pretrained=False)
    load_ckpt(ls, ls_ckpt)
    print("Loaded LS from ckpt.")
else:
    ls = make_vgg('vgg11', pretrained=False)
    if RUN_TRAIN_LS:
        best_LS = train_ce(ls, train_loader, val_loader, epochs=EPOCHS, lr=0.1, use_ls=True, ls_eps=0.1, tag="LS")
        save_ckpt(ls, ls_ckpt)
        print("LS saved:", best_LS)

# ---------- 4) KD — Logit Matching (VGG-11 distilled from VGG-16) ----------
lm_ckpt = CKPTS/'student_LM.pt'
if lm_ckpt.exists() and not RUN_TRAIN_LM:
    lm = make_vgg('vgg11', pretrained=False)
    load_ckpt(lm, lm_ckpt)
    print("Loaded LM from ckpt.")
else:
    lm = make_vgg('vgg11', pretrained=False)
    if RUN_TRAIN_LM:
        # SAFE KD settings (no AMP, CE warm-up, clip)
        best_LM = train_kd_lm(
            lm, teacher16, train_loader, val_loader,
            epochs=EPOCHS, lr=0.05, alpha=0.7, T=3.5,
            warmup_epochs=5, clip=1.0
        )
        save_ckpt(lm, lm_ckpt)
        print("LM saved:", best_LM)

# ---------- 5) KD — Decoupled KD (VGG-11 distilled from VGG-16) ----------
dkd_ckpt = CKPTS/'student_DKD.pt'
if dkd_ckpt.exists() and not RUN_TRAIN_DKD:
    dkd = make_vgg('vgg11', pretrained=False)
    load_ckpt(dkd, dkd_ckpt)
    print("Loaded DKD from ckpt.")
else:
    dkd = make_vgg('vgg11', pretrained=False)
    if RUN_TRAIN_DKD:
        # SAFE DKD settings (no AMP, CE warm-up, clip)
        best_DKD = train_dkd(
            dkd, teacher16, train_loader, val_loader,
            epochs=EPOCHS, lr=0.05, alpha=1.0, beta=8.0, T=3.5,
            warmup_epochs=5, clip=1.0
        )
        save_ckpt(dkd, dkd_ckpt)
        print("DKD saved:", best_DKD)

# ---------- Summary ----------
def quick_eval(tag, path, arch):
    m = make_vgg(arch, pretrained=False)
    load_ckpt(m, path)
    vl, a1, a5 = evaluate(m, val_loader)
    return {"method": tag, "top1": a1, "top5": a5, "val_loss": vl}

summary = []
if teacher_ckpt.exists(): summary.append(quick_eval("Teacher (VGG16)", teacher_ckpt, 'vgg16'))
if si_ckpt.exists():      summary.append(quick_eval("SI (VGG11)",      si_ckpt,      'vgg11'))
if ls_ckpt.exists():      summary.append(quick_eval("LS (VGG11)",      ls_ckpt,      'vgg11'))
if lm_ckpt.exists():      summary.append(quick_eval("LM (KD)",         lm_ckpt,      'vgg11'))
if dkd_ckpt.exists():     summary.append(quick_eval("DKD",             dkd_ckpt,     'vgg11'))

df = pd.DataFrame(summary).sort_values("top1", ascending=False)
print(df.to_string(index=False))
df.to_csv(RES/'task3_summary.csv', index=False)

[CE-safe] 1/60 | val_loss=2.143 top1=41.48 top5=75.90
[CE-safe] 10/60 | val_loss=1.287 top1=63.36 top5=89.58
[CE-safe] 20/60 | val_loss=1.210 top1=66.59 top5=91.00
[CE-safe] 30/60 | val_loss=1.158 top1=68.48 top5=91.52
[CE-safe] 40/60 | val_loss=1.188 top1=68.48 top5=91.35
[CE-safe] 50/60 | val_loss=1.188 top1=69.13 top5=91.71
[CE-safe] 60/60 | val_loss=1.190 top1=69.34 top5=91.66
✅ Saved checkpoint to: /content/ATML_A3/ckpts/teacher_vgg16.pt
Teacher saved: (1.1941225917816163, 69.44, 91.64)


TypeError: train_ce() got an unexpected keyword argument 'tag'

In [47]:
si_ckpt = CKPTS/'student_SI.pt'
si = make_vgg('vgg11', pretrained=False)
best_SI = train_ce(si, train_loader, val_loader, epochs=EPOCHS, lr=0.1)
save_ckpt(si, si_ckpt)
print("SI saved:", best_SI)

[CE-safe] 1/60 | val_loss=4.292 top1=2.72 top5=13.34
[CE-safe] 10/60 | val_loss=2.640 top1=30.56 top5=63.77
[CE-safe] 20/60 | val_loss=2.010 top1=46.86 top5=76.10
[CE-safe] 30/60 | val_loss=1.730 top1=52.95 top5=81.25
[CE-safe] 40/60 | val_loss=1.569 top1=58.50 top5=84.84
[CE-safe] 50/60 | val_loss=1.486 top1=62.34 top5=86.45
[CE-safe] 60/60 | val_loss=1.534 top1=62.59 top5=86.84
✅ Saved checkpoint to: /content/ATML_A3/ckpts/student_SI.pt
SI saved: (1.5340964878082275, 62.72, 86.87)


In [50]:
ls_ckpt = CKPTS/'student_LS.pt'
ls = make_vgg('vgg11', pretrained=False)
best_LS = train_ce(ls, train_loader, val_loader, epochs=EPOCHS, lr=0.1, use_ls=True, ls_eps=0.1)
save_ckpt(ls, ls_ckpt)
print("LS saved:", best_LS)

[CE-safe-LS] 1/60 | val_loss=4.274 top1=3.18 top5=14.93
[CE-safe-LS] 10/60 | val_loss=2.574 top1=35.05 top5=66.89
[CE-safe-LS] 20/60 | val_loss=2.092 top1=47.43 top5=76.92
[CE-safe-LS] 30/60 | val_loss=1.691 top1=55.31 top5=82.99
[CE-safe-LS] 40/60 | val_loss=1.516 top1=60.27 top5=85.80
[CE-safe-LS] 50/60 | val_loss=1.401 top1=63.44 top5=87.07
[CE-safe-LS] 60/60 | val_loss=1.395 top1=63.98 top5=87.06
✅ Saved checkpoint to: /content/ATML_A3/ckpts/student_LS.pt
LS saved: (1.3961281625747681, 64.09, 87.13)


In [None]:
lm_ckpt = CKPTS/'student_LM.pt'
lm = make_vgg('vgg11', pretrained=False)
best_LM = train_kd_lm(
    lm, teacher16, train_loader, val_loader,
    epochs=60, lr=0.07, alpha=0.8, T=3.0,
    warmup_epochs=5, clip=1.0
)
print("LM saved:", best_LM)

[KD-LM (safe)] 1/60 | use_kd=False alpha=0.80 T=3.0 | val_loss=4.340 top1=2.41 top5=12.17
[KD-LM (safe)] 10/60 | use_kd=True alpha=0.80 T=3.0 | val_loss=3.596 top1=26.17 top5=55.86
[KD-LM (safe)] 20/60 | use_kd=True alpha=0.80 T=3.0 | val_loss=3.003 top1=39.65 top5=69.41


## 10) Quick metrics dump (for your report table/plot later)

In [None]:
def snapshot_metrics(method_name, model):
    vl,a1,a5 = evaluate(model, val_loader)
    return {'method':method_name, 'top1_acc':a1, 'top5_acc':a5, 'val_loss':vl}

rows = []
# Reload to be safe (in case you restart cells later)
T  = make_vgg('vgg16'); load_ckpt(T,  str(CKPTS/'teacher_vgg16.pt'))
SI = make_vgg('vgg11'); load_ckpt(SI, str(CKPTS/'student_SI.pt'))
LS = make_vgg('vgg11'); load_ckpt(LS, str(CKPTS/'student_LS.pt'))
LM = make_vgg('vgg11'); load_ckpt(LM, str(CKPTS/'student_LM.pt'))
DK = make_vgg('vgg11'); load_ckpt(DK, str(CKPTS/'student_DKD.pt'))

for name, m in [('SI',SI), ('LS',LS), ('LM',LM), ('DKD',DK)]:
    rows.append(snapshot_metrics(name, m))

df = pd.DataFrame(rows)
df.to_csv(RES/'task3_part1_metrics.csv', index=False)
print(df)
print("Saved:", RES/'task3_part1_metrics.csv')
