In [None]:
# Cell 1 – Mount Drive | Imports | Seed | CONFIG
from pathlib import Path
import os, time, math, pickle, random, warnings
import numpy as np
import torch, torch.nn as nn, torch.optim as optim, torch.nn.functional as F
from tqdm import tqdm  # Add tqdm for progress tracking
warnings.filterwarnings("ignore")

# Mount Google Drive (if available)
def in_colab():
    """Report whether the `google.colab` package can be imported.

    Returns:
        bool: True when the import succeeds, False when it raises any
        Exception (the package being absent is the expected failure).
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except Exception:
        return False
    else:
        return True

# Mount Google Drive only when the Colab package is importable; on any other
# runtime this block is skipped entirely.
if in_colab():
    # Imported here (not at the top of the cell) because the module only exists on Colab.
    from google.colab import drive
    drive.mount("/content/drive", force_remount=False)

# Reproducibility
def set_seed(seed=42, deterministic=False):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to `random`, `numpy.random`, the PyTorch
            CPU generator and every CUDA generator.
        deterministic: Controls the cuDNN flags when CUDA is available.
            False (default, the original behaviour) enables cuDNN benchmark
            autotuning and leaves cuDNN determinism off, which favours speed
            over bit-exact GPU reproducibility. True sets
            `cudnn.deterministic = True` and `cudnn.benchmark = False`.

    Note:
        Only the seeds and the two cuDNN flags are touched; code that later
        reassigns `torch.backends.cudnn.benchmark` overrides the flag set here.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = bool(deterministic)
        torch.backends.cudnn.benchmark = not deterministic

# Seed every RNG once, before any model or data loader is constructed.
set_seed(42)

# CONFIG
# DEVICE is chosen once here and stored in CONFIG; later code reads CONFIG["DEVICE"].
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"PyTorch CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Current CUDA device: {torch.cuda.current_device()}")
    print(f"CUDA device count: {torch.cuda.device_count()}")

# Central hyper-parameter dictionary shared by the cells below.
# NOTE(review): KD_TEMPERATURE, AT_PGD_STEPS, AT_STEP_SIZE and AUX_SIZE are not
# read by the training/evaluation functions visible in this part of the
# notebook (those use DATASET_HP["tau"] and DATASET_HP["adv_eps"]) -- confirm
# whether later cells rely on them.
CONFIG = {
    "DEVICE": DEVICE,
    "SEED": 42,  # same literal as the set_seed(42) call above; keep the two in sync
    "NUM_WORKERS": 0,  # Keep at 0 for Colab
    "BATCH_SIZE": 32 if DEVICE == "cpu" else 64,
    "LR": 1e-3,
    "DEFAULT_EPOCHS": 30,  # fallback when a dataset is missing from EPOCHS_PER_DATASET
    "EPOCHS_PER_DATASET": {"CIFAR10": 15, "CIFAR100": 30, "CINIC10": 30},
    "KD_TEMPERATURE": 4.0,
    "KD_LAMBDA": 0.7,  # weight of the KD term; (1 - KD_LAMBDA) weights cross-entropy
    "AT_PGD_STEPS": 5,
    "AT_STEP_SIZE": 2/255,
    "AUX_SIZE": 1000,
    # Per-dataset hyper-parameters, looked up by dataset name.
    "DATASET_HP": {
        "CIFAR10":  {"tau": 4.0, "k": 3, "eps_k": 0.15, "lr_teacher": 1e-3, "lr_student": 5e-4, "adv_eps": 8/255},
        "CIFAR100": {"tau": 5.0, "k": 5, "eps_k": 0.15, "lr_teacher": 1e-3, "lr_student": 5e-4, "adv_eps": 8/255},
        "CINIC10":  {"tau": 3.5, "k": 3, "eps_k": 0.15, "lr_teacher": 1e-3, "lr_student": 5e-4, "adv_eps": 8/255}
    },
    # (train, test) target sizes; datasets larger than the target are subsampled.
    "SIZES": {
        "CIFAR10":  (50_000, 10_000),
        "CIFAR100": (50_000, 10_000),
        "CINIC10":  (90_000, 90_000),
        "YAHOO":    (50_000, 20_000),
        "CRITEO":   (80_000, 20_000)
    },
    # Bounds applied with torch.clamp to perturbed (already normalised) inputs.
    "CLAMP_MIN": -3.0,
    "CLAMP_MAX":  3.0,
    # NOTE(review): this is "/content", not a folder under the "/content/drive"
    # mount point used above, so files written below it presumably do not end
    # up on Google Drive -- confirm this is intended.
    "DRIVE_PATH": "/content",
    "PIN_MEMORY": False,
    "PREFETCH_FACTOR": None,
}

os.makedirs(CONFIG["DRIVE_PATH"], exist_ok=True)
print("Device:", CONFIG["DEVICE"])
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory/1e9:.2f} GB")
    # Set unconditionally on GPU, regardless of what set_seed configured.
    torch.backends.cudnn.benchmark = True
    print("cuDNN benchmark enabled for GPU performance.")
else:
    print("WARNING: Running on CPU - training will be slow!")

Mounted at /content/drive
PyTorch CUDA available: True
Current CUDA device: 0
CUDA device count: 1
Device: cuda
GPU: Tesla T4
GPU Memory: 15.83 GB
cuDNN benchmark enabled for GPU performance.


In [None]:
# Cell 2 – Transforms | Dataset Loading | Downsampling | Vertical Split
from torchvision import transforms
from torchvision.datasets import CIFAR10, CIFAR100, ImageFolder
from torch.utils.data import DataLoader, Subset
import zipfile, random, shutil

# Normalization & Augmentation
# The same mean/std constants are used to normalise every dataset in this cell.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]
# Training pipeline for the 32x32 CIFAR datasets: padded random crop, horizontal
# flip, AutoAugment with the CIFAR10 policy, then tensor conversion and
# normalisation. This pipeline is applied to both CIFAR10 and CIFAR100 below.
train_tf_32 = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])
# Evaluation pipeline for the CIFAR datasets: no augmentation, only tensor
# conversion and normalisation.
test_tf_32 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])
# CINIC training pipeline: explicit Resize(32) first, then crop/flip (no AutoAugment).
train_tf_cinic = transforms.Compose([
    transforms.Resize(32),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])
# CINIC evaluation pipeline: resize, tensor conversion and normalisation only.
test_tf_cinic = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# --- Channel Split for VFL ---
def image_channel_split(x):
    """Split a batch along dim 1 into one part of 1 chunk and one part of 2 chunks.

    `x` is cut into exactly three chunks along dim 1 (unpacking fails
    otherwise). The first chunk is returned as-is; the second and third are
    concatenated back together along dim 1.

    Returns:
        tuple: (first_chunk, concatenation_of_second_and_third_chunk).
    """
    first, second, third = torch.chunk(x, 3, dim=1)
    remainder = torch.cat((second, third), dim=1)
    return first, remainder

class VertImageLoader:
    """Wrap a batch iterable so each batch is yielded as ((part_a, part_p), labels).

    Every `(images, labels)` batch from the wrapped loader is passed through
    `image_channel_split`, and the two resulting tensors are yielded together
    with the untouched labels. The length is that of the wrapped loader.
    """

    def __init__(self, base_loader):
        self.base_loader = base_loader

    def __len__(self):
        return len(self.base_loader)

    def __iter__(self):
        for batch in self.base_loader:
            images, labels = batch
            part_a, part_p = image_channel_split(images)
            yield (part_a, part_p), labels

# Local Dataset Path
# All datasets are downloaded to / read from this local directory.
DATA_PATH = Path("/content/datasets")
DATA_PATH.mkdir(parents=True, exist_ok=True)

# Copy archives from Drive if needed
# Only *.zip files directly inside the Drive folder are considered, and a file
# is copied only when no file of the same name already exists locally.
drive_ds = Path("/content/drive/MyDrive/datasets")
if drive_ds.exists():
    for z in drive_ds.glob("*.zip"):
        dest = DATA_PATH / z.name
        if not dest.exists():
            print(f"Copying {z.name} to local disk for speed...")
            shutil.copy(z, dest)

# Disable multiprocessing for Colab (prevents 1st-epoch stall)
# These overwrite the CONFIG entries with the loader settings used in this cell.
CONFIG["NUM_WORKERS"] = 0
CONFIG["PREFETCH_FACTOR"] = None
CONFIG["PIN_MEMORY"] = False

def create_dataloader(ds, bs, shuffle):
    """Build a single-process DataLoader over `ds`.

    Args:
        ds: Dataset to iterate.
        bs: Batch size.
        shuffle: Whether to shuffle; the same flag is also used as
            `drop_last`, so shuffled loaders drop a trailing partial batch
            and unshuffled loaders keep it.

    Returns:
        DataLoader with `num_workers=0`, `pin_memory=False` and
        `persistent_workers=False` (fixed here, not read from CONFIG).
    """
    loader_kwargs = dict(
        batch_size=bs,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=False,
        persistent_workers=False,
        drop_last=shuffle,
    )
    return DataLoader(ds, **loader_kwargs)

def subsample_dataset(dataset, target_size, seed=42):
    """Return a seeded random subset of `dataset` with `target_size` items.

    Args:
        dataset: Any object supporting `len()` (and indexing, for `Subset`).
        target_size: Desired number of samples.
        seed: Seed for the index sampling.

    Returns:
        `dataset` itself when `target_size` is not smaller than its length,
        otherwise a `Subset` over `target_size` indices drawn without
        replacement.

    Raises:
        ValueError: from `random.Random.sample` if `target_size` is negative.

    Note:
        Sampling uses a private `random.Random(seed)` rather than reseeding
        the module-level generator, so calling this function no longer resets
        the global `random` state as a side effect. The indices drawn for a
        given seed are the same as with the global generator.
    """
    total = len(dataset)
    if target_size >= total:
        return dataset
    rng = random.Random(seed)
    indices = rng.sample(range(total), target_size)
    return Subset(dataset, indices)

# Registry of prepared datasets: name -> {"train": loader, "test": loader, "num_classes": int}.
VISION_LOADERS = {}

# CIFAR-10
print("Preparing CIFAR10 ...")
tr = CIFAR10(DATA_PATH, train=True, download=True, transform=train_tf_32)
te = CIFAR10(DATA_PATH, train=False, download=True, transform=test_tf_32)
t_train, t_test = CONFIG["SIZES"]["CIFAR10"]
tr, te = subsample_dataset(tr, t_train), subsample_dataset(te, t_test)
VISION_LOADERS["CIFAR10"] = {
    "train": VertImageLoader(create_dataloader(tr, CONFIG["BATCH_SIZE"], True)),
    "test":  VertImageLoader(create_dataloader(te, CONFIG["BATCH_SIZE"], False)),
    "num_classes": 10,
}
# Report the sizes actually obtained; they are below the configured targets
# whenever the dataset holds fewer samples than requested.
print(f" CIFAR10 → train={len(tr)}, test={len(te)}")

# CIFAR-100
print("Preparing CIFAR100 ...")
tr = CIFAR100(DATA_PATH, train=True, download=True, transform=train_tf_32)
te = CIFAR100(DATA_PATH, train=False, download=True, transform=test_tf_32)
t_train, t_test = CONFIG["SIZES"]["CIFAR100"]
tr, te = subsample_dataset(tr, t_train), subsample_dataset(te, t_test)
VISION_LOADERS["CIFAR100"] = {
    "train": VertImageLoader(create_dataloader(tr, CONFIG["BATCH_SIZE"], True)),
    "test":  VertImageLoader(create_dataloader(te, CONFIG["BATCH_SIZE"], False)),
    "num_classes": 100,
}
print(f" CIFAR100 → train={len(tr)}, test={len(te)}")

# CINIC-10 (Flexible Extraction + Path Normalization)
print("Preparing CINIC10 ...")
cinic_root = DATA_PATH / "cinic-10"
alt_root   = DATA_PATH / "CINIC-10"
cinic_zip  = DATA_PATH / "cinic-10.zip"

# Extract if needed
if not cinic_root.exists() and alt_root.exists():
    cinic_root = alt_root
if not cinic_root.exists() and cinic_zip.exists():
    print(f" Extracting CINIC-10 from {cinic_zip} ...")
    with zipfile.ZipFile(cinic_zip, "r") as zf:
        zf.extractall(DATA_PATH)
    cinic_root = DATA_PATH / "cinic-10"
    # The archive may unpack to the upper-case folder name; re-check the
    # alternative spelling so a freshly extracted dataset is not reported missing.
    if not cinic_root.exists() and alt_root.exists():
        cinic_root = alt_root

# Load if available. Both split folders must exist; otherwise the dataset is
# skipped instead of failing inside ImageFolder.
if (cinic_root / "train").is_dir() and (cinic_root / "test").is_dir():
    cinic_tr = ImageFolder(cinic_root/"train", transform=train_tf_cinic)
    cinic_te = ImageFolder(cinic_root/"test",  transform=test_tf_cinic)
    t_train, t_test = CONFIG["SIZES"]["CINIC10"]
    cinic_tr, cinic_te = subsample_dataset(cinic_tr, t_train), subsample_dataset(cinic_te, t_test)
    VISION_LOADERS["CINIC10"] = {
        "train": VertImageLoader(create_dataloader(cinic_tr, CONFIG["BATCH_SIZE"], True)),
        "test":  VertImageLoader(create_dataloader(cinic_te, CONFIG["BATCH_SIZE"], False)),
        "num_classes": 10,
    }
    print(f" CINIC10 → train={len(cinic_tr)}, test={len(cinic_te)}")
else:
    print(" CINIC-10 dataset not found — skipped.")

print("\nAll vision datasets preloaded and downsampled successfully (Colab-safe).")


Copying criteo.zip to local disk for speed...
Copying cinic-10.zip to local disk for speed...
Preparing CIFAR10 ...


100%|██████████| 170M/170M [00:11<00:00, 14.2MB/s]


 CIFAR10 → train=50000, test=10000
Preparing CIFAR100 ...


100%|██████████| 169M/169M [00:11<00:00, 14.8MB/s]


 CIFAR100 → train=50000, test=10000
Preparing CINIC10 ...
 Extracting CINIC-10 from /content/datasets/cinic-10.zip ...
 CINIC10 → train=90000, test=90000

All vision datasets preloaded and downsampled successfully (Colab-safe).


In [None]:
# Cell 3 – VFL Vision Model Definitions
class BottomModelA_Vision(nn.Module):
    """Convolutional feature extractor for a single-channel input.

    `features` stacks six conv/batch-norm/ReLU units (64, 64, 128, 128, 256,
    256 output channels) with a 2x max-pool after the second and fourth unit
    and a global average pool at the end. `fc` maps the 256 pooled features to
    `out_dim` through Linear, BatchNorm1d, ReLU and Dropout(0.2).
    """

    @staticmethod
    def _conv_unit(c_in, c_out):
        """Return the [conv3x3, batch-norm, ReLU] layers of one unit, in order."""
        return [
            nn.Conv2d(c_in, c_out, 3, padding=1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, out_dim=512):
        super().__init__()
        unit = self._conv_unit
        # Layers are created strictly left to right so that the Sequential
        # indices (and therefore the state_dict keys) stay the same.
        conv_stack = [
            *unit(1, 64), *unit(64, 64), nn.MaxPool2d(2),
            *unit(64, 128), *unit(128, 128), nn.MaxPool2d(2),
            *unit(128, 256), *unit(256, 256), nn.AdaptiveAvgPool2d(1),
        ]
        self.features = nn.Sequential(*conv_stack)
        head = [
            nn.Linear(256, out_dim),
            nn.BatchNorm1d(out_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        pooled = self.features(x)
        flat = pooled.view(x.size(0), -1)
        return self.fc(flat)

class BottomModelP_Vision(BottomModelA_Vision):
    """Variant of `BottomModelA_Vision` whose first convolution takes 2 channels.

    The parent network is built unchanged and its first layer (`features[0]`)
    is then replaced by a 2-to-64 channel 3x3 convolution.
    """

    def __init__(self, out_dim=512):
        super().__init__(out_dim)
        two_channel_stem = nn.Conv2d(
            in_channels=2, out_channels=64, kernel_size=3, padding=1, bias=False
        )
        self.features[0] = two_channel_stem

class TopModel_Vision(nn.Module):
    """Classifier head over two concatenated embeddings.

    The two inputs are concatenated along dim 1 (total width `in_dim`) and
    passed through Linear(in_dim, 512) and Linear(512, 256), each followed by
    BatchNorm1d and ReLU, then a final Linear(256, num_classes) that returns
    raw logits.
    """

    def __init__(self, num_classes, in_dim=1024):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, h_a, h_p):
        fused = torch.cat((h_a, h_p), dim=1)
        hidden = torch.relu(self.bn1(self.fc1(fused)))
        hidden = torch.relu(self.bn2(self.fc2(hidden)))
        return self.fc3(hidden)

class VFLModel(nn.Module):
    """Two bottom encoders feeding one top classifier.

    `bottom_a` encodes `x_a`, `bottom_p` encodes `x_p`, and `top_model`
    receives both embeddings and returns its output for `num_classes` classes.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.bottom_a = BottomModelA_Vision()
        self.bottom_p = BottomModelP_Vision()
        self.top_model = TopModel_Vision(num_classes)

    def forward(self, x_a, x_p):
        emb_a = self.bottom_a(x_a)
        emb_p = self.bottom_p(x_p)
        return self.top_model(emb_a, emb_p)

def build_vfl_model_vision(num_classes:int):
    """Create a `VFLModel`, re-initialise its weights and move it to the device.

    Initialisation per layer type:
      * Conv2d: Kaiming-normal weights (fan_out, ReLU gain).
      * BatchNorm1d / BatchNorm2d: weight set to 1, bias set to 0.
      * Linear: Xavier-uniform weights, bias set to 0 when present.

    Returns:
        The model placed on `CONFIG["DEVICE"]`.
    """
    norm_types = (nn.BatchNorm2d, nn.BatchNorm1d)
    net = VFLModel(num_classes)
    for layer in net.modules():
        if isinstance(layer, nn.Conv2d):
            nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
            continue
        if isinstance(layer, norm_types):
            nn.init.ones_(layer.weight)
            nn.init.zeros_(layer.bias)
            continue
        if isinstance(layer, nn.Linear):
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)
    return net.to(CONFIG["DEVICE"])


In [None]:
# Cell 4 – VFL Training and Evaluation Functions
from tqdm import tqdm
import torch
import torch.nn.functional as F

def train_epoch_OA(model, loader, optimizer, config, dataset, use_at=False, show_progress=True):
    """Train a VFL model for one epoch with plain cross-entropy.

    Args:
        model: Callable as `model(xa, xp)` returning class logits.
        loader: Iterable yielding `((xa, xp), y)` batches.
        optimizer: Optimizer over the model parameters.
        config: Dict providing `"DEVICE"`.
        dataset: Dataset name; accepted for signature parity with the other
            training functions but not used here.
        use_at: Accepted for signature parity; not used here.
        show_progress: Wrap the loader in a tqdm bar and show the batch loss.

    Returns:
        tuple: (sample-weighted mean loss, accuracy in percent). Both are 0.0
        when the loader yields no samples.
    """
    model.train()
    total_loss, total_correct, total_samples = 0.0, 0, 0
    iterator = tqdm(loader, desc='Training OA', ncols=100) if show_progress else loader
    device = config["DEVICE"]

    for (xa, xp), y in iterator:
        xa, xp, y = xa.to(device), xp.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(xa, xp)
        loss = F.cross_entropy(out, y)
        loss.backward()
        optimizer.step()

        batch_size = y.size(0)
        batch_loss = loss.item()
        # Weight by batch size so a smaller final batch does not skew the mean.
        total_loss += batch_loss * batch_size
        total_correct += (out.argmax(1) == y).sum().item()
        total_samples += batch_size

        if show_progress:
            iterator.set_postfix({'loss': f'{batch_loss:.4f}'})

    # Guard against an empty loader instead of raising ZeroDivisionError.
    if total_samples == 0:
        return 0.0, 0.0
    avg_loss = total_loss / total_samples
    train_acc = 100.0 * total_correct / total_samples
    return avg_loss, train_acc

def train_epoch_KD(student, teacher, loader, optimizer, config, dataset_name, use_at=False, show_progress=True):
    """Train a VFL student for one epoch with knowledge distillation.

    The loss is `KD_LAMBDA * KL(student || teacher at temperature tau) * tau^2
    + (1 - KD_LAMBDA) * cross-entropy(student, labels)`.

    Args:
        student: Model being trained; called as `student(xa, xp)`.
        teacher: Frozen reference model; put in eval mode and run without grad.
        loader: Iterable yielding `((xa, xp), y)` batches.
        optimizer: Optimizer over the student parameters.
        config: Dict providing `"DEVICE"`, `"DATASET_HP"`, `"KD_LAMBDA"`,
            `"CLAMP_MIN"` and `"CLAMP_MAX"`.
        dataset_name: Key into `config["DATASET_HP"]` (uses `tau`, `adv_eps`).
        use_at: When True, `xp` is replaced by a single-step sign-gradient
            perturbation of size `adv_eps`, clamped to the configured range.
            Both teacher and student then see the perturbed input.
        show_progress: Wrap the loader in a tqdm bar.

    Returns:
        tuple: (sample-weighted mean loss, accuracy in percent). Both are 0.0
        when the loader yields no samples.
    """
    student.train()
    teacher.eval()
    total_loss, total_correct, total_samples = 0.0, 0, 0
    iterator = tqdm(loader, desc='Training KD', ncols=100) if show_progress else loader
    device = config["DEVICE"]
    hp = config["DATASET_HP"][dataset_name]
    # Loop-invariant hyper-parameters.
    T = hp["tau"]
    kd_weight = config["KD_LAMBDA"]

    for (xa, xp), y in iterator:
        xa, xp, y = xa.to(device), xp.to(device), y.to(device)
        optimizer.zero_grad()

        xp_train = xp
        if use_at:
            # Craft the perturbation in eval mode so this extra forward pass
            # does not update batch-norm statistics or apply dropout.
            student.eval()
            xp_adv = xp.clone().detach().requires_grad_(True)
            logits_adv = student(xa, xp_adv)
            loss_adv = F.cross_entropy(logits_adv, y)
            # autograd.grad returns the input gradient without touching parameter .grad.
            grad = torch.autograd.grad(loss_adv, xp_adv, retain_graph=False)[0]
            student.train()
            xp_train = torch.clamp(xp + hp["adv_eps"] * grad.sign(), config["CLAMP_MIN"], config["CLAMP_MAX"]).detach()

        with torch.no_grad():
            teacher_logits = teacher(xa, xp_train)
        student_logits = student(xa, xp_train)

        kd_loss = F.kl_div(
            F.log_softmax(student_logits / T, dim=1),
            F.log_softmax(teacher_logits / T, dim=1),
            reduction='batchmean',
            log_target=True,
        ) * (T * T)
        ce_loss = F.cross_entropy(student_logits, y)
        loss = kd_weight * kd_loss + (1 - kd_weight) * ce_loss
        loss.backward()
        optimizer.step()

        batch_size = y.size(0)
        total_loss += loss.item() * batch_size
        total_correct += (student_logits.argmax(1) == y).sum().item()
        total_samples += batch_size

        # Refresh the bar text only every 50 iterations.
        if show_progress and (iterator.n % 50 == 0):
            iterator.set_postfix({'loss': f'{loss.item():.4f}'})

    # Guard against an empty loader instead of raising ZeroDivisionError.
    if total_samples == 0:
        return 0.0, 0.0
    avg_loss = total_loss / total_samples
    train_acc = 100.0 * total_correct / total_samples
    return avg_loss, train_acc

def train_epoch_KD_DP(student, teacher, loader, optimizer, config, dataset_name, noise_multiplier=0.5, use_at=True, show_progress=True):
    """Run one KD epoch with Gaussian noise on the passive-embedding gradient.

    The student is run in two stages (`bottom_a` / `bottom_p`, then
    `top_model`) so that a backward hook can be attached to the `bottom_p`
    output. When `noise_multiplier > 0` the hook adds
    `randn_like(grad) * noise_multiplier` to the gradient flowing back through
    that embedding. No gradient clipping is performed in this function, so it
    does not by itself establish a formal differential-privacy guarantee.

    Args:
        student: Model exposing `bottom_a`, `bottom_p` and `top_model`.
        teacher: Reference model, evaluated without gradients.
        loader: Iterable yielding `((xa, xp), y)` batches.
        optimizer: Optimizer over the student parameters.
        config: Dict providing `"DEVICE"`, `"DATASET_HP"`, `"KD_LAMBDA"`,
            `"CLAMP_MIN"` and `"CLAMP_MAX"`.
        dataset_name: Key into `config["DATASET_HP"]` (uses `tau`, `adv_eps`).
        noise_multiplier: Standard deviation of the added gradient noise.
        use_at: Perturb `xp` with a single sign-gradient step before training.
        show_progress: Wrap the loader in a tqdm bar.

    Returns:
        tuple: (sample-weighted mean loss, accuracy in percent); (0.0, 0.0)
        when no samples were seen.
    """
    student.train()
    teacher.eval()
    loss_sum, n_correct, n_seen = 0, 0, 0
    device = config["DEVICE"]
    hp = config["DATASET_HP"][dataset_name]
    batches = tqdm(loader, desc='Training KD+AT+DP', ncols=100) if show_progress else loader

    def _noisy_gradient(g):
        # Backward hook: returns the incoming gradient plus Gaussian noise.
        return g + torch.randn_like(g) * noise_multiplier

    for (xa, xp), y in batches:
        xa = xa.to(device)
        xp = xp.to(device)
        y = y.to(device)
        optimizer.zero_grad()

        if use_at:
            # Input gradient is taken in eval mode, then training mode is restored.
            student.eval()
            probe = xp.clone().detach().requires_grad_(True)
            probe_loss = F.cross_entropy(student(xa, probe), y)
            input_grad = torch.autograd.grad(probe_loss, probe, retain_graph=False)[0]
            student.train()
            stepped = xp + hp["adv_eps"] * input_grad.sign()
            xp_train = torch.clamp(stepped, config["CLAMP_MIN"], config["CLAMP_MAX"]).detach()
        else:
            xp_train = xp

        with torch.no_grad():
            teacher_logits = teacher(xa, xp_train)

        h_a = student.bottom_a(xa)
        h_p = student.bottom_p(xp_train)
        if noise_multiplier > 0:
            h_p.register_hook(_noisy_gradient)
        student_logits = student.top_model(h_a, h_p)

        T = hp["tau"]
        student_log_probs = F.log_softmax(student_logits / T, dim=1)
        teacher_log_probs = F.log_softmax(teacher_logits / T, dim=1)
        kd_loss = F.kl_div(student_log_probs, teacher_log_probs, reduction='batchmean', log_target=True) * (T * T)
        ce_loss = F.cross_entropy(student_logits, y)
        lam = config["KD_LAMBDA"]
        loss = lam * kd_loss + (1 - lam) * ce_loss

        loss.backward()
        optimizer.step()

        loss_sum += loss.item() * y.size(0)
        n_correct += (student_logits.argmax(1) == y).sum().item()
        n_seen += y.size(0)

    if n_seen > 0:
        return loss_sum / n_seen, 100.0 * n_correct / n_seen
    return 0.0, 0.0

# Cell-completion marker: printed whenever this cell finishes executing; it
# does not run or validate the functions defined above.
print("All VFL training functions loaded successfully (including hook-based DP).")

All VFL training functions loaded successfully (including hook-based DP).


In [None]:
# Cell 5 – Evaluation and Privacy Attack Functions
from sklearn.metrics import f1_score, roc_auc_score, top_k_accuracy_score
from tqdm import tqdm
import numpy as np
import torch
import torch.nn.functional as F

def evaluate_model(model, loader, config, robust=False, dataset_name=None):
    """Compute mean loss, Top-1 and Top-5 accuracy of a VFL model.

    Args:
        model: Callable as `model(xa, xp)` returning class logits.
        loader: Iterable yielding `((xa, xp), y)` batches.
        config: Dict providing `"DEVICE"`; with `robust=True` also
            `"DATASET_HP"`, `"CLAMP_MIN"` and `"CLAMP_MAX"`.
        robust: When True, `xp` is replaced by a single-step sign-gradient
            perturbation of size `adv_eps` (clamped to the configured range)
            before the evaluated forward pass.
        dataset_name: Key into `config["DATASET_HP"]`; required when
            `robust=True`.

    Returns:
        tuple: `(loss, top1_acc, top5_acc)` -- THREE values, accuracies in
        percent. `(0.0, 0.0, 0.0)` when the loader yields no samples.

    Raises:
        KeyError: if `robust=True` and `dataset_name` is not in
            `config["DATASET_HP"]`.
    """
    device = config["DEVICE"]
    model.eval()
    # Loop-invariant: resolve the perturbation size once.
    eps = config["DATASET_HP"][dataset_name]["adv_eps"] if robust else None
    label_chunks, prob_chunks = [], []
    total_loss, total = 0.0, 0

    with torch.no_grad():
        for (xa, xp), y in loader:
            xa, xp, y = xa.to(device), xp.to(device), y.to(device)
            xp_eval = xp

            if robust:
                # Gradients are needed only to craft the perturbation.
                with torch.enable_grad():
                    xp_adv = xp.clone().detach().requires_grad_(True)
                    logits_adv = model(xa, xp_adv)
                    loss_adv = F.cross_entropy(logits_adv, y)
                    grad = torch.autograd.grad(loss_adv, xp_adv, retain_graph=False)[0]
                xp_eval = torch.clamp(xp + eps * grad.sign(), config["CLAMP_MIN"], config["CLAMP_MAX"])

            logits = model(xa, xp_eval)
            loss = F.cross_entropy(logits, y)
            total_loss += loss.item() * y.size(0)
            total += y.size(0)

            # Keep one array per batch and concatenate once at the end.
            label_chunks.append(y.cpu().numpy())
            prob_chunks.append(F.softmax(logits, dim=1).cpu().numpy())

    # An empty loader leaves nothing to score; return zeros instead of
    # failing on the shape lookup below.
    if total == 0:
        return 0.0, 0.0, 0.0

    loss_avg = total_loss / total
    y_true = np.concatenate(label_chunks)
    y_probs = np.concatenate(prob_chunks)

    # Calculate Top-1 and Top-5 accuracy
    num_classes = y_probs.shape[1]
    top1_acc = top_k_accuracy_score(y_true, y_probs, k=1, labels=range(num_classes)) * 100
    top5_acc = top_k_accuracy_score(y_true, y_probs, k=min(5, num_classes), labels=range(num_classes)) * 100

    return loss_avg, top1_acc, top5_acc


_LABEL_ATTACK_TYPES = ("passive", "perturbed", "active", "direct")


def _label_attack_features(vfl_model, xa, xp, config, attack_type,
                           active_eps, active_queries, noise_sigma):
    """Build the attacker's input features for one batch.

    * "direct": the `bottom_p` embedding of `xp`.
    * "active": gradients of the summed logits w.r.t. the `bottom_p`
      embedding for `active_queries` uniformly perturbed copies of `xp`,
      concatenated along dim 1.
    * "passive" / "perturbed": gradient of the summed logits w.r.t. the
      `bottom_p` embedding; Gaussian noise of std `noise_sigma` is added when
      `noise_sigma` is not None.

    Bottom models run without autograd because only the gradient with respect
    to the embedding itself is required; the returned tensor carries no graph.
    """
    if attack_type == "direct":
        with torch.no_grad():
            return vfl_model.bottom_p(xp)

    with torch.no_grad():
        h_a = vfl_model.bottom_a(xa)

    def _embedding_gradient(xp_in):
        # The embedding becomes a leaf tensor so the gradient stops there.
        with torch.no_grad():
            h_p = vfl_model.bottom_p(xp_in)
        h_p.requires_grad_()
        # enable_grad makes this work even inside a caller's no_grad block.
        with torch.enable_grad():
            logits = vfl_model.top_model(h_a, h_p)
            return torch.autograd.grad(logits.sum(), h_p, retain_graph=False)[0]

    if attack_type == "active":
        grads = []
        for _query in range(active_queries):
            delta = (torch.rand_like(xp) * 2 - 1) * active_eps
            xp_q = torch.clamp(xp + delta, config["CLAMP_MIN"], config["CLAMP_MAX"])
            grads.append(_embedding_gradient(xp_q))
        return torch.cat(grads, dim=1)

    grad = _embedding_gradient(xp)
    if noise_sigma is not None:
        grad = grad + torch.randn_like(grad) * noise_sigma
    return grad


def run_privacy_attack_vision_multimode(
    vfl_model, train_loader, test_loader, config, num_classes,
    attack_type="passive", aux_batches_limit=4,
    attacker_lr=5e-4, attacker_epochs=2,
    active_eps=0.02, active_queries=2, perturbed_sigma=0.1
):
    """Multi-mode label inference attack; returns Top-1 and Top-5 ASR.

    A small MLP attacker is trained on features extracted from the first
    `aux_batches_limit` batches of `train_loader` and scored on every batch of
    `test_loader`.

    Args:
        vfl_model: Model exposing `bottom_a`, `bottom_p` and `top_model`.
        train_loader: Source of auxiliary `((xa, xp), y)` batches.
        test_loader: Batches the trained attacker is evaluated on.
        config: Dict providing `"DEVICE"`, `"CLAMP_MIN"` and `"CLAMP_MAX"`.
        num_classes: Number of labels the attacker predicts.
        attack_type: One of "passive", "perturbed", "active", "direct".
        aux_batches_limit: Number of training batches given to the attacker.
        attacker_lr: AdamW learning rate of the attacker.
        attacker_epochs: Passes over the auxiliary batches.
        active_eps: Half-width of the uniform input perturbation ("active").
        active_queries: Number of perturbed queries per batch ("active").
        perturbed_sigma: Std of the noise added to attacker training features
            in "perturbed" mode.

    Returns:
        tuple[float, float]: (Top-1 ASR, Top-5 ASR) in percent.

    Raises:
        ValueError: if `attack_type` is not a supported mode.
        StopIteration: if `test_loader` yields no batches.
    """
    # Reject unknown modes up front; otherwise the feature tensor would be
    # left unset (or stale from a previous batch) further down.
    if attack_type not in _LABEL_ATTACK_TYPES:
        raise ValueError(
            f"Unknown attack_type {attack_type!r}; expected one of {_LABEL_ATTACK_TYPES}"
        )

    device = config["DEVICE"]
    vfl_model.eval()

    # Probe one sample to learn the width of the passive embedding.
    (_, xp_ex), _ = next(iter(test_loader))
    with torch.no_grad():
        h_p_dim = vfl_model.bottom_p(xp_ex[:1].to(device)).shape[1]

    feat_dim = h_p_dim * active_queries if attack_type == "active" else h_p_dim
    attacker = nn.Sequential(
        nn.Linear(feat_dim, 128), nn.ReLU(inplace=True),
        nn.Dropout(0.2), nn.Linear(128, num_classes)
    ).to(device)
    opt = torch.optim.AdamW(attacker.parameters(), lr=attacker_lr, weight_decay=1e-4)

    aux_data = []
    for i, batch in enumerate(train_loader):
        if i >= aux_batches_limit:
            break
        aux_data.append(batch)

    # Train Attacker
    # NOTE(review): in "perturbed" mode noise is added to the training features
    # only; evaluation below uses the clean gradient. Confirm this is intended.
    train_sigma = perturbed_sigma if attack_type == "perturbed" else None
    attacker.train()
    for _epoch in range(attacker_epochs):
        for (xa, xp), y in aux_data:
            xa, xp, y = xa.to(device), xp.to(device), y.to(device)
            feat = _label_attack_features(
                vfl_model, xa, xp, config, attack_type,
                active_eps, active_queries, train_sigma,
            )
            opt.zero_grad(set_to_none=True)
            pred = attacker(feat.detach())
            loss = F.cross_entropy(pred, y)
            loss.backward()
            opt.step()

    # Evaluate Attacker
    attacker.eval()
    y_true, y_probs = [], []
    with torch.no_grad():
        for (xa, xp), y in test_loader:
            xa, xp, y = xa.to(device), xp.to(device), y.to(device)
            feat = _label_attack_features(
                vfl_model, xa, xp, config, attack_type,
                active_eps, active_queries, None,
            )
            probs = F.softmax(attacker(feat.detach()), dim=1).cpu()
            y_true.extend(y.cpu().tolist())
            y_probs.extend(probs.tolist())

    y_true, y_probs = np.array(y_true), np.array(y_probs)

    # Calculate Top-1 and Top-5 ASR
    asr_top1 = top_k_accuracy_score(y_true, y_probs, k=1, labels=range(num_classes)) * 100
    asr_top5 = top_k_accuracy_score(y_true, y_probs, k=min(5, num_classes), labels=range(num_classes)) * 100

    return float(asr_top1), float(asr_top5)

# Cell-completion marker: printed whenever this cell finishes executing.
print("Functions updated to return Evaluation Metrics")

Functions updated to return Evaluation Metrics


In [None]:
# Cell 6 – Main Vision Training Loop

import pandas as pd
import time
from torch.optim.lr_scheduler import CosineAnnealingLR
from pathlib import Path

# Datasets to train in this run; names must be keys of VISION_LOADERS.
VISION_GROUP = ["CIFAR10"]
SAVED_MODELS_VISION, VISION_TRAIN_LOGS = {}, []

drive_results_dir = Path(CONFIG["DRIVE_PATH"]) / "VFL_Results" / "VISION"
drive_results_dir.mkdir(parents=True, exist_ok=True)
print("Starting Vision VFL Training with Optimizations\n" + "="*70)

for dataset_name in VISION_GROUP:
    if dataset_name not in VISION_LOADERS:
        print(f"Skipping {dataset_name} (not loaded)")
        continue

    loaders = VISION_LOADERS[dataset_name]
    tr_loader, te_loader = loaders["train"], loaders["test"]
    num_classes = loaders["num_classes"]
    epochs = CONFIG["EPOCHS_PER_DATASET"].get(dataset_name, CONFIG["DEFAULT_EPOCHS"])
    hp = CONFIG["DATASET_HP"][dataset_name]
    SAVED_MODELS_VISION[dataset_name] = {}
    # Epochs without improvement tolerated before a phase stops early
    # (shared by phases 1-3).
    patience = 5

    # Phase 1: Original Architecture OA/Teacher
    print(f"\n{dataset_name} – Phase 1: Training OA (Teacher)")
    OA = build_vfl_model_vision(num_classes)
    opt = optim.AdamW(OA.parameters(), lr=hp["lr_teacher"], weight_decay=1e-4)
    sched = CosineAnnealingLR(opt, T_max=epochs, eta_min=1e-5)
    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint; the load_state_dict after the loop depends on that file.
    best_val, no_imp = float("-inf"), 0

    for ep in range(epochs):
        t0 = time.perf_counter()
        train_loss, train_acc = train_epoch_OA(OA, tr_loader, opt, CONFIG, dataset=dataset_name, show_progress=True)
        sched.step()
        # evaluate_model returns (loss, top1, top5); only loss and top-1 are used here.
        val_loss, val_acc = evaluate_model(OA, te_loader, CONFIG, dataset_name=dataset_name)[:2]
        elapsed = time.perf_counter() - t0
        print(f"  Epoch {ep+1:02d}/{epochs} | Train Loss={train_loss:.4f} | Train ACC={train_acc:6.2f}% | Val Loss={val_loss:.4f} | Val ACC={val_acc:6.2f}% | Time={elapsed:.1f}s")
        if val_acc > best_val:
            best_val, no_imp = val_acc, 0
            torch.save(OA.state_dict(), drive_results_dir / f"{dataset_name}_OA_best.pth")
            print(f"    → Best model saved (Val ACC: {val_acc:.2f}%)")
        else:
            no_imp += 1
            if no_imp >= patience:
                print("  Early stopping triggered.")
                break

    # Continue with the best checkpoint rather than the last-epoch weights.
    OA.load_state_dict(torch.load(drive_results_dir / f"{dataset_name}_OA_best.pth", map_location=CONFIG["DEVICE"]))
    SAVED_MODELS_VISION[dataset_name]["OA"] = f"{dataset_name}_OA_best.pth"

    # Phase 2: Knowledge Distillation (KD) with k-anonymity
    print(f"\n{dataset_name} – Phase 2: Training KDk (Student)")
    KDk = build_vfl_model_vision(num_classes)
    opt = optim.AdamW(KDk.parameters(), lr=hp["lr_student"], weight_decay=1e-4)
    sched = CosineAnnealingLR(opt, T_max=epochs, eta_min=1e-6)
    best_val, no_imp = float("-inf"), 0

    for ep in range(epochs):
        t0 = time.perf_counter()
        train_loss, train_acc = train_epoch_KD(KDk, OA, tr_loader, opt, CONFIG, dataset_name, show_progress=True)
        sched.step()
        val_loss, val_acc = evaluate_model(KDk, te_loader, CONFIG, dataset_name=dataset_name)[:2]
        elapsed = time.perf_counter() - t0
        print(f"  Epoch {ep+1:02d}/{epochs} | Train Loss={train_loss:.4f} | Train ACC={train_acc:6.2f}% | Val Loss={val_loss:.4f} | Val ACC={val_acc:6.2f}% | Time={elapsed:.1f}s")
        if val_acc > best_val:
            best_val, no_imp = val_acc, 0
            torch.save(KDk.state_dict(), drive_results_dir / f"{dataset_name}_KDk_best.pth")
            print(f"    → Best model saved (Val ACC: {val_acc:.2f}%)")
        else:
            no_imp += 1
            if no_imp >= patience:
                print("  Early stopping triggered.")
                break

    KDk.load_state_dict(torch.load(drive_results_dir / f"{dataset_name}_KDk_best.pth", map_location=CONFIG["DEVICE"]))
    SAVED_MODELS_VISION[dataset_name]["KDk"] = f"{dataset_name}_KDk_best.pth"

    # Phase 3: KDk with Adversarial Training (KDk+AT)
    # Model selection in this phase uses robust accuracy, not clean accuracy.
    print(f"\n{dataset_name} – Phase 3: Training KDk+AT (Student + Adv Train)")
    KDkAT = build_vfl_model_vision(num_classes)
    opt = optim.AdamW(KDkAT.parameters(), lr=hp["lr_student"], weight_decay=1e-4)
    sched = CosineAnnealingLR(opt, T_max=epochs, eta_min=1e-6)
    best_rob, no_imp = float("-inf"), 0

    for ep in range(epochs):
        t0 = time.perf_counter()
        train_loss, train_acc = train_epoch_KD(KDkAT, OA, tr_loader, opt, CONFIG, dataset_name, use_at=True, show_progress=True)
        sched.step()
        val_loss, val_acc = evaluate_model(KDkAT, te_loader, CONFIG, dataset_name=dataset_name)[:2]
        # Index 1 is the top-1 accuracy under the perturbed passive input.
        rob_acc = evaluate_model(KDkAT, te_loader, CONFIG, robust=True, dataset_name=dataset_name)[1]
        elapsed = time.perf_counter() - t0
        print(f"  Epoch {ep+1:02d}/{epochs} | Train Loss={train_loss:.4f} | Train ACC={train_acc:6.2f}% | Val Loss={val_loss:.4f} | Val ACC={val_acc:6.2f}% | Robust={rob_acc:6.2f}% | Time={elapsed:.1f}s")
        if rob_acc > best_rob:
            best_rob, no_imp = rob_acc, 0
            torch.save(KDkAT.state_dict(), drive_results_dir / f"{dataset_name}_KDkAT_best.pth")
            print(f"    → Best model saved (Robust ACC: {rob_acc:.2f}%)")
        else:
            no_imp += 1
            if no_imp >= patience:
                print("  Early stopping triggered.")
                break

    KDkAT.load_state_dict(torch.load(drive_results_dir / f"{dataset_name}_KDkAT_best.pth", map_location=CONFIG["DEVICE"]))
    SAVED_MODELS_VISION[dataset_name]["KDk+AT"] = f"{dataset_name}_KDkAT_best.pth"

    # Phase 4: KDk+AT+DP (KDk with Adversarial Training and Differential Privacy)
    # Fine-tunes from the best Phase-3 checkpoint at one fifth of the student
    # learning rate, with no scheduler and no early stopping.
    print(f"\n{dataset_name} – Phase 4: Training KDk+AT+DP (Adding Privacy)")
    KDkATDP = build_vfl_model_vision(num_classes)
    KDkATDP.load_state_dict(torch.load(drive_results_dir / f"{dataset_name}_KDkAT_best.pth", map_location=CONFIG["DEVICE"]))
    opt = optim.AdamW(KDkATDP.parameters(), lr=hp["lr_student"]/5, weight_decay=1e-4)

    for ep in range(epochs):
        t0 = time.perf_counter()
        train_loss, train_acc = train_epoch_KD_DP(KDkATDP, OA, tr_loader, opt, CONFIG, dataset_name, noise_multiplier=0.7)
        val_loss, val_acc = evaluate_model(KDkATDP, te_loader, CONFIG, dataset_name=dataset_name)[:2]
        rob_acc = evaluate_model(KDkATDP, te_loader, CONFIG, robust=True, dataset_name=dataset_name)[1]
        elapsed = time.perf_counter() - t0
        print(f"  Epoch {ep+1:02d}/{epochs} | Train Loss={train_loss:.4f} | Train ACC={train_acc:6.2f}% | Val Loss={val_loss:.4f} | Val ACC={val_acc:6.2f}% | Robust={rob_acc:6.2f}% | Time={elapsed:.1f}s")

    # Despite the "_best" suffix this file holds the weights after the final
    # epoch, since this phase does no model selection.
    torch.save(KDkATDP.state_dict(), drive_results_dir / f"{dataset_name}_KDkATDP_best.pth")
    SAVED_MODELS_VISION[dataset_name]["KDk+AT+DP"] = f"{dataset_name}_KDkATDP_best.pth"
    print(f"  Saved privacy-enhanced model.")

    # Release the models before the next dataset.
    del OA, KDk, KDkAT, KDkATDP, opt, sched
    torch.cuda.empty_cache()

print("\n" + "="*70)
print("Training completed for all datasets!")

Starting Vision VFL Training with Optimizations

CIFAR100 – Phase 1: Training OA (Teacher)


Training OA: 100%|███████████████████████████████████| 781/781 [01:05<00:00, 11.91it/s, loss=3.7379]


  Epoch 01/30 | Train Loss=4.1765 | Train ACC=  6.85% | Val Loss=3.9139 | Val ACC= 11.07% | Time=69.1s
    → Best model saved (Val ACC: 11.07%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.39it/s, loss=3.2862]


  Epoch 02/30 | Train Loss=3.5788 | Train ACC= 15.05% | Val Loss=3.2223 | Val ACC= 19.99% | Time=66.4s
    → Best model saved (Val ACC: 19.99%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.44it/s, loss=2.8558]


  Epoch 03/30 | Train Loss=3.2314 | Train ACC= 20.78% | Val Loss=2.9203 | Val ACC= 26.33% | Time=66.2s
    → Best model saved (Val ACC: 26.33%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.38it/s, loss=2.8920]


  Epoch 04/30 | Train Loss=2.9874 | Train ACC= 25.16% | Val Loss=2.7338 | Val ACC= 29.82% | Time=66.4s
    → Best model saved (Val ACC: 29.82%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.31it/s, loss=2.4042]


  Epoch 05/30 | Train Loss=2.7898 | Train ACC= 28.99% | Val Loss=2.3853 | Val ACC= 36.71% | Time=68.9s
    → Best model saved (Val ACC: 36.71%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.25it/s, loss=2.9472]


  Epoch 06/30 | Train Loss=2.6313 | Train ACC= 32.25% | Val Loss=2.2227 | Val ACC= 40.37% | Time=67.2s
    → Best model saved (Val ACC: 40.37%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.20it/s, loss=2.6424]


  Epoch 07/30 | Train Loss=2.4973 | Train ACC= 35.30% | Val Loss=2.1250 | Val ACC= 42.98% | Time=67.7s
    → Best model saved (Val ACC: 42.98%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.51it/s, loss=2.6554]


  Epoch 08/30 | Train Loss=2.3666 | Train ACC= 38.21% | Val Loss=2.0228 | Val ACC= 45.33% | Time=66.1s
    → Best model saved (Val ACC: 45.33%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:01<00:00, 12.72it/s, loss=2.2627]


  Epoch 09/30 | Train Loss=2.2539 | Train ACC= 41.04% | Val Loss=1.9195 | Val ACC= 47.46% | Time=64.8s
    → Best model saved (Val ACC: 47.46%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:01<00:00, 12.65it/s, loss=2.0986]


  Epoch 10/30 | Train Loss=2.1637 | Train ACC= 42.76% | Val Loss=1.8456 | Val ACC= 49.04% | Time=65.0s
    → Best model saved (Val ACC: 49.04%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.41it/s, loss=2.0538]


  Epoch 11/30 | Train Loss=2.0864 | Train ACC= 44.47% | Val Loss=1.8469 | Val ACC= 49.19% | Time=66.3s
    → Best model saved (Val ACC: 49.19%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.52it/s, loss=2.1086]


  Epoch 12/30 | Train Loss=1.9980 | Train ACC= 46.59% | Val Loss=1.7445 | Val ACC= 51.96% | Time=65.7s
    → Best model saved (Val ACC: 51.96%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.56it/s, loss=1.4770]


  Epoch 13/30 | Train Loss=1.9331 | Train ACC= 47.83% | Val Loss=1.6971 | Val ACC= 53.49% | Time=65.4s
    → Best model saved (Val ACC: 53.49%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.56it/s, loss=1.9780]


  Epoch 14/30 | Train Loss=1.8602 | Train ACC= 49.70% | Val Loss=1.6288 | Val ACC= 55.34% | Time=65.5s
    → Best model saved (Val ACC: 55.34%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.48it/s, loss=1.9617]


  Epoch 15/30 | Train Loss=1.8036 | Train ACC= 51.18% | Val Loss=1.5889 | Val ACC= 55.81% | Time=65.9s
    → Best model saved (Val ACC: 55.81%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.50it/s, loss=1.1716]


  Epoch 16/30 | Train Loss=1.7369 | Train ACC= 52.61% | Val Loss=1.5305 | Val ACC= 57.26% | Time=65.7s
    → Best model saved (Val ACC: 57.26%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.52it/s, loss=1.5942]


  Epoch 17/30 | Train Loss=1.6863 | Train ACC= 54.03% | Val Loss=1.5032 | Val ACC= 58.26% | Time=65.6s
    → Best model saved (Val ACC: 58.26%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.50it/s, loss=1.8492]


  Epoch 18/30 | Train Loss=1.6295 | Train ACC= 55.55% | Val Loss=1.4818 | Val ACC= 58.93% | Time=65.8s
    → Best model saved (Val ACC: 58.93%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.48it/s, loss=1.6554]


  Epoch 19/30 | Train Loss=1.5838 | Train ACC= 56.71% | Val Loss=1.4380 | Val ACC= 59.80% | Time=65.8s
    → Best model saved (Val ACC: 59.80%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.53it/s, loss=1.8956]


  Epoch 20/30 | Train Loss=1.5330 | Train ACC= 57.54% | Val Loss=1.4209 | Val ACC= 60.58% | Time=65.6s
    → Best model saved (Val ACC: 60.58%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.45it/s, loss=1.5072]


  Epoch 21/30 | Train Loss=1.4845 | Train ACC= 58.98% | Val Loss=1.4140 | Val ACC= 60.77% | Time=66.0s
    → Best model saved (Val ACC: 60.77%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.42it/s, loss=1.1904]


  Epoch 22/30 | Train Loss=1.4566 | Train ACC= 59.77% | Val Loss=1.3778 | Val ACC= 61.92% | Time=66.2s
    → Best model saved (Val ACC: 61.92%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.31it/s, loss=1.6452]


  Epoch 23/30 | Train Loss=1.4123 | Train ACC= 60.84% | Val Loss=1.3589 | Val ACC= 62.14% | Time=66.9s
    → Best model saved (Val ACC: 62.14%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.37it/s, loss=1.3986]


  Epoch 24/30 | Train Loss=1.3889 | Train ACC= 61.10% | Val Loss=1.3469 | Val ACC= 62.60% | Time=66.8s
    → Best model saved (Val ACC: 62.60%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.56it/s, loss=1.3077]


  Epoch 25/30 | Train Loss=1.3721 | Train ACC= 61.86% | Val Loss=1.3223 | Val ACC= 63.09% | Time=65.9s
    → Best model saved (Val ACC: 63.09%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.58it/s, loss=1.5169]


  Epoch 26/30 | Train Loss=1.3404 | Train ACC= 62.72% | Val Loss=1.3230 | Val ACC= 63.37% | Time=65.7s
    → Best model saved (Val ACC: 63.37%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.55it/s, loss=1.3067]


  Epoch 27/30 | Train Loss=1.3227 | Train ACC= 62.99% | Val Loss=1.3127 | Val ACC= 63.28% | Time=65.9s


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.45it/s, loss=1.2675]


  Epoch 28/30 | Train Loss=1.3172 | Train ACC= 63.14% | Val Loss=1.3059 | Val ACC= 63.74% | Time=66.6s
    → Best model saved (Val ACC: 63.74%)


Training OA: 100%|███████████████████████████████████| 781/781 [01:02<00:00, 12.50it/s, loss=1.2079]


  Epoch 29/30 | Train Loss=1.3198 | Train ACC= 63.19% | Val Loss=1.3071 | Val ACC= 63.54% | Time=66.5s


Training OA: 100%|███████████████████████████████████| 781/781 [01:01<00:00, 12.65it/s, loss=1.1399]


  Epoch 30/30 | Train Loss=1.3063 | Train ACC= 63.53% | Val Loss=1.3028 | Val ACC= 63.70% | Time=65.5s

CIFAR100 – Phase 2: Training KDk (Student)


Training KD: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.25it/s, loss=4.9032]


  Epoch 01/30 | Train Loss=4.6743 | Train ACC= 12.19% | Val Loss=3.1229 | Val ACC= 24.07% | Time=67.7s
    → Best model saved (Val ACC: 24.07%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.02it/s, loss=2.5575]


  Epoch 02/30 | Train Loss=2.9458 | Train ACC= 25.99% | Val Loss=2.4112 | Val ACC= 37.33% | Time=68.5s
    → Best model saved (Val ACC: 37.33%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:06<00:00, 11.72it/s, loss=1.8549]


  Epoch 03/30 | Train Loss=2.1854 | Train ACC= 34.88% | Val Loss=2.1125 | Val ACC= 44.51% | Time=69.9s
    → Best model saved (Val ACC: 44.51%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.09it/s, loss=1.6795]


  Epoch 04/30 | Train Loss=1.7936 | Train ACC= 40.68% | Val Loss=1.9586 | Val ACC= 48.10% | Time=67.8s
    → Best model saved (Val ACC: 48.10%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.11it/s, loss=1.2558]


  Epoch 05/30 | Train Loss=1.5590 | Train ACC= 44.93% | Val Loss=1.8565 | Val ACC= 50.90% | Time=68.1s
    → Best model saved (Val ACC: 50.90%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.14it/s, loss=1.3691]


  Epoch 06/30 | Train Loss=1.4161 | Train ACC= 47.37% | Val Loss=1.7346 | Val ACC= 52.28% | Time=68.2s
    → Best model saved (Val ACC: 52.28%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.16it/s, loss=1.3055]


  Epoch 07/30 | Train Loss=1.3160 | Train ACC= 49.62% | Val Loss=1.6560 | Val ACC= 54.67% | Time=67.5s
    → Best model saved (Val ACC: 54.67%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.18it/s, loss=1.2230]


  Epoch 08/30 | Train Loss=1.2432 | Train ACC= 50.93% | Val Loss=1.5286 | Val ACC= 57.44% | Time=67.3s
    → Best model saved (Val ACC: 57.44%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.24it/s, loss=1.1056]


  Epoch 09/30 | Train Loss=1.1800 | Train ACC= 52.64% | Val Loss=1.5206 | Val ACC= 58.15% | Time=67.0s
    → Best model saved (Val ACC: 58.15%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.28it/s, loss=1.1935]


  Epoch 10/30 | Train Loss=1.1320 | Train ACC= 53.72% | Val Loss=1.4931 | Val ACC= 58.77% | Time=66.8s
    → Best model saved (Val ACC: 58.77%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.21it/s, loss=1.0848]


  Epoch 11/30 | Train Loss=1.0924 | Train ACC= 54.89% | Val Loss=1.4606 | Val ACC= 59.50% | Time=67.2s
    → Best model saved (Val ACC: 59.50%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.16it/s, loss=1.0142]


  Epoch 12/30 | Train Loss=1.0562 | Train ACC= 55.67% | Val Loss=1.4510 | Val ACC= 59.85% | Time=68.0s
    → Best model saved (Val ACC: 59.85%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.16it/s, loss=1.0569]


  Epoch 13/30 | Train Loss=1.0282 | Train ACC= 56.36% | Val Loss=1.4205 | Val ACC= 60.24% | Time=68.0s
    → Best model saved (Val ACC: 60.24%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.18it/s, loss=0.9316]


  Epoch 14/30 | Train Loss=0.9970 | Train ACC= 57.37% | Val Loss=1.4078 | Val ACC= 60.64% | Time=67.3s
    → Best model saved (Val ACC: 60.64%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.12it/s, loss=0.9554]


  Epoch 15/30 | Train Loss=0.9707 | Train ACC= 57.76% | Val Loss=1.3923 | Val ACC= 60.76% | Time=67.6s
    → Best model saved (Val ACC: 60.76%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.16it/s, loss=0.9435]


  Epoch 16/30 | Train Loss=0.9461 | Train ACC= 59.10% | Val Loss=1.3699 | Val ACC= 61.90% | Time=67.5s
    → Best model saved (Val ACC: 61.90%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.10it/s, loss=0.9034]


  Epoch 17/30 | Train Loss=0.9267 | Train ACC= 59.35% | Val Loss=1.4070 | Val ACC= 61.18% | Time=67.7s


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.02it/s, loss=0.8665]


  Epoch 18/30 | Train Loss=0.9065 | Train ACC= 60.06% | Val Loss=1.3340 | Val ACC= 62.78% | Time=68.8s
    → Best model saved (Val ACC: 62.78%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.15it/s, loss=0.8168]


  Epoch 19/30 | Train Loss=0.8927 | Train ACC= 60.36% | Val Loss=1.3264 | Val ACC= 62.61% | Time=68.0s


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.14it/s, loss=0.8417]


  Epoch 20/30 | Train Loss=0.8759 | Train ACC= 61.29% | Val Loss=1.2980 | Val ACC= 63.31% | Time=67.6s
    → Best model saved (Val ACC: 63.31%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.08it/s, loss=1.1221]


  Epoch 21/30 | Train Loss=0.8622 | Train ACC= 61.42% | Val Loss=1.2939 | Val ACC= 63.48% | Time=67.9s
    → Best model saved (Val ACC: 63.48%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.18it/s, loss=0.8849]


  Epoch 22/30 | Train Loss=0.8477 | Train ACC= 61.80% | Val Loss=1.2859 | Val ACC= 63.88% | Time=67.3s
    → Best model saved (Val ACC: 63.88%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.16it/s, loss=0.8224]


  Epoch 23/30 | Train Loss=0.8356 | Train ACC= 62.30% | Val Loss=1.2773 | Val ACC= 64.31% | Time=67.4s
    → Best model saved (Val ACC: 64.31%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.19it/s, loss=0.9782]


  Epoch 24/30 | Train Loss=0.8297 | Train ACC= 62.57% | Val Loss=1.2620 | Val ACC= 64.64% | Time=67.6s
    → Best model saved (Val ACC: 64.64%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:03<00:00, 12.23it/s, loss=0.7624]


  Epoch 25/30 | Train Loss=0.8197 | Train ACC= 62.94% | Val Loss=1.2674 | Val ACC= 64.15% | Time=67.8s


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.17it/s, loss=0.6759]


  Epoch 26/30 | Train Loss=0.8134 | Train ACC= 63.02% | Val Loss=1.2598 | Val ACC= 64.62% | Time=67.5s


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.15it/s, loss=0.7362]


  Epoch 27/30 | Train Loss=0.8072 | Train ACC= 63.29% | Val Loss=1.2524 | Val ACC= 64.43% | Time=67.5s


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.10it/s, loss=0.7525]


  Epoch 28/30 | Train Loss=0.7997 | Train ACC= 63.52% | Val Loss=1.2540 | Val ACC= 64.56% | Time=67.8s


Training KD: 100%|███████████████████████████████████| 781/781 [01:04<00:00, 12.14it/s, loss=0.8610]


  Epoch 29/30 | Train Loss=0.8018 | Train ACC= 63.31% | Val Loss=1.2527 | Val ACC= 64.59% | Time=67.5s
  Early stopping triggered.

CIFAR100 – Phase 3: Training KDk+AT (Student + Adv Train)


Training KD: 100%|███████████████████████████████████| 781/781 [01:14<00:00, 10.43it/s, loss=3.8552]


  Epoch 01/30 | Train Loss=4.7304 | Train ACC= 10.88% | Val Loss=3.1423 | Val ACC= 24.03% | Robust= 21.54% | Time=83.7s
    → Best model saved (Robust ACC: 21.54%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:14<00:00, 10.52it/s, loss=2.5387]


  Epoch 02/30 | Train Loss=3.1011 | Train ACC= 22.79% | Val Loss=2.7389 | Val ACC= 32.06% | Robust= 29.08% | Time=83.5s
    → Best model saved (Robust ACC: 29.08%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.36it/s, loss=2.0000]


  Epoch 03/30 | Train Loss=2.3211 | Train ACC= 30.74% | Val Loss=2.1736 | Val ACC= 42.30% | Robust= 39.00% | Time=83.9s
    → Best model saved (Robust ACC: 39.00%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:14<00:00, 10.45it/s, loss=1.8836]


  Epoch 04/30 | Train Loss=1.9528 | Train ACC= 35.47% | Val Loss=2.0889 | Val ACC= 44.92% | Robust= 41.21% | Time=83.6s
    → Best model saved (Robust ACC: 41.21%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:14<00:00, 10.45it/s, loss=1.5425]


  Epoch 05/30 | Train Loss=1.7128 | Train ACC= 39.19% | Val Loss=1.8663 | Val ACC= 50.02% | Robust= 45.71% | Time=83.7s
    → Best model saved (Robust ACC: 45.71%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.36it/s, loss=1.5301]


  Epoch 06/30 | Train Loss=1.5739 | Train ACC= 41.67% | Val Loss=1.7936 | Val ACC= 51.39% | Robust= 46.84% | Time=84.1s
    → Best model saved (Robust ACC: 46.84%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.36it/s, loss=1.3815]


  Epoch 07/30 | Train Loss=1.4603 | Train ACC= 42.68% | Val Loss=1.7997 | Val ACC= 51.95% | Robust= 47.24% | Time=84.5s
    → Best model saved (Robust ACC: 47.24%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.34it/s, loss=1.2360]


  Epoch 08/30 | Train Loss=1.3833 | Train ACC= 44.30% | Val Loss=1.6788 | Val ACC= 54.58% | Robust= 49.38% | Time=84.0s
    → Best model saved (Robust ACC: 49.38%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.23it/s, loss=1.2026]


  Epoch 09/30 | Train Loss=1.3234 | Train ACC= 45.85% | Val Loss=1.5826 | Val ACC= 56.50% | Robust= 51.48% | Time=85.3s
    → Best model saved (Robust ACC: 51.48%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.35it/s, loss=1.3113]


  Epoch 10/30 | Train Loss=1.2691 | Train ACC= 46.90% | Val Loss=1.5403 | Val ACC= 56.80% | Robust= 51.51% | Time=84.2s
    → Best model saved (Robust ACC: 51.51%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.40it/s, loss=1.1878]


  Epoch 11/30 | Train Loss=1.2235 | Train ACC= 47.99% | Val Loss=1.5166 | Val ACC= 57.60% | Robust= 52.39% | Time=83.7s
    → Best model saved (Robust ACC: 52.39%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:14<00:00, 10.44it/s, loss=1.1539]


  Epoch 12/30 | Train Loss=1.1893 | Train ACC= 48.88% | Val Loss=1.4440 | Val ACC= 59.08% | Robust= 53.79% | Time=84.0s
    → Best model saved (Robust ACC: 53.79%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.31it/s, loss=1.1965]


  Epoch 13/30 | Train Loss=1.1567 | Train ACC= 49.28% | Val Loss=1.4382 | Val ACC= 59.82% | Robust= 53.73% | Time=84.3s


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.32it/s, loss=1.2276]


  Epoch 14/30 | Train Loss=1.1269 | Train ACC= 49.86% | Val Loss=1.4084 | Val ACC= 60.43% | Robust= 54.41% | Time=84.6s
    → Best model saved (Robust ACC: 54.41%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.37it/s, loss=1.1285]


  Epoch 15/30 | Train Loss=1.0989 | Train ACC= 50.45% | Val Loss=1.4084 | Val ACC= 60.60% | Robust= 54.18% | Time=84.2s


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.22it/s, loss=1.0635]


  Epoch 16/30 | Train Loss=1.0791 | Train ACC= 51.53% | Val Loss=1.3966 | Val ACC= 60.90% | Robust= 54.71% | Time=85.5s
    → Best model saved (Robust ACC: 54.71%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.38it/s, loss=0.9992]


  Epoch 17/30 | Train Loss=1.0589 | Train ACC= 51.95% | Val Loss=1.3751 | Val ACC= 60.94% | Robust= 55.26% | Time=84.7s
    → Best model saved (Robust ACC: 55.26%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.26it/s, loss=1.0849]


  Epoch 18/30 | Train Loss=1.0393 | Train ACC= 52.49% | Val Loss=1.3629 | Val ACC= 61.58% | Robust= 55.28% | Time=85.0s
    → Best model saved (Robust ACC: 55.28%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.29it/s, loss=1.0412]


  Epoch 19/30 | Train Loss=1.0166 | Train ACC= 52.64% | Val Loss=1.3302 | Val ACC= 62.08% | Robust= 55.80% | Time=85.1s
    → Best model saved (Robust ACC: 55.80%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.17it/s, loss=1.0189]


  Epoch 20/30 | Train Loss=1.0021 | Train ACC= 53.29% | Val Loss=1.3192 | Val ACC= 62.34% | Robust= 56.28% | Time=85.7s
    → Best model saved (Robust ACC: 56.28%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.22it/s, loss=1.1171]


  Epoch 21/30 | Train Loss=0.9917 | Train ACC= 53.50% | Val Loss=1.3130 | Val ACC= 62.83% | Robust= 56.54% | Time=85.7s
    → Best model saved (Robust ACC: 56.54%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.35it/s, loss=1.0091]


  Epoch 22/30 | Train Loss=0.9761 | Train ACC= 53.86% | Val Loss=1.3099 | Val ACC= 62.72% | Robust= 56.81% | Time=83.9s
    → Best model saved (Robust ACC: 56.81%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.34it/s, loss=0.8420]


  Epoch 23/30 | Train Loss=0.9692 | Train ACC= 53.72% | Val Loss=1.2998 | Val ACC= 63.00% | Robust= 56.63% | Time=84.5s


Training KD: 100%|███████████████████████████████████| 781/781 [01:14<00:00, 10.44it/s, loss=0.9721]


  Epoch 24/30 | Train Loss=0.9569 | Train ACC= 54.35% | Val Loss=1.2928 | Val ACC= 63.22% | Robust= 56.32% | Time=84.0s


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.18it/s, loss=0.8346]


  Epoch 25/30 | Train Loss=0.9508 | Train ACC= 54.61% | Val Loss=1.2953 | Val ACC= 63.13% | Robust= 56.72% | Time=85.8s


Training KD: 100%|███████████████████████████████████| 781/781 [01:15<00:00, 10.28it/s, loss=0.9665]


  Epoch 26/30 | Train Loss=0.9451 | Train ACC= 54.79% | Val Loss=1.2845 | Val ACC= 63.35% | Robust= 57.00% | Time=85.4s
    → Best model saved (Robust ACC: 57.00%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.15it/s, loss=0.9095]


  Epoch 27/30 | Train Loss=0.9409 | Train ACC= 54.95% | Val Loss=1.2791 | Val ACC= 63.34% | Robust= 57.18% | Time=85.8s
    → Best model saved (Robust ACC: 57.18%)


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.24it/s, loss=0.9281]


  Epoch 28/30 | Train Loss=0.9313 | Train ACC= 55.35% | Val Loss=1.2779 | Val ACC= 63.50% | Robust= 57.08% | Time=85.5s


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.25it/s, loss=0.9229]


  Epoch 29/30 | Train Loss=0.9362 | Train ACC= 55.11% | Val Loss=1.2778 | Val ACC= 63.56% | Robust= 57.18% | Time=84.8s


Training KD: 100%|███████████████████████████████████| 781/781 [01:16<00:00, 10.20it/s, loss=0.8680]


  Epoch 30/30 | Train Loss=0.9312 | Train ACC= 55.24% | Val Loss=1.2746 | Val ACC= 63.52% | Robust= 57.20% | Time=85.9s
    → Best model saved (Robust ACC: 57.20%)

CIFAR100 – Phase 4: Training KDk+AT+DP (Adding Privacy)


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:17<00:00, 10.09it/s]


  Epoch 01/30 | Train Loss=1.0277 | Train ACC= 53.32% | Val Loss=1.3403 | Val ACC= 62.07% | Robust= 56.35% | Time=86.2s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:16<00:00, 10.16it/s]


  Epoch 02/30 | Train Loss=1.0870 | Train ACC= 52.66% | Val Loss=1.3686 | Val ACC= 61.14% | Robust= 55.94% | Time=85.9s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:16<00:00, 10.19it/s]


  Epoch 03/30 | Train Loss=1.1528 | Train ACC= 52.40% | Val Loss=1.3698 | Val ACC= 61.47% | Robust= 57.26% | Time=85.1s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:14<00:00, 10.47it/s]


  Epoch 04/30 | Train Loss=1.1576 | Train ACC= 53.22% | Val Loss=1.3875 | Val ACC= 60.81% | Robust= 57.41% | Time=83.7s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.33it/s]


  Epoch 05/30 | Train Loss=1.1645 | Train ACC= 53.70% | Val Loss=1.3804 | Val ACC= 60.83% | Robust= 57.74% | Time=84.9s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.28it/s]


  Epoch 06/30 | Train Loss=1.1802 | Train ACC= 53.58% | Val Loss=1.4103 | Val ACC= 60.29% | Robust= 57.61% | Time=84.7s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.34it/s]


  Epoch 07/30 | Train Loss=1.1878 | Train ACC= 53.62% | Val Loss=1.4074 | Val ACC= 60.85% | Robust= 57.73% | Time=84.7s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:16<00:00, 10.24it/s]


  Epoch 08/30 | Train Loss=1.1812 | Train ACC= 53.86% | Val Loss=1.4056 | Val ACC= 60.66% | Robust= 57.91% | Time=84.9s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:17<00:00, 10.13it/s]


  Epoch 09/30 | Train Loss=1.1871 | Train ACC= 54.19% | Val Loss=1.4207 | Val ACC= 60.28% | Robust= 57.74% | Time=86.2s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:16<00:00, 10.24it/s]


  Epoch 10/30 | Train Loss=1.1871 | Train ACC= 54.27% | Val Loss=1.4160 | Val ACC= 60.48% | Robust= 58.18% | Time=84.7s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:14<00:00, 10.43it/s]


  Epoch 11/30 | Train Loss=1.2026 | Train ACC= 54.09% | Val Loss=1.4233 | Val ACC= 60.24% | Robust= 58.34% | Time=83.9s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:13<00:00, 10.59it/s]


  Epoch 12/30 | Train Loss=1.2042 | Train ACC= 54.16% | Val Loss=1.4342 | Val ACC= 59.94% | Robust= 58.40% | Time=83.1s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.36it/s]


  Epoch 13/30 | Train Loss=1.2102 | Train ACC= 54.62% | Val Loss=1.4357 | Val ACC= 59.89% | Robust= 58.35% | Time=83.7s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:14<00:00, 10.43it/s]


  Epoch 14/30 | Train Loss=1.2105 | Train ACC= 54.49% | Val Loss=1.4318 | Val ACC= 59.72% | Robust= 58.56% | Time=83.7s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:14<00:00, 10.49it/s]


  Epoch 15/30 | Train Loss=1.2063 | Train ACC= 55.04% | Val Loss=1.4266 | Val ACC= 60.11% | Robust= 58.63% | Time=83.7s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:16<00:00, 10.27it/s]


  Epoch 16/30 | Train Loss=1.2186 | Train ACC= 54.88% | Val Loss=1.4225 | Val ACC= 60.41% | Robust= 58.93% | Time=84.9s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.39it/s]


  Epoch 17/30 | Train Loss=1.2266 | Train ACC= 54.72% | Val Loss=1.4220 | Val ACC= 60.24% | Robust= 58.99% | Time=84.2s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.35it/s]


  Epoch 18/30 | Train Loss=1.2316 | Train ACC= 55.12% | Val Loss=1.4283 | Val ACC= 59.77% | Robust= 58.69% | Time=83.8s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.29it/s]


  Epoch 19/30 | Train Loss=1.2320 | Train ACC= 55.09% | Val Loss=1.4344 | Val ACC= 59.89% | Robust= 58.74% | Time=84.9s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.40it/s]


  Epoch 20/30 | Train Loss=1.2180 | Train ACC= 55.91% | Val Loss=1.4454 | Val ACC= 59.83% | Robust= 58.56% | Time=84.3s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.29it/s]


  Epoch 21/30 | Train Loss=1.2357 | Train ACC= 55.32% | Val Loss=1.4232 | Val ACC= 60.16% | Robust= 58.93% | Time=84.6s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.40it/s]


  Epoch 22/30 | Train Loss=1.2251 | Train ACC= 55.48% | Val Loss=1.4274 | Val ACC= 60.40% | Robust= 59.41% | Time=84.2s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.34it/s]


  Epoch 23/30 | Train Loss=1.2237 | Train ACC= 55.94% | Val Loss=1.4236 | Val ACC= 60.18% | Robust= 58.97% | Time=83.9s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.31it/s]


  Epoch 24/30 | Train Loss=1.2209 | Train ACC= 55.98% | Val Loss=1.4272 | Val ACC= 60.42% | Robust= 59.28% | Time=84.7s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.38it/s]


  Epoch 25/30 | Train Loss=1.2158 | Train ACC= 55.89% | Val Loss=1.4177 | Val ACC= 60.62% | Robust= 59.51% | Time=84.2s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.36it/s]


  Epoch 26/30 | Train Loss=1.2037 | Train ACC= 56.28% | Val Loss=1.4293 | Val ACC= 60.20% | Robust= 59.18% | Time=84.0s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:14<00:00, 10.49it/s]


  Epoch 27/30 | Train Loss=1.2086 | Train ACC= 56.16% | Val Loss=1.4132 | Val ACC= 60.47% | Robust= 59.30% | Time=83.4s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:14<00:00, 10.42it/s]


  Epoch 28/30 | Train Loss=1.2184 | Train ACC= 56.47% | Val Loss=1.4174 | Val ACC= 60.46% | Robust= 59.63% | Time=83.5s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.33it/s]


  Epoch 29/30 | Train Loss=1.2112 | Train ACC= 56.44% | Val Loss=1.4290 | Val ACC= 59.94% | Robust= 59.03% | Time=84.5s


Training KD+AT+DP: 100%|██████████████████████████████████████████| 781/781 [01:15<00:00, 10.38it/s]


  Epoch 30/30 | Train Loss=1.2154 | Train ACC= 56.49% | Val Loss=1.4173 | Val ACC= 60.09% | Robust= 59.41% | Time=84.6s
  Saved privacy-enhanced model.

Training completed for all datasets!


In [None]:
# Cell 7 – FOCUSED EVALUATION: Analyzing CIFAR-100 Only
#
# For every saved checkpoint of the selected dataset this cell measures
# clean/robust Top-1 and Top-5 accuracy, macro-F1, one-vs-rest AUC and the
# success rate of four privacy-attack modes, then writes one summary row per
# model to a CSV file inside the results directory.
from IPython.display import display
import pandas as pd
from pathlib import Path
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score, top_k_accuracy_score

# Dataset evaluated by this cell. Checkpoint names, log messages and the output
# CSV name are all derived from these two values so they cannot drift apart
# (the previous version evaluated "CIFAR100" while loading "CIFAR10_*" files
# and saving the table as "evaluation_results_cifar10_only.csv").
EVAL_DATASET = "CIFAR100"         # key into VISION_LOADERS
EVAL_DATASET_LABEL = "CIFAR-100"  # human-readable name used in log messages

# Budget handed to the attack routine for each (model, attack mode) pair.
ATTACK_AUX_BATCHES_LIMIT = 4
ATTACKER_EPOCHS = 2

print("--- FOCUSED EVALUATION INITIATED ---")
print(f"Loading pre-trained {EVAL_DATASET_LABEL} models from Google Drive and running analysis.")

# Display name of each model variant -> tag used inside its checkpoint filename.
# NOTE(review): assumes the training cell saved checkpoints as
# "<DATASET>_<tag>_best.pth" (same pattern as the former "CIFAR10_*" entries) —
# confirm against the training code.
MODEL_FILE_TAGS = {
    "OA": "OA",
    "KDk": "KDk",
    "KDk+AT": "KDkAT",
    "KDk+AT+DP": "KDkATDP",
}

# Manually define the dictionary for only the completed Vision models
SAVED_MODELS_VISION = {
    EVAL_DATASET: {
        model_type: f"{EVAL_DATASET}_{tag}_best.pth"
        for model_type, tag in MODEL_FILE_TAGS.items()
    }
}

drive_results_dir = Path(CONFIG["DRIVE_PATH"]) / "VFL_Results" / "VISION"
evaluation_results = []
attack_modes = ["passive", "direct", "active", "perturbed"]

# Fail fast, before any expensive evaluation, if a checkpoint is not on disk.
missing_checkpoints = [
    str(drive_results_dir / file_name)
    for models in SAVED_MODELS_VISION.values()
    for file_name in models.values()
    if not (drive_results_dir / file_name).is_file()
]
if missing_checkpoints:
    raise FileNotFoundError("Missing checkpoint file(s): " + ", ".join(missing_checkpoints))

for dataset_name, models in SAVED_MODELS_VISION.items():
    loaders = VISION_LOADERS[dataset_name]
    tr_loader, te_loader = loaders["train"], loaders["test"]
    num_classes = loaders["num_classes"]

    for model_type, model_path in models.items():
        print(f"Evaluating {dataset_name} - {model_type}...")
        model = build_vfl_model_vision(num_classes)
        model.load_state_dict(torch.load(drive_results_dir / model_path,
                                         map_location=CONFIG["DEVICE"]))
        model.eval()

        # Capture Top-1 and Top-5 accuracy (first return value is not used here)
        _, clean_acc_top1, clean_acc_top5 = evaluate_model(
            model, te_loader, CONFIG, dataset_name=dataset_name)
        _, robust_acc_top1, robust_acc_top5 = evaluate_model(
            model, te_loader, CONFIG, robust=True, dataset_name=dataset_name)

        # F1 & AUC: gather labels, predictions and class probabilities over the
        # whole test loader, one array per batch, and join them once at the end.
        true_batches, pred_batches, prob_batches = [], [], []
        with torch.no_grad():
            for (xa, xp), y in te_loader:
                xa, xp, y = xa.to(CONFIG["DEVICE"]), xp.to(CONFIG["DEVICE"]), y.to(CONFIG["DEVICE"])
                logits = model(xa, xp)
                prob_batches.append(F.softmax(logits, dim=1).cpu().numpy())
                pred_batches.append(logits.argmax(1).cpu().numpy())
                true_batches.append(y.cpu().numpy())
        y_true = np.concatenate(true_batches)
        y_pred = np.concatenate(pred_batches)
        y_probs = np.concatenate(prob_batches)
        f1 = f1_score(y_true, y_pred, average='macro') * 100
        auc = roc_auc_score(y_true, y_probs, multi_class='ovr') * 100

        # Privacy Attacks: Top-1 / Top-5 attack success rate for each mode
        attack_results = {}
        for attack in attack_modes:
            a1, a5 = run_privacy_attack_vision_multimode(
                model, tr_loader, te_loader, CONFIG, num_classes,
                attack_type=attack, aux_batches_limit=ATTACK_AUX_BATCHES_LIMIT,
                attacker_epochs=ATTACKER_EPOCHS)
            attack_results[attack] = {"top1": a1, "top5": a5}

        # (1 - passive ASR) * (robust / clean accuracy), as a percentage. The
        # value rises as the passive attack gets weaker and as robust accuracy
        # approaches clean accuracy. max(..., 1) keeps the ratio finite when
        # clean accuracy is below 1 %.
        pli = (1 - attack_results["passive"]["top1"]/100.0) * (robust_acc_top1 / max(clean_acc_top1,1)) * 100

        result_row = {
            "Dataset": dataset_name,
            "Model": model_type,
            "Clean ACC (Top-1 %)": round(clean_acc_top1, 2),
            "Clean ACC (Top-5 %)": round(clean_acc_top5, 2),
            "Robust ACC (Top-1 %)": round(robust_acc_top1, 2),
            "Robust ACC (Top-5 %)": round(robust_acc_top5, 2),
            "F1 (%)": round(f1, 2),
            "AUC (%)": round(auc, 2),
        }
        # One Top-1 and one Top-5 column per attack mode, in attack_modes order.
        for attack in attack_modes:
            result_row[f"ASR_{attack} (Top-1 %)"] = round(attack_results[attack]["top1"], 2)
            result_row[f"ASR_{attack} (Top-5 %)"] = round(attack_results[attack]["top5"], 2)
        result_row["Privacy Leakage Index"] = round(pli, 2)
        evaluation_results.append(result_row)

eval_df = pd.DataFrame(evaluation_results)
# The file name carries the dataset so that results of different datasets do
# not overwrite each other.
eval_path = drive_results_dir / f"evaluation_results_{EVAL_DATASET.lower()}_only.csv"
eval_df.to_csv(eval_path, index=False)
display(eval_df)
print(f"\nEvaluation complete for {EVAL_DATASET_LABEL}. Results saved to: {eval_path}")

--- FOCUSED EVALUATION INITIATED ---
Loading pre-trained CIFAR-100 models from Google Drive and running analysis.
Evaluating CIFAR100 - OA...
Evaluating CIFAR100 - KDk...
Evaluating CIFAR100 - KDk+AT...
Evaluating CIFAR100 - KDk+AT+DP...


Unnamed: 0,Dataset,Model,Clean ACC (Top-1 %),Clean ACC (Top-5 %),Robust ACC (Top-1 %),Robust ACC (Top-5 %),F1 (%),AUC (%),ASR_passive (Top-1 %),ASR_passive (Top-5 %),ASR_direct (Top-1 %),ASR_direct (Top-5 %),ASR_active (Top-1 %),ASR_active (Top-5 %),ASR_perturbed (Top-1 %),ASR_perturbed (Top-5 %),Privacy Leakage Index
0,CIFAR100,OA,63.74,87.96,39.71,74.58,63.58,98.58,4.75,14.89,2.09,9.07,7.31,20.57,4.52,14.73,59.34
1,CIFAR100,KDk,64.64,88.96,42.03,76.67,64.36,98.69,2.52,10.08,1.94,8.11,4.44,14.7,2.63,11.07,63.38
2,CIFAR100,KDk+AT,63.52,88.55,57.2,85.25,63.27,98.64,2.55,10.2,1.65,7.53,6.68,17.94,3.36,9.84,87.75
3,CIFAR100,KDk+AT+DP,60.09,86.34,59.41,85.81,59.82,98.34,2.82,10.26,1.99,6.77,3.18,12.38,3.11,11.06,96.08



Evaluation complete for CIFAR-10. Results saved to: /content/VFL_Results/VISION/evaluation_results_cifar10_only.csv
