# Adversarial Attacks with ResNet-34 (Updated)

This notebook performs two tasks on a 100-class subset of ImageNet-1K:
1. Evaluate clean top-1/top-5 accuracy using a pretrained ResNet-34.
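2. Generate adversarial examples using FGSM (ε=0.02, applied to the normalized input tensors), re-evaluate top-1/top-5 accuracy, and visualize up to three images that were classified correctly before the attack but misclassified after it, with human-readable labels.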


## 1. Imports and Preprocessing

In [None]:
import json
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms, datasets, models
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Per-channel mean / std used to normalize the three image channels.
mean_norms = np.array([0.485, 0.456, 0.406])
std_norms = np.array([0.229, 0.224, 0.225])

# Preprocessing: convert each image to a tensor, then normalize channel-wise.
plain_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_norms, std=std_norms),
    ]
)

## 2. Task 1: Clean Accuracy Evaluation

In [None]:
# Image-folder dataset; every sample goes through the shared preprocessing.
dataset = datasets.ImageFolder(
    transform=plain_transforms,
    root="./TestDataSet/TestDataSet",
)
loader = DataLoader(dataset, shuffle=False, batch_size=32, num_workers=4)

# Map the position of each JSON entry to the integer found before its first ':'.
# NOTE(review): presumably the entry order matches ImageFolder's class-index
# order, so position == dataset label -- verify against the data directory.
with open("./TestDataSet/TestDataSet/labels_list.json") as f:
    entries = json.load(f)
idx_to_true = {
    position: int(entry.split(":", 1)[0])
    for position, entry in enumerate(entries)
}

# Pretrained ResNet-34, moved to the selected device and put in eval mode.
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
model = model.to(device).eval()

# Clean top-1 / top-5 accuracy over the whole dataset (no gradients needed).
top1 = top5 = total = 0
with torch.no_grad():
    for batch, batch_labels in tqdm(loader, desc="Clean Eval"):
        scores = model(batch.to(device))
        # Indices of the five highest-scoring classes per image, best first.
        best5 = scores.topk(5, dim=1).indices
        # Translate dataset labels into the model's class indices.
        targets = torch.tensor(
            [idx_to_true[int(label)] for label in batch_labels],
            device=best5.device,
        )
        matches = best5 == targets.unsqueeze(1)
        top1 += matches[:, 0].sum().item()
        top5 += matches.any(dim=1).sum().item()
        total += batch_labels.size(0)

print(f"Clean Top-1 accuracy: {top1/total*100:.2f}%")
print(f"Clean Top-5 accuracy: {top5/total*100:.2f}%")

## 3. Task 2: FGSM Attack & Adversarial Evaluation

In [None]:
# FGSM helper functions and bounds
# Per-channel mean / std as (3, 1, 1) tensors so they broadcast over the spatial
# dimensions of an image. They are built explicitly in float32: the NumPy
# constants are float64, and float64 bounds would silently promote every
# clamped image to float64.
cn = torch.tensor(mean_norms, dtype=torch.float32, device=device)[:, None, None]
cs = torch.tensor(std_norms, dtype=torch.float32, device=device)[:, None, None]
# Raw pixel values 0 and 1 pushed through the same (x - mean) / std mapping as
# the inputs: the valid value range of a normalized image, per channel.
min_val = (0 - cn) / cs
max_val = (1 - cn) / cs

def fgsm(image, eps, grad, lower=None, upper=None):
    """Take one FGSM step and clamp the result to the valid value range.

    Computes ``image + eps * sign(grad)`` and clips it element-wise to
    ``[lower, upper]``.

    Args:
        image: Input tensor to perturb.
        eps: Step size multiplied with the gradient sign.
        grad: Gradient of the loss with respect to ``image`` (same shape).
        lower: Optional tensor of lower bounds, broadcastable to ``image``.
            Defaults to the module-level ``min_val``.
        upper: Optional tensor of upper bounds, broadcastable to ``image``.
            Defaults to the module-level ``max_val``.

    Returns:
        The perturbed, clamped tensor, in the dtype and on the device of
        ``image``.
    """
    if lower is None:
        lower = min_val
    if upper is None:
        upper = max_val
    # Cast the bounds to the image's dtype/device so that e.g. float64 bounds
    # do not promote a float32 image to float64.
    lower = lower.to(device=image.device, dtype=image.dtype)
    upper = upper.to(device=image.device, dtype=image.dtype)

    perturbed = image + eps * grad.sign()
    # Same order as before: cap at the upper bound first, then at the lower.
    return torch.maximum(torch.minimum(perturbed, upper), lower)

# Prepare for FGSM loop
# One image per batch so each sample gets its own loss and input gradient.
si_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)

# FGSM step size; it is applied directly to the normalized input tensors.
epsilon = 0.02
orig_images = []  # clean (normalized) images, CPU, detached from autograd
adv_images = []   # adversarial (normalized) images, CPU
adv_labels = []   # dataset labels (ImageFolder indices), not model class ids
orig_preds = []   # model class id predicted on the clean image
adv_preds = []    # model class id predicted on the adversarial image

# Run FGSM on every image
for img, lab in tqdm(si_loader, desc="FGSM Attack"):
    img = img.to(device).detach().requires_grad_(True)
    class_idx = int(lab)

    # Store the clean image. It must be detached: `img` requires grad, and a
    # tensor that requires grad cannot be converted with .numpy() when it is
    # plotted later.
    orig_images.append(img.detach().squeeze(0).cpu())

    # Forward pass and loss against the true model class id.
    out = model(img)
    true_idx = torch.tensor([idx_to_true[class_idx]], device=device)
    loss = F.cross_entropy(out, true_idx)

    # Only the gradient w.r.t. the input is needed, so request exactly that
    # instead of back-propagating into (and accumulating on) the weights.
    (grad,) = torch.autograd.grad(loss, img)

    # Generate the adversarial example, cut off from the autograd graph.
    adv = fgsm(img, epsilon, grad).detach().to(torch.float32)

    # The adversarial prediction is inference only; skip graph construction.
    with torch.no_grad():
        adv_pred = model(adv).argmax(1).item()

    adv_images.append(adv.squeeze(0).cpu())
    adv_labels.append(class_idx)
    orig_preds.append(out.argmax(1).item())
    adv_preds.append(adv_pred)

# Build adversarial dataset
adv_tensor = torch.stack(adv_images)
lab_tensor = torch.tensor(adv_labels)
adv_set = TensorDataset(adv_tensor, lab_tensor)
adv_loader = DataLoader(adv_set, batch_size=32, shuffle=False)


In [None]:
# Top-1 / top-5 accuracy of the model on the adversarial images.
top1 = top5 = total = 0
with torch.no_grad():
    for batch, batch_labels in adv_loader:
        scores = model(batch.to(device))
        # Indices of the five highest-scoring classes per image, best first.
        best5 = scores.topk(5, dim=1).indices
        # Translate dataset labels into the model's class indices.
        targets = torch.tensor(
            [idx_to_true[int(label)] for label in batch_labels],
            device=best5.device,
        )
        matches = best5 == targets.unsqueeze(1)
        top1 += matches[:, 0].sum().item()
        top5 += matches.any(dim=1).sum().item()
        total += batch_labels.size(0)

print(f"Adversarial Top-1 accuracy: {top1/total*100:.2f}%")
print(f"Adversarial Top-5 accuracy: {top5/total*100:.2f}%")

## 4. Visualization of Misclassifications

In [None]:
# Prepare label maps for human-readable names
weights = models.ResNet34_Weights.IMAGENET1K_V1
all_labels = weights.meta['categories']
label_401_to_500 = {i: all_labels[i] for i in range(401, 501)}

# Find up to 3 examples where clean was correct but adv is misclassified
picked = []
for i, (o, a, l) in enumerate(zip(orig_preds, adv_preds, adv_labels)):
    true_img_idx = idx_to_true[l]
    if o == true_img_idx and a != true_img_idx:
        picked.append(i)
    if len(picked) >= 3:
        break

# Un-normalizer: Normalize computes (x - mean) / std, so passing
# mean = -m / s and std = 1 / s yields x * s + m, the inverse of the
# preprocessing normalization.
inv_norm = transforms.Normalize(
    mean=(-mean_norms / std_norms).tolist(),
    std=(1 / std_norms).tolist()
)

# Plot: top row = clean image, bottom row = adversarial image, one column
# per picked example.
fig, axes = plt.subplots(2, 3, figsize=(12, 6))
for col, i in enumerate(picked):
    true_idx = idx_to_true[adv_labels[i]]
    adv_idx = adv_preds[i]

    true_name = label_401_to_500[true_idx]
    adv_name = all_labels[adv_idx]

    for row, img in enumerate([orig_images[i], adv_images[i]]):
        ax = axes[row, col]
        # Detach first: a stored image that still requires grad would make
        # .numpy() raise a RuntimeError.
        im = inv_norm(img.detach().cpu()).permute(1, 2, 0).clamp(0, 1).numpy()
        ax.imshow(im)
        ax.axis('off')
        if row == 0:
            ax.set_title(f"orig\ntrue={true_name}")
        else:
            ax.set_title(f"adv\npred={adv_name}")

# Fewer than 3 examples found: hide the unused columns instead of leaving
# empty framed axes in the figure.
for col in range(len(picked), axes.shape[1]):
    for ax in axes[:, col]:
        ax.axis('off')

plt.tight_layout()
plt.show()