# Introduction

This notebook tests the transferability of AutoAttack [1] on clean samples from the ImageNet-Val [3] dataset. The attacks are performed on a ResNet-50 [4] victim classifier, and the transferability to ResNet-18, ResNet-34, ResNet-152, Inception-V3 [5], and ViT [6] classifiers is calculated. To run the notebook, ensure that the environment has been loaded from the provided `environment.yml` file, and that the ImageNet dataset has been downloaded from [here](https://image-net.org/download.php) and extracted to the local file system.

We now give an exact definition of the attack success rate (ASR). Given a set of clean samples, $\{ \boldsymbol{x}_1 , \boldsymbol{x}_2, \dots, \boldsymbol{x}_N \}$, the corresponding adversarial samples, $\{ \tilde{\boldsymbol{x}}_1 , \tilde{\boldsymbol{x}}_2, \dots, \tilde{\boldsymbol{x}}_N \}$, the associated true labels, $\{ y_1, y_2, \dots, y_N \}$, and--in the case of targeted attacks--the target adversarial labels, $\{ \tilde{y}_1, \tilde{y}_2, \dots, \tilde{y}_N \}$, we define the targeted and untargeted attack success rate (ASR) against a classifier, $f : \mathcal{X} \rightarrow \mathcal{Y}$, as: $$\begin{align*}
    \text{targeted ASR} &= \frac{\sum_{i=1}^N \mathbb{1}_{\{ f(\boldsymbol{x}_i) = y_i \}} \cdot \mathbb{1}_{\{ f(\tilde{\boldsymbol{x}}_i) = \tilde{y}_i \}}}{\sum_{i=1}^N \mathbb{1}_{\{ f(\boldsymbol{x}_i) = y_i \}}}; \\
    \text{untargeted ASR} &= \frac{\sum_{i=1}^N \mathbb{1}_{\{ f(\boldsymbol{x}_i) = y_i \}} \cdot \mathbb{1}_{\{ f(\tilde{\boldsymbol{x}}_i) \neq y_i \}}}{\sum_{i=1}^N \mathbb{1}_{\{ f(\boldsymbol{x}_i) = y_i \}}}.
\end{align*}$$
Here $\mathbb{1}_{\{\cdot\}}$ denotes the indicator function, so both rates are computed only over the samples that the classifier labels correctly before the attack.

# References
[1] F. Croce and M. Hein, Reliable evaluation of adversarial robustness with an ensemble of diverse parameter-free attacks, Auto-Attack, 2020.<br>
[2] H. Kim, “Torchattacks: A pytorch repository for adversarial attacks,” arXiv preprint arXiv:2010.01950, 2020.<br>
[3] J. Deng, W. Dong, R. Socher, L.-J. Li, K. Li, and F. F. Li, “ImageNet: A large-scale hierarchical image database,” in 2009 IEEE Conference on Computer Vision and Pattern Recognition, 2009.<br>
[4] K. He, X. Zhang, S. Ren, and J. Sun, Deep residual learning for image recognition, ResNet architecture, 2015.<br>
[5] C. Szegedy, W. Liu, Y. Jia, et al., Going deeper with convolutions, 2014.<br>
[6] A. Dosovitskiy, L. Beyer, A. Kolesnikov, et al., An image is worth 16x16 words: Transformers for image recognition at scale, 2021.

## Imports

In [None]:
import numpy as np
import torch
from tqdm import tqdm
import timm
import pathlib
import pandas as pd

from torchvision.utils import save_image
from torchvision.transforms import Compose, Resize

from torchvision.models import (
    resnet152,
    resnet50,
    resnet34,
    resnet18,
    inception_v3,
    vit_h_14,
    ResNet152_Weights,
    ResNet50_Weights,
    ResNet34_Weights,
    ResNet18_Weights,
    Inception_V3_Weights,
    ViT_H_14_Weights,
)
from dataset_readers import ImageDataset, NatAdvDiffImageDataset, ImageNetDataset

from typing import Union, Iterable, Optional, Tuple, List, Any, Callable, Dict

from autoattack.autoattack import AutoAttack

from dataset_readers import IMAGENET_CLASSES
from misc import array_to_PIL

### SPECIFY DTYPE AND IMAGENET PATH ###
# Directory of the extracted ImageNet dataset on the local file system; edit this for your machine.
PATH_TO_IMAGENET = "/home/max/DATA/SSD/DATASETS/IMAGENET_LARGE"
# Floating-point dtype that the models and the input images are cast to.
DTYPE = torch.float32

## Loading Pytorch Models

In [None]:
# Load the ImageNet-pretrained classifiers, switch them to eval mode and move them to the GPU.
# The weights are passed by keyword: torchvision (>= 0.13) deprecates passing them positionally
# and emits a warning that the positional form may be removed in the future.
resnet_18 = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1).eval().to(dtype=DTYPE, device="cuda")
resnet_34 = resnet34(weights=ResNet34_Weights.IMAGENET1K_V1).eval().to(dtype=DTYPE, device="cuda")
resnet_50 = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2).eval().to(dtype=DTYPE, device="cuda")
resnet_152 = resnet152(weights=ResNet152_Weights.IMAGENET1K_V2).eval().to(dtype=DTYPE, device="cuda")
inception = inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1).eval().to(dtype=DTYPE, device="cuda")
vit = vit_h_14(weights=ViT_H_14_Weights.IMAGENET1K_SWAG_LINEAR_V1).eval().to(dtype=DTYPE, device="cuda")

## Model Preprocessor

In [None]:
# Every image is resized to 224x224 before being passed to a classifier or to the attack.
# NOTE(review): no mean/std normalisation is applied here or elsewhere in this notebook;
# torchvision's pretrained weights list one as part of their preprocessing -- confirm whether
# ImageNetDataset or a model wrapper is expected to handle it.
TRANSFORMS = [
    Resize(size=(224, 224)),
]
preprocessor = Compose(transforms=TRANSFORMS)

## Loading ImageNet-Val Dataset

In [None]:
# Load the validation split from the directory configured in PATH_TO_IMAGENET, so the
# dataset location only has to be edited in one place.
imagenet_val = ImageNetDataset(image_dir=PATH_TO_IMAGENET, imagenet_type="val")

# Sanity check: print the label name of the first sample and display its image.
print(IMAGENET_CLASSES["id2label"][imagenet_val[0][1].item()])
array_to_PIL(imagenet_val[0][0])

## Classifier Test Run

In [None]:
def get_class_prob(model, img):
    """Classify a single image and return the predicted class with its probabilities.

    Args:
        model: Callable mapping ``img`` to a ``(batch, classes)`` tensor of logits.
        img: Input batch for ``model``. It must contain exactly one sample,
            because the predicted class is extracted with ``.item()``.

    Returns:
        A ``(predicted_class, prob)`` tuple: ``predicted_class`` is the index of
        the most probable class as a Python ``int`` and ``prob`` is the softmax
        of the model output over the class axis.
    """
    # Pure inference: disabling autograd avoids building a graph for every call,
    # which would otherwise keep activations alive and waste GPU memory.
    with torch.no_grad():
        prob = torch.softmax(model(img), dim=1)
    predicted_class = prob.argmax(dim=1).item()
    return predicted_class, prob

In [None]:
# Classify one clean validation image with every model and print the predictions.
idx = 9

pt_img, y = imagenet_val[idx]
pt_img = pt_img.to(dtype=DTYPE, device="cuda").unsqueeze(0)  # add the batch dimension
y = y.item()
pt_img = pt_img / 255  # scale pixel values by 1/255
pt_img_processed = preprocessor(pt_img)

print(f"TRUE CLASS: {IMAGENET_CLASSES['id2label'][y]} ({y})")
print(60*"=")
# One loop instead of a copy-pasted stanza per model; inference only, so autograd is disabled.
with torch.no_grad():
    for name, model in (
        ("resnet18", resnet_18),
        ("resnet34", resnet_34),
        ("resnet50", resnet_50),
        ("resnet152", resnet_152),
        ("inception", inception),
        ("vit", vit),
    ):
        pred, prob = get_class_prob(model, pt_img_processed)
        label = f"{name} PREDICTION"
        # Left-pad the label to 21 characters so the colons line up across models.
        print(f"{label:<21}: {IMAGENET_CLASSES['id2label'][pred]} ({round(prob[0, pred].item()*100,2)}%)")
print(60*"=")

# Display the (un-resized) clean image, rescaled by 255 again.
array_to_PIL(pt_img[0].cpu() * 255)

## AutoAttack Test Run

In [None]:
# Attack one validation image on the victim model and print every model's
# prediction on the resulting adversarial sample.
idx = 9
MODEL = resnet_50

attacker = AutoAttack(MODEL, eps=8 / 255, norm="Linf")

pt_img, y = imagenet_val[idx]
pt_img = pt_img.to(dtype=DTYPE, device="cuda").unsqueeze(0)  # add the batch dimension
y = y.to(dtype=torch.int64, device="cuda").unsqueeze(0)

pt_img = pt_img / 255  # scale pixel values by 1/255
pt_img_processed = preprocessor(pt_img)

# The attack is deliberately run outside of any no_grad context.
pt_adv = attacker.run_standard_evaluation(pt_img_processed, y, bs=1)

y = y.item()

print(f"TRUE CLASS: {IMAGENET_CLASSES['id2label'][y]} ({y})")
print(60*"=")
# One loop instead of a copy-pasted stanza per model; inference only, so autograd is disabled.
with torch.no_grad():
    for name, model in (
        ("resnet18", resnet_18),
        ("resnet34", resnet_34),
        ("resnet50", resnet_50),
        ("resnet152", resnet_152),
        ("inception", inception),
        ("vit", vit),
    ):
        pred, prob = get_class_prob(model, pt_adv)
        label = f"{name} PREDICTION"
        # Left-pad the label to 21 characters so the colons line up across models.
        print(f"{label:<21}: {IMAGENET_CLASSES['id2label'][pred]} ({round(prob[0, pred].item()*100,2)}%)")
print(60*"=")

# Display the adversarial image, rescaled by 255 again.
array_to_PIL(pt_adv[0].cpu() * 255)

## Running AutoAttack on ImageNet

In [None]:
N = 50  # Number of ImageNet images to attack
MODEL = resnet_50  # Model to generate the adversarial samples with

attacker = AutoAttack(MODEL, eps=4 / 255, norm="Linf", verbose=False)

# Classifiers the adversarial samples are evaluated on, keyed by the short
# names used in the result dictionaries below.
EVAL_MODELS = {
    "res18": resnet_18,
    "res34": resnet_34,
    "res50": resnet_50,
    "res152": resnet_152,
    "inc": inception,
    "vit": vit,
}


def _percentage(count, total):
    """Return ``count / total`` as a percentage, or NaN when ``total`` is zero."""
    return count / total * 100 if total else float("nan")


# Per-model counters; converted to percentages after the loop.
clean_accuracies = dict.fromkeys(EVAL_MODELS, 0)
adversarial_accuracies = dict.fromkeys(EVAL_MODELS, 0)
untargeted_attack_success = dict.fromkeys(EVAL_MODELS, 0)
# Number of samples that BOTH the victim (MODEL) and the evaluated model
# classify correctly when clean; this is the denominator of the attack success rate.
overlap_totals = dict.fromkeys(EVAL_MODELS, 0)

for i in tqdm(range(N)):
    pt_img, y = imagenet_val[i]
    pt_img = pt_img.to(dtype=DTYPE, device="cuda").unsqueeze(0)  # add the batch dimension
    y = y.to(dtype=torch.int64, device="cuda").unsqueeze(0)

    pt_img = pt_img / 255  # scale pixel values by 1/255
    pt_img_processed = preprocessor(pt_img)

    # The attack is deliberately run outside of the no_grad context below.
    # The adversarial labels it returns are not needed here.
    pt_adv, _ = attacker.run_standard_evaluation(pt_img_processed, y, return_labels=True, bs=1)

    y = y.item()

    # Evaluation is inference only, so no autograd graph is needed.
    with torch.no_grad():
        mpred, _ = get_class_prob(MODEL, pt_img_processed)
        victim_correct = mpred == y

        for name, model in EVAL_MODELS.items():
            clean_pred, _ = get_class_prob(model, pt_img_processed)
            adv_pred, _ = get_class_prob(model, pt_adv)

            # Clean accuracy
            if clean_pred == y:
                clean_accuracies[name] += 1

            # Adversarial accuracy
            if adv_pred == y:
                adversarial_accuracies[name] += 1

            # Attack success rate: only samples that the victim and this model
            # both classify correctly are counted.
            if victim_correct and clean_pred == y:
                overlap_totals[name] += 1
                if adv_pred != y:
                    untargeted_attack_success[name] += 1

# Convert the raw counts to percentages. A model with no eligible sample gets
# NaN for its attack success rate instead of raising ZeroDivisionError.
for name in EVAL_MODELS:
    clean_accuracies[name] = _percentage(clean_accuracies[name], N)
    adversarial_accuracies[name] = _percentage(adversarial_accuracies[name], N)
    untargeted_attack_success[name] = _percentage(
        untargeted_attack_success[name], overlap_totals[name]
    )

In [None]:
# Gather the per-model metrics into a single table: after the transpose each
# row is a metric and each column is a model; values are rounded to two decimals.
DATA = {}
DATA["Clean Accuracy"] = clean_accuracies
DATA["Adversarial Accuracy"] = adversarial_accuracies
DATA["Untargeted Attack Success Rate"] = untargeted_attack_success

results_table = pd.DataFrame(DATA).transpose()
results_table.round(decimals=2)