<a href="https://colab.research.google.com/github/lorenrossi/Trustwothy-AI/blob/main/ART.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install adversarial-robustness-toolbox

Collecting adversarial-robustness-toolbox
  Downloading adversarial_robustness_toolbox-1.20.1-py3-none-any.whl.metadata (10 kB)
Downloading adversarial_robustness_toolbox-1.20.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: adversarial-robustness-toolbox
Successfully installed adversarial-robustness-toolbox-1.20.1


In [None]:
import torch
import time
import os
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import torchvision
import torchvision.transforms as transforms
import numpy as np
import math
import pandas as pd
import random
import matplotlib.pyplot as plt
import inspect

import art
import art.datasets


In [None]:
# Names of every attribute of the `art` package that is itself a module;
# the module filter is handed to getmembers as its predicate.
submodules = [name for name, _ in inspect.getmembers(art, inspect.ismodule)]

print(submodules)

['attacks', 'config', 'data_generators', 'defences', 'estimators', 'evaluations', 'exceptions', 'logging', 'metrics', 'optimizers', 'preprocessing', 'summary_writer', 'utils', 'visualization']


In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

from art.estimators.classification import PyTorchClassifier

# -------------------------
# CHOOSE DATASET HERE!
dataset_name = "cifar10"   # <-- change to "cifar100"
# -------------------------

# Per-channel mean/std handed to transforms.Normalize below.
cifar_mean = [0.4914, 0.4822, 0.4465]
cifar_std  = [0.2023, 0.1994, 0.2010]

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Load the test split and the matching pretrained ResNet-20 for the chosen dataset.
if dataset_name == "cifar10":
    testset = torchvision.datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform_test)
    model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_resnet20", pretrained=True)
    num_classes = 10
elif dataset_name == "cifar100":
    testset = torchvision.datasets.CIFAR100(
        root="./data", train=False, download=True, transform=transform_test)
    model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar100_resnet20", pretrained=True)
    num_classes = 100
else:
    # Fail here with a clear message instead of a NameError on `model` further down.
    raise ValueError(
        f"Unsupported dataset_name {dataset_name!r}; expected 'cifar10' or 'cifar100'."
    )

# Shape of a single (already transformed) test image.
input_shape = testset[0][0].size()

# Move the model to the selected device explicitly, so code that sends inputs
# to `device` does not depend on another object relocating the model first.
model = model.to(device)
model.eval()

# Scan the transformed test set for its global minimum and maximum value.
# The bounds start at +/-inf so they come from the data alone; seeding them
# with 0 is only correct when the data happens to straddle zero.
m, M = float("inf"), float("-inf")

for i in range(len(testset)):
    x = testset[i][0]
    m = min(m, x.min().item())
    M = max(M, x.max().item())

# (min, max) input range handed to the ART classifier wrapper.
clip_values = (m, M)

classifier = PyTorchClassifier(
    model=model,
    loss=nn.CrossEntropyLoss(),
    input_shape=input_shape,
    nb_classes=num_classes,
    clip_values=clip_values,
)

print("Loaded dataset:", dataset_name)
print("Total test images:", len(testset))

Using cache found in /root/.cache/torch/hub/chenyaofo_pytorch-cifar-models_master


Loaded dataset: cifar10
Total test images: 10000


In [None]:
# Display the (min, max) value range that was passed to the classifier as clip_values.
clip_values

(-2.429065704345703, 2.7537312507629395)

In [None]:
def evaluate(model, loader):
    """Return the top-1 accuracy of `model` over all batches in `loader`.

    Args:
        model: callable mapping a batch of inputs to per-class scores
            (the prediction is the argmax over dimension 1).
        loader: iterable yielding ``(inputs, labels)`` tensor pairs.

    Returns:
        Fraction of samples whose predicted class equals the label (float).

    Raises:
        ZeroDivisionError: if `loader` yields no samples.

    Note:
        The model's train/eval mode is not changed here; gradients are
        disabled for the duration of the evaluation.
    """
    # Send batches to wherever the model's weights actually live, so inputs and
    # model can never end up on different devices. Fall back to the notebook's
    # global `device` only when the model exposes no parameters.
    params = model.parameters() if hasattr(model, "parameters") else iter(())
    first_param = next(iter(params), None)
    target_device = first_param.device if first_param is not None else device

    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(target_device), y.to(target_device)
            logits = model(x)
            pred = logits.argmax(1)
            correct += (pred == y).sum().item()
            total += y.size(0)
    return correct / total

# Batch the test split in its stored order (no shuffling), 64 images at a time.
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=64,
    shuffle=False,
)

# Accuracy of the unmodified model on unperturbed test images.
clean_acc = evaluate(model=model, loader=testloader)
print("Clean accuracy:", clean_acc)


Clean accuracy: 0.926


In [None]:
def test_attack(classifier, attack, x, y):
    if isinstance(x, torch.Tensor):
        x_np = x.detach().cpu().numpy()
    else:
        x_np = x

    if isinstance(y, torch.Tensor):
        y_np = y.detach().cpu().numpy()
    else:
        y_np = y
    x_adv = attack.generate(x_np, y_np)
    preds = classifier.predict(x_adv).argmax(axis = 1)
    success = np.mean(preds != y_np)
    return success, x_adv

In [None]:
# Names of every attribute of art.attacks.evasion that is itself a module.
module = art.attacks.evasion
submodules = [name for name, _ in inspect.getmembers(module, inspect.ismodule)]

print(submodules)

['adversarial_asr', 'adversarial_patch', 'adversarial_texture', 'auto_attack', 'auto_conjugate_gradient', 'auto_projected_gradient_descent', 'boundary', 'brendel_bethge', 'carlini', 'composite_adversarial_attack', 'decision_tree_attack', 'deepfool', 'dpatch', 'dpatch_robust', 'elastic_net', 'fast_gradient', 'feature_adversaries', 'frame_saliency', 'geometric_decision_based_attack', 'graphite', 'hclu', 'hop_skip_jump', 'imperceptible_asr', 'importlib', 'iterative_method', 'laser_attack', 'lowprofool', 'momentum_iterative_method', 'newtonfool', 'over_the_air_flickering', 'overload', 'pe_malware_attack', 'pixel_threshold', 'projected_gradient_descent', 'rescaling_auto_conjugate_gradient', 'saliency_map', 'shadow_attack', 'sign_opt', 'simba', 'spatial_transformation', 'square_attack', 'steal_now_attack_later', 'targeted_universal_perturbation', 'universal_perturbation', 'virtual_adversarial', 'wasserstein', 'zoo']


In [None]:
# Names of every class exposed by art.attacks.evasion.
module = art.attacks.evasion
classes = [name for name, _ in inspect.getmembers(module, inspect.isclass)]

print(classes)

['AdversarialPatch', 'AdversarialPatchNumpy', 'AdversarialPatchPyTorch', 'AdversarialPatchTensorFlowV2', 'AdversarialTexturePyTorch', 'AutoAttack', 'AutoConjugateGradient', 'AutoProjectedGradientDescent', 'BasicIterativeMethod', 'BoundaryAttack', 'BrendelBethgeAttack', 'CarliniL0Method', 'CarliniL2Method', 'CarliniLInfMethod', 'CarliniWagnerASR', 'CompositeAdversarialAttackPyTorch', 'DPatch', 'DecisionTreeAttack', 'DeepFool', 'ElasticNet', 'FastGradientMethod', 'FeatureAdversariesNumpy', 'FeatureAdversariesPyTorch', 'FeatureAdversariesTensorFlowV2', 'FrameSaliencyAttack', 'GRAPHITEBlackbox', 'GRAPHITEWhiteboxPyTorch', 'GeoDA', 'HighConfidenceLowUncertainty', 'HopSkipJump', 'ImperceptibleASR', 'ImperceptibleASRPyTorch', 'LaserAttack', 'LowProFool', 'MalwareGDTensorFlow', 'MomentumIterativeMethod', 'NewtonFool', 'OverTheAirFlickeringPyTorch', 'OverloadPyTorch', 'PixelAttack', 'ProjectedGradientDescent', 'ProjectedGradientDescentNumpy', 'ProjectedGradientDescentPyTorch', 'ProjectedGradien

In [None]:
# Print the built-in help for the ProjectedGradientDescent class in art.attacks.evasion.
# NOTE(review): the output stored with this cell describes PixelAttack, not
# ProjectedGradientDescent — re-run the cell to refresh it.
help(module.ProjectedGradientDescent)

Help on class PixelAttack in module art.attacks.evasion.pixel_threshold:

class PixelAttack(PixelThreshold)
 |  PixelAttack(classifier: "'CLASSIFIER_NEURALNETWORK_TYPE'", th: 'int | None' = None, es: 'int' = 1, max_iter: 'int' = 100, targeted: 'bool' = False, verbose: 'bool' = False) -> 'None'
 |
 |  This attack was originally implemented by Vargas et al. (2019). It is generalisation of One Pixel Attack originally
 |  implemented by Su et al. (2019).
 |
 |  | One Pixel Attack Paper link: https://arxiv.org/abs/1710.08864
 |  | Pixel Attack Paper link: https://arxiv.org/abs/1906.06026
 |
 |  Method resolution order:
 |      PixelAttack
 |      PixelThreshold
 |      art.attacks.attack.EvasionAttack
 |      art.attacks.attack.Attack
 |      abc.ABC
 |      builtins.object
 |
 |  Methods defined here:
 |
 |  __init__(self, classifier: "'CLASSIFIER_NEURALNETWORK_TYPE'", th: 'int | None' = None, es: 'int' = 1, max_iter: 'int' = 100, targeted: 'bool' = False, verbose: 'bool' = False) -> 'None

In [None]:
# Unshuffled loader over the test split with 512-image batches.
new_loader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=512,
    shuffle=False,
)

In [None]:
# Take the first batch from new_loader as the fixed evaluation batch.
images, labels = next(iter(new_loader))
images, labels = images.to(device), labels.to(device)

evasion = art.attacks.evasion

# NOTE: the batch comes from a dataset whose transform already applies
# mean/std normalisation, so epsilon is measured in normalised units rather
# than raw [0, 1] pixel units.
epsilon = 6/255
num_pixels = 1

# ART's ProjectedGradientDescent defaults to eps_step=0.1, which is larger than
# this epsilon; every step would then saturate the whole budget and PGD would
# be no stronger than a single FGSM step. Use a step well inside the budget.
pgd_iters = 30
pgd_eps_step = 2.5 * epsilon / pgd_iters

atk_fgsm = evasion.FastGradientMethod(classifier, eps=epsilon)
success, adv = test_attack(classifier, atk_fgsm, images, labels)
print("ART FGSM success:", success)

atk_pgd = evasion.ProjectedGradientDescent(
    classifier, eps=epsilon, eps_step=pgd_eps_step, max_iter=pgd_iters)
success, adv = test_attack(classifier, atk_pgd, images, labels)
print("ART PGD success:", success)

#Very slow: only the first 6 images of the batch are attacked.
onepix = evasion.PixelAttack(classifier, th=num_pixels)
success, adv = test_attack(classifier, onepix, images[:6], labels[:6])
print("One‑pixel attack success:", success)

ART FGSM success: 0.634765625


PGD - Batches:   0%|          | 0/16 [00:00<?, ?it/s]

ART PGD success: 0.626953125
One‑pixel attack success: 0.3333333333333333


In [None]:
# Very slow: repeat the pixel attack on the first 26 images of the batch.
onepix = evasion.PixelAttack(classifier, th=num_pixels)
success, adv = test_attack(
    classifier=classifier,
    attack=onepix,
    x=images[:26],
    y=labels[:26],
)
print("One‑pixel attack success:", success)

One‑pixel attack success: 0.38461538461538464
