
# TP adversarial images
### Diane LINGRAND 

diane.lingrand@univ-cotedazur.fr 

# Introduction
Inspired by the examples in the foolbox documentation: https://foolbox.readthedocs.io/en/stable/

In [None]:
!pip uninstall foolbox -y
!pip install foolbox==2.4

In [3]:
from IPython.display import Image
import matplotlib.pyplot as plt
import random
import numpy as np
import foolbox
import tensorflow as tf
import eagerpy as ep
from foolbox.models import TensorFlowModel, Model
from foolbox.utils import accuracy, samples
from foolbox.attacks import LinfProjectedGradientDescentAttack

ImportError: cannot import name 'LinfProjectedGradientDescentAttack' from 'foolbox.attacks' (/home/qlr/anaconda3/lib/python3.8/site-packages/foolbox/attacks/__init__.py)

# LinfPG attack
Linf PGD stands for Projected Gradient Descent under an L∞ (maximum-norm) constraint. It is a white-box attack: the model is known and its gradients are used. Starting from the original image, each step moves the current image in the direction (sign) of the gradient that increases the loss function. The result is then projected back onto the L∞ ball of radius epsilon centred on the original image, where epsilon is the maximum amount of perturbation allowed, so the adversarial image never differs from the original by more than epsilon on any pixel.

In [None]:
# Load a ResNet50 pretrained on ImageNet and wrap it for foolbox.
model = tf.keras.applications.ResNet50(weights="imagenet")

# Preprocessing applied by the wrapper before calling the Keras model:
# flip the channel axis (RGB to BGR) and subtract the per-channel mean.
pre = {"flip_axis": -1, "mean": [104.0, 116.0, 123.0]}

# The wrapper is declared for inputs in [0, 255], then re-expressed so that
# the rest of the notebook can work with inputs in [0, 1].
fmodel = TensorFlowModel(model, bounds=(0, 255), preprocessing=pre).transform_bounds((0, 1))

In [None]:
# Accuracy of the un-attacked ResNet50 on a small batch of ImageNet samples
# shipped with foolbox.
images, labels = samples(fmodel, dataset="imagenet", batchsize=16)
acc = accuracy(fmodel, images, labels)

clean_accuracy_percent = acc * 100
print("accuracy:", clean_accuracy_percent, "%")

In [None]:
%%time
#attack definition and computation (launch this box and go for a coffee)
epsilons = [0.001, 0.01, 0.1, 0.5]

attack = LinfPGD()
raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=epsilons)

In [None]:
# Show the attack's `success` output as a NumPy array
# (presumably one row per epsilon and one flag per image -- see the foolbox docs).
print(success.numpy())

In [None]:
# Robust accuracy: the accuracy of the model while it is being attacked,
# i.e. one minus the fraction of successful attacks, for each epsilon.
fooled_fraction = np.mean(success, axis=-1)
robust_accuracy = 1 - fooled_fraction

print("robust accuracy for perturbations with")
for eps, robust_acc in zip(epsilons, robust_accuracy):
    print(f"  Linf norm ≤ {eps:<6}: {robust_acc.item() * 100:4.1f} %")

In [None]:
# We can also check the robust accuracy manually.
# The clipped adversarials are used instead of the raw ones; otherwise we
# would need to check that the perturbation sizes are actually within the
# specified epsilon bound.

print("we can also manually check this:\n")
print("robust accuracy for perturbations with")

clean_images = images.numpy()
for eps, advs_ in zip(epsilons, clipped_advs):
    acc2 = accuracy(fmodel, advs_, labels)
    print(f"  Linf norm ≤ {eps:<6}: {acc2 * 100:4.1f} %")
    # Per-image Linf norm (largest absolute pixel change), which is the
    # quantity bounded by epsilon. A bare np.linalg.norm on the 4-D difference
    # would instead return a single L2 norm over the whole batch.
    perturb = np.abs(advs_.numpy() - clean_images).reshape(len(clean_images), -1).max(axis=1)
    print("    perturbation sizes:", perturb)
    # Larger epsilons cannot do better once every image is misclassified.
    if acc2 == 0:
        break

In [None]:
# Inspect the image batch: its shape, its tensor type, and the type obtained
# after conversion with .numpy().
print(images.shape)
print(type(images))
print(type(images.numpy()))

In [None]:
# Predicted class of the first adversarial image of the last batch examined
# in the previous cell.
# The prediction goes through `fmodel`, not `model.predict`: `fmodel` was built
# with bounds (0, 255) plus BGR/mean preprocessing and then rescaled to (0, 1),
# which is the space the adversarial images live in. Feeding them directly to
# the Keras model would skip that preprocessing and give meaningless labels.
adv_batch = advs_[:1]  # keep the batch dimension
print(adv_batch.shape)
print(np.argmax(fmodel(adv_batch).numpy()))

In [None]:
# Inspect the labels: type after conversion with .numpy(), and the first label.
print(type(labels.numpy()))
print(labels.numpy()[0])

In [None]:
# For a random image, display the original, its adversarial version and the
# (amplified) difference between the two, with one row per value of epsilon.
index = random.randint(0, len(images) - 1)
image = images.numpy()
img = image[index]
lab = labels.numpy()
print("original label:", lab[index])

# Size the grid from the number of epsilons actually attacked instead of a
# hard-coded 13 rows, so that the figure is neither mostly empty nor too small.
n_rows = len(clipped_advs)
fig, axes = plt.subplots(n_rows, 3, figsize=(12, 4 * n_rows), squeeze=False)

for row, (eps, advs_) in enumerate(zip(epsilons, clipped_advs)):
    adv = advs_.numpy()[index]
    diff = img - adv

    axes[row, 0].imshow(img)
    axes[row, 0].set_title("original")

    axes[row, 1].imshow(adv)
    axes[row, 1].set_title(f"adversarial (epsilon = {eps})")

    # The difference is amplified 20 times to make it visible, then clipped to
    # the [0, 1] range that imshow expects for float images.
    axes[row, 2].imshow(np.clip(np.abs(20 * diff), 0, 1))
    axes[row, 2].set_title("|difference| x 20")

    for ax in axes[row]:
        ax.axis("off")

fig.tight_layout()

Modify the previous code so that it also displays the new label predicted for each adversarial image.

In [None]:
# Helper for displaying class names instead of numeric class labels.
labels_path = tf.keras.utils.get_file('ImageNetLabels.txt','https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt')
# Read the file inside a `with` block so that the handle is closed afterwards.
with open(labels_path) as labels_file:
    imagenet_labels = np.array(labels_file.read().splitlines())

Can you target a specific class?

In [None]:
# For a random image, display the original next to its adversarial version for
# each value of epsilon, together with the original and newly predicted class names.
index = random.randint(0, len(images) - 1)
image = images.numpy()
img = image[index]
lab = labels.numpy()
# The class index is shifted by one to look up the name in `imagenet_labels`
# (presumably because the label file starts with an extra "background" entry
# -- verify against the file).
res50_label = imagenet_labels[lab[index] + 1]

for eps, advs_ in zip(epsilons, clipped_advs):
    adv = advs_.numpy()[index]
    # Predict through `fmodel` so that the preprocessing declared when wrapping
    # the network is applied; calling `model.predict` directly on these (0, 1)
    # images would skip it and return meaningless labels.
    scores = fmodel(advs_[index:index + 1]).numpy()
    pred = imagenet_labels[int(np.argmax(scores)) + 1]
    print(f"label original: {res50_label}, new label: {pred}, epsilon: {eps}")

    # One compact figure per epsilon: original on the left, adversarial on the right.
    fig, (ax_orig, ax_adv) = plt.subplots(1, 2, figsize=(8, 4))
    ax_orig.imshow(img)
    ax_orig.set_title(f"original: {res50_label}")
    ax_orig.axis("off")
    ax_adv.imshow(adv)
    ax_adv.set_title(f"epsilon = {eps}: {pred}")
    ax_adv.axis("off")
    plt.show()

<hr>

In [None]:
!pip uninstall foolbox -y
!pip install foolbox

In [4]:
#!/usr/bin/env python3
"""
A simple example that demonstrates how to run a single attack against
a PyTorch ResNet-18 model for different epsilons and how to then report
the robust accuracy.
"""
import torchvision.models as models
import eagerpy as ep
from foolbox import PyTorchModel, accuracy, samples
from foolbox.attacks import LinfPGD


def main() -> None:
    """Attack a pretrained PyTorch ResNet-18 with Linf PGD and print the results.

    Prints the clean accuracy on a small ImageNet batch, then the robust
    accuracy for each epsilon, first from the attack's own success flags and
    then recomputed from the clipped adversarial images.
    """
    # Perturbation budgets to evaluate, from no perturbation up to the full range.
    epsilons = [
        0.0,
        0.0002,
        0.0005,
        0.0008,
        0.001,
        0.0015,
        0.002,
        0.003,
        0.01,
        0.1,
        0.3,
        0.5,
        1.0,
    ]

    # Build the model (a TensorFlow or JAX model could be used as well).
    net = models.resnet18(pretrained=True).eval()
    preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
    fmodel = PyTorchModel(net, bounds=(0, 1), preprocessing=preprocessing)

    # Fetch a few samples and evaluate the un-attacked model. Wrapping with
    # ep.astensors is optional; it lets the code below use EagerPy tensors.
    batch = samples(fmodel, dataset="imagenet", batchsize=4)
    images, labels = ep.astensors(*batch)
    clean_acc = accuracy(fmodel, images, labels)
    print(f"clean accuracy:  {clean_acc * 100:.1f} %")

    # Run the attack once for all epsilons.
    attack = LinfPGD()
    raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=epsilons)

    # Robust accuracy (accuracy of the model while attacked), per epsilon.
    robust_accuracy = 1 - success.float32().mean(axis=-1)
    print("robust accuracy for perturbations with")
    for eps, rob_acc in zip(epsilons, robust_accuracy):
        print(f"  Linf norm ≤ {eps:<6}: {rob_acc.item() * 100:4.1f} %")

    # Manual cross-check. The clipped adversarials are used rather than the raw
    # ones; otherwise the perturbation sizes would have to be checked against
    # the epsilon bound.
    print()
    print("we can also manually check this:")
    print()
    print("robust accuracy for perturbations with")
    for eps, adv_batch in zip(epsilons, clipped_advs):
        acc2 = accuracy(fmodel, adv_batch, labels)
        print(f"  Linf norm ≤ {eps:<6}: {acc2 * 100:4.1f} %")
        print("    perturbation sizes:")
        perturbation_sizes = (adv_batch - images).norms.linf(axis=(1, 2, 3)).numpy()
        indented = str(perturbation_sizes).replace("\n", "\n" + "    ")
        print("    ", indented)
        # Stop once the accuracy has dropped to zero.
        if acc2 == 0:
            break

ModuleNotFoundError: No module named 'torchvision'

In [None]:
# Run the PyTorch ResNet-18 example defined by `main` in the previous cell.
main()

In [None]:
import tensorflow as tf
import eagerpy as ep


def main() -> None:
    """Attack a pretrained Keras ResNet50 with Linf PGD and print the results.

    Prints the clean accuracy on a small ImageNet batch, then the robust
    accuracy for each epsilon, first from the attack's own success flags and
    then recomputed from the clipped adversarial images.

    Note: this definition replaces the PyTorch `main` defined earlier in the
    notebook.
    """
    # Imported here so that this cell does not depend on names leaked from
    # other cells: the PyTorch cell stops at its torchvision import when that
    # package is missing, in which case `LinfPGD` would never be defined.
    from foolbox.attacks import LinfPGD
    from foolbox.models import Model, TensorFlowModel
    from foolbox.utils import accuracy, samples

    # instantiate a model (could also be a PyTorch or JAX model)
    model = tf.keras.applications.ResNet50(weights="imagenet")
    pre = dict(flip_axis=-1, mean=[104.0, 116.0, 123.0])  # RGB to BGR
    fmodel: Model = TensorFlowModel(model, bounds=(0, 255), preprocessing=pre)
    # work with inputs in [0, 1] instead of [0, 255]
    fmodel = fmodel.transform_bounds((0, 1))

    # get data and test the model
    # wrapping the tensors with ep.astensors is optional, but it allows
    # us to work with EagerPy tensors in the following
    images, labels = ep.astensors(*samples(fmodel, dataset="imagenet", batchsize=16))
    clean_acc = accuracy(fmodel, images, labels)
    print(f"clean accuracy:  {clean_acc * 100:.1f} %")

    # apply the attack
    attack = LinfPGD()
    epsilons = [
        0.0,
        0.0002,
        0.0005,
        0.0008,
        0.001,
        0.0015,
        0.002,
        0.003,
        0.01,
        0.1,
        0.3,
        0.5,
        1.0,
    ]
    raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=epsilons)

    # calculate and report the robust accuracy (the accuracy of the model when
    # it is attacked)
    robust_accuracy = 1 - success.float32().mean(axis=-1)
    print("robust accuracy for perturbations with")
    for eps, acc in zip(epsilons, robust_accuracy):
        print(f"  Linf norm ≤ {eps:<6}: {acc.item() * 100:4.1f} %")

    # we can also manually check this
    # we will use the clipped advs instead of the raw advs, otherwise
    # we would need to check if the perturbation sizes are actually
    # within the specified epsilon bound
    print()
    print("we can also manually check this:")
    print()
    print("robust accuracy for perturbations with")
    for eps, advs_ in zip(epsilons, clipped_advs):
        acc2 = accuracy(fmodel, advs_, labels)
        print(f"  Linf norm ≤ {eps:<6}: {acc2 * 100:4.1f} %")
        print("    perturbation sizes:")
        perturbation_sizes = (advs_ - images).norms.linf(axis=(1, 2, 3)).numpy()
        print("    ", str(perturbation_sizes).replace("\n", "\n" + "    "))
        # stop once the accuracy has dropped to zero
        if acc2 == 0:
            break


In [None]:
# List the GPU devices visible to TensorFlow.
tf.config.list_physical_devices('GPU')

In [None]:
# Run the TensorFlow ResNet50 example: the most recent `main` definition
# replaces the earlier PyTorch one.
main()