<a href="https://colab.research.google.com/github/pavanreddyml/FLAIRS-38-Material/blob/main/noetbooks/01%20-%20Attacks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hands On Adversarial Attacks

# Setup

In [None]:
!pip install adversarial-lab

In [None]:
# Fetch the tutorial repository; the image and model directories configured below are paths inside this checkout.
!git clone https://github.com/pavanreddyml/FLAIRS-38-Material.git

In [None]:
from adversarial_lab.core.optimizers import PGD
from adversarial_lab.core.losses import CategoricalCrossEntropy
from adversarial_lab.attacker.whitebox import WhiteBoxMisclassification
from adversarial_lab.core.noise_generators import AdditiveNoiseGenerator
from adversarial_lab.core.preprocessing import PreprocessingFromFunction
from adversarial_lab.core.constraints import POClip, PONoisedSampleBounding
from adversarial_lab.callbacks import EarlyStopping
from adversarial_lab.arsenal.whitebox import *

from adversarial_lab.utils import Plotting

import os
import numpy as np
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing import image

In [None]:
MODEL = "InceptionV3"     # Accepted by the model-loading cell below: InceptionV3, ResNet50, MobileNetV2 (any other value raises ValueError)
IMAGES_DIR = os.path.join("FLAIRS-38-Material", "data", "animals")     # Directory containing the images
MODEL_DIR = os.path.join("FLAIRS-38-Material", "models")               # Directory containing the model

In [None]:
# Load the ImageNet classifier selected by MODEL together with the matching
# preprocess_input / decode_predictions helpers and the input size it expects.
# Unknown names are rejected up front.
if MODEL not in ("InceptionV3", "ResNet50", "MobileNetV2"):
    raise ValueError(f"Unsupported Model: {MODEL}")

if MODEL == "InceptionV3":
    from tensorflow.keras.applications import InceptionV3
    from tensorflow.keras.applications.inception_v3 import preprocess_input, decode_predictions
    model, input_shape = InceptionV3(weights='imagenet'), (299, 299, 3)
elif MODEL == "ResNet50":
    from tensorflow.keras.applications import ResNet50
    from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
    model, input_shape = ResNet50(weights='imagenet'), (224, 224, 3)
else:  # MODEL == "MobileNetV2"
    from tensorflow.keras.applications import MobileNetV2
    from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
    model, input_shape = MobileNetV2(weights='imagenet'), (224, 224, 3)

In [None]:
def preprocess(sample, *args, **kwargs):
    """Turn a single image into a model-ready batch of one.

    The sample is cast to float32; a 2-D input or a 3-D input with a single
    channel is expanded to three channels. The image is then resized to the
    first two entries of the global ``input_shape``, given a leading batch
    axis, and passed through the global ``preprocess_input``. Both globals
    are looked up at call time. Extra positional and keyword arguments are
    accepted and ignored.
    """
    pixels = tf.cast(sample, dtype=tf.float32)

    # A bare (H, W) image first gets an explicit channel axis ...
    if len(pixels.shape) == 2:
        pixels = tf.expand_dims(pixels, axis=-1)
    # ... and any single-channel image is replicated to RGB.
    if len(pixels.shape) == 3 and pixels.shape[-1] == 1:
        pixels = tf.image.grayscale_to_rgb(pixels)

    resized = tf.image.resize(
        tf.convert_to_tensor(pixels, dtype=tf.float32),
        input_shape[:2],
    )
    return preprocess_input(tf.expand_dims(resized, axis=0))

In [None]:
def get_image_array(image_path):
    """Load the image at ``image_path`` and return its pixels as a NumPy array.

    The file is opened in a ``with`` block so the underlying file handle is
    released as soon as the pixel data has been copied into the array.
    """
    with Image.open(image_path) as image:
        return np.array(image)

# Whitebox Attacks

## Fast Gradient Sign Method (FGSM)

In [None]:
IMAGE = "panda.jpg"                      # file name joined onto IMAGES_DIR
EPSILON = 2                              # epsilon for the single FGSM run
EPSILONS = [1, 2, 5, 10, 20, 50, 100]    # epsilon values swept in the comparison loop
TARGET_CLASS = 924                       # passed to the attack as target_class (presumably an ImageNet class index — confirm)
ON_ORIGINAL = True                       # passed as on_original: attack the original image rather than the preprocessed one
VERBOSE = 1                              # verbosity passed to the single-run attacker

In [None]:
# Locate the image selected by IMAGE inside IMAGES_DIR and load its pixels.
image_path = os.path.join(IMAGES_DIR, IMAGE)
image_array = get_image_array(image_path)

In [None]:
# Baseline: what the model predicts for the clean image before any attack.
predictions = model.predict(preprocess(image_array), verbose=0)
print("Predicted class:", decode_predictions(predictions, top=1)[0][0][1])
print("Predicted class index:", predictions.argmax(axis=1)[0])
print("Predicted class probability:", predictions.max(axis=1)[0])

In [None]:
# Build the FGSM attacker around the loaded model; preprocess() is wrapped so the
# attack library can apply it to the sample.
attacker = FastSignGradientMethodAttack(
    model=model,
    preprocessing_fn=PreprocessingFromFunction.create(preprocess),
    epsilon=float(EPSILON),
    binary=False,
    verbose=VERBOSE,
)

# Run the attack towards TARGET_CLASS; the first return value is the noise to add to the image.
noise, noise_meta = attacker.attack(
    sample=image_array,
    target_class=TARGET_CLASS,
    on_original=ON_ORIGINAL, # This determines if the attack is performed on the original image or on the preprocessed one
)

# Show the image next to the generated noise.
Plotting.plot_images_and_noise(image_array,
                               noise,
                               config={
                                   "title": f"Fast Sign Gradient Method (Epsilon: {EPSILON})",
                               })

In [None]:
# Summarise the single FGSM run: clean prediction, prediction on image + noise,
# and the range of the noise. All lines form one string (no comma between the
# fragments), so print() does not insert a stray space before "Noise min".
print(
    f"Epsilon: {EPSILON}\n"
    f"Original class: {np.argmax(model.predict(preprocess(image_array), verbose=0), axis=1)[0]}\n"
    f"Target class: {TARGET_CLASS}\n"
    f"Adversarial class: {np.argmax(model.predict(preprocess(image_array + noise), verbose=0), axis=1)[0]}\n"
    f"Noise min: {np.min(noise)}\n"
    f"Noise max: {np.max(noise)}\n"
)

In [None]:
# Repeat the FGSM attack for every value in EPSILONS, collecting the noise,
# the resulting prediction and a text report per run, then plot them all.
noises = []
predicted_classes = []
stats = []

# The clean prediction does not depend on epsilon, so it is computed once.
original_class = np.argmax(model.predict(preprocess(image_array), verbose=0), axis=1)[0]

for eps in EPSILONS:
    attacker = FastSignGradientMethodAttack(
        model=model,
        preprocessing_fn=PreprocessingFromFunction.create(preprocess),
        epsilon=float(eps),
        binary=False,
        verbose=0,
    )

    noise, noise_meta = attacker.attack(
        sample=image_array,
        target_class=TARGET_CLASS,
        on_original=ON_ORIGINAL,
    )

    # One model query per run; the result feeds both the report and the plot title.
    adversarial_class = np.argmax(model.predict(preprocess(image_array + noise), verbose=0), axis=1)[0]

    stats.append(
        f"Epsilon: {eps}\n"
        f"Original class: {original_class}\n"
        f"Target class: {TARGET_CLASS}\n"
        f"Adversarial class: {adversarial_class}\n"
        f"Noise min: {np.min(noise)}\n"
        f"Noise max: {np.max(noise)}\n"
        f"Noise mean: {np.mean(noise)}\n"
    )

    noises.append(noise)
    predicted_classes.append(adversarial_class)

for n, pc, s, eps in zip(noises, predicted_classes, stats, EPSILONS):
    print(s)
    Plotting.plot_images_and_noise(image_array,
                                   n,
                                   config={
                                       "title": f"Fast Sign Gradient Method (Epsilon: {eps}, predicted class: {pc})",
                                   })

## BIM And PGD

In [None]:
IMAGE = "jellyfish.jpg"                  # file name joined onto IMAGES_DIR
EPSILON = 2                              # epsilon for the single run
EPSILONS = [1, 2, 5, 10, 20, 50, 100]    # epsilon values swept in the comparison loop
LEARNING_RATE = 1                        # passed to the attacker as learning_rate
TARGET_CLASS = 924                       # passed to the attack as target_class
TARGET_CLASS_CONFIDENCE = 0.5            # confidence given to the EarlyStopping callback in the sweep
ON_ORIGINAL = True                       # passed to the attack as on_original
VERBOSE = 3                              # verbosity passed to the attacker
EPOCHS = 20                              # passed to the attack as epochs

In [None]:
# Locate the image selected by IMAGE inside IMAGES_DIR and load its pixels.
image_path = os.path.join(IMAGES_DIR, IMAGE)
image_array = get_image_array(image_path)

In [None]:
# Baseline: what the model predicts for the clean image before any attack.
predictions = model.predict(preprocess(image_array), verbose=0)
print("Predicted class:", decode_predictions(predictions, top=1)[0][0][1])
print("Predicted class index:", predictions.argmax(axis=1)[0])
print("Predicted class probability:", predictions.max(axis=1)[0])

In [None]:
# Single projected-gradient-descent run. Early stopping uses the section's
# TARGET_CLASS_CONFIDENCE constant (as the sweep cell does) instead of a
# separately hard-coded threshold.
attacker = ProjectedGradientDescentAttack(
    model=model,
    preprocessing_fn=PreprocessingFromFunction.create(preprocess),
    learning_rate=float(LEARNING_RATE),
    epsilon=float(EPSILON),
    binary=False,
    verbose=VERBOSE,
    callbacks=[EarlyStopping(trigger="misclassification", confidence=TARGET_CLASS_CONFIDENCE)],
)

# The first return value is the noise to add to the image.
noise, noise_meta = attacker.attack(
    sample=image_array,
    target_class=TARGET_CLASS,
    on_original=ON_ORIGINAL,
    epochs=EPOCHS
)

Plotting.plot_images_and_noise(image_array,
                               noise,
                               config={
                                   "title": f"BIM (Epsilon: {EPSILON}, Learning Rate: {LEARNING_RATE})",
                               })

In [None]:
# Summarise the single run: clean prediction, prediction and confidence on
# image + noise, and the range of the noise.
print(
    f"Epsilon: {EPSILON}\n"
    f"Learning Rate: {LEARNING_RATE}\n"
    f"Original class: {model.predict(preprocess(image_array), verbose=0).argmax(axis=1)[0]}\n"
    f"Target class: {TARGET_CLASS}\n"
    f"Adversarial class: {model.predict(preprocess(image_array + noise), verbose=0).argmax(axis=1)[0]}\n"
    f"Adversarial class confidence: {model.predict(preprocess(image_array + noise), verbose=0).max(axis=1)[0]}\n"
    f"Noise min: {np.min(noise)}\n"
    f"Noise max: {np.max(noise)}\n"
)

In [None]:
# Repeat the attack for every value in EPSILONS, collecting the noise, the
# resulting prediction and a text report per run, then plot them all.
noises = []
predicted_classes = []
stats = []

for eps in EPSILONS:
    attacker = ProjectedGradientDescentAttack(
        model=model,
        preprocessing_fn=PreprocessingFromFunction.create(preprocess),
        learning_rate=float(LEARNING_RATE),
        epsilon=float(eps),
        binary=False,
        verbose=VERBOSE,
        callbacks=[EarlyStopping(trigger="misclassification", confidence=TARGET_CLASS_CONFIDENCE)],
    )

    noise, noise_meta = attacker.attack(
        sample=image_array,
        target_class=TARGET_CLASS,
        on_original=ON_ORIGINAL,
        epochs=EPOCHS
    )

    # The report shows the epsilon of this run (eps), not the single-run constant.
    stats.append(
        f"Epsilon: {eps}\n"
        f"Learning Rate: {LEARNING_RATE}\n"
        f"Original class: {np.argmax(model.predict(preprocess(image_array), verbose=0), axis=1)[0]}\n"
        f"Target class: {TARGET_CLASS}\n"
        f"Adversarial class: {np.argmax(model.predict(preprocess(image_array + noise), verbose=0), axis=1)[0]}\n"
        f"Adversarial class confidence: {np.max(model.predict(preprocess(image_array + noise), verbose=0), axis=1)[0]}\n"
        f"Noise min: {np.min(noise)}\n"
        f"Noise max: {np.max(noise)}\n"
    )

    noises.append(noise)
    predicted_classes.append(np.argmax(model.predict(preprocess(image_array + noise), verbose=0), axis=1)[0])

for n, pc, s, eps in zip(noises, predicted_classes, stats, EPSILONS):
    print(s)
    Plotting.plot_images_and_noise(image_array,
                                   n,
                                   config={
                                       "title": f"Basic Iterative Method (Epsilon: {eps}, predicted class: {pc})",
                                   })

## Carlini Wagner Attack (C&W)

In [None]:
# Carlini-Wagner section settings.
# C and KAPPA are plain numbers: a trailing comma after the value would turn
# each into a one-element tuple before it reaches the attacker.
IMAGE = "panda.jpg"
C = 50
C_VALS = [1, 2, 5, 10, 20, 50, 100, 200]
KAPPA = 0.5
LEARNING_RATE = 1
TARGET_CLASS = 924
TARGET_CLASS_CONFIDENCE = 0.5
ON_ORIGINAL = True
VERBOSE = 3
EPOCHS = 20

In [None]:
# Locate the image selected by IMAGE inside IMAGES_DIR and load its pixels.
image_path = os.path.join(IMAGES_DIR, IMAGE)
image_array = get_image_array(image_path)

In [None]:
# Baseline: what the model predicts for the clean image before any attack.
predictions = model.predict(preprocess(image_array), verbose=0)
print("Predicted class:", decode_predictions(predictions, top=1)[0][0][1])
print("Predicted class index:", predictions.argmax(axis=1)[0])
print("Predicted class probability:", predictions.max(axis=1)[0])

In [None]:
# Single Carlini-Wagner run with the section's C and KAPPA; early stopping uses
# TARGET_CLASS_CONFIDENCE.
attacker = CarliniWagnerAttack(
    model=model,
    preprocessing_fn=PreprocessingFromFunction.create(preprocess),
    learning_rate=float(LEARNING_RATE),
    C=C,
    kappa=KAPPA,
    binary=False,
    verbose=VERBOSE,
    callbacks=[EarlyStopping(trigger="misclassification", confidence=TARGET_CLASS_CONFIDENCE)],
)

# The first return value is the noise to add to the image.
noise, noise_meta = attacker.attack(
    sample=image_array,
    target_class=TARGET_CLASS,
    on_original=ON_ORIGINAL, # This determines if the attack is performed on the original image or on the preprocessed one
    epochs=EPOCHS
)

# Show the image next to the generated noise.
Plotting.plot_images_and_noise(image_array,
                               noise,
                               config={
                                   "title": f"C&W Attack (C: {C}, Kappa: {KAPPA})",
                               })

In [None]:
# Summarise the single run: clean prediction, prediction and confidence on
# image + noise, and the range of the noise.
print(
    f"C: {C}\n"
    f"KAPPA: {KAPPA}\n"
    f"Original class: {model.predict(preprocess(image_array), verbose=0).argmax(axis=1)[0]}\n"
    f"Target class: {TARGET_CLASS}\n"
    f"Adversarial class: {model.predict(preprocess(image_array + noise), verbose=0).argmax(axis=1)[0]}\n"
    f"Adversarial class confidence: {model.predict(preprocess(image_array + noise), verbose=0).max(axis=1)[0]}\n"
    f"Noise min: {np.min(noise)}\n"
    f"Noise max: {np.max(noise)}\n"
)

In [None]:
# Repeat the attack for every value in C_VALS, collecting the noise, the
# resulting prediction and a text report per run, then plot them all.
# The epoch count and early-stopping confidence come from the section constants.
noises = []
predicted_classes = []
stats = []

for c_val in C_VALS:
    attacker = CarliniWagnerAttack(
        model=model,
        preprocessing_fn=PreprocessingFromFunction.create(preprocess),
        learning_rate=float(LEARNING_RATE),
        C=c_val,
        kappa=KAPPA,
        binary=False,
        verbose=VERBOSE,
        callbacks=[EarlyStopping(trigger="misclassification", confidence=TARGET_CLASS_CONFIDENCE)],
    )

    noise, noise_meta = attacker.attack(
        sample=image_array,
        target_class=TARGET_CLASS,
        on_original=ON_ORIGINAL, # This determines if the attack is performed on the original image or on the preprocessed one
        epochs=EPOCHS
    )

    # Every fragment is an f-string so the Kappa value is interpolated rather
    # than printed as the literal text "{KAPPA}".
    stats.append(
        f"C: {c_val}\n"
        f"Kappa: {KAPPA}\n"
        f"Original class: {np.argmax(model.predict(preprocess(image_array), verbose=0), axis=1)[0]}\n"
        f"Target class: {TARGET_CLASS}\n"
        f"Adversarial class: {np.argmax(model.predict(preprocess(image_array + noise), verbose=0), axis=1)[0]}\n"
        f"Adversarial class confidence: {np.max(model.predict(preprocess(image_array + noise), verbose=0), axis=1)[0]}\n"
        f"Noise min: {np.min(noise)}\n"
        f"Noise max: {np.max(noise)}\n"
    )

    noises.append(noise)
    predicted_classes.append(np.argmax(model.predict(preprocess(image_array + noise), verbose=0), axis=1)[0])

for n, pc, s, c_val in zip(noises, predicted_classes, stats, C_VALS):
    print(s)
    Plotting.plot_images_and_noise(image_array,
                                   n,
                                   config={
                                       "title": f"C&W Attack (C: {c_val}, Kappa: {KAPPA}, predicted class: {pc})",
                                   })

## Deepfool

In [None]:
# DeepFool section settings.
IMAGE = "panda.jpg"
EPSILON = 2                             # the attack cells below pass epsilon=EPSILON; defined here (same value as the BIM/PGD section) so this section runs on its own
OVERSHOOT = 0.1
OVERSHOOT_VALS = [0.1, 0.25, 0.5]
TARGET_CLASS = 924
TARGET_CLASS_CONFIDENCE = 0.3
ON_ORIGINAL = False                     # Deep Fool does not support on_original=True
VERBOSE = 3
EPOCHS = 20
EFFICIENT_MODE = 20

In [None]:
# Locate the image selected by IMAGE inside IMAGES_DIR and load its pixels.
image_path = os.path.join(IMAGES_DIR, IMAGE)
image_array = get_image_array(image_path)

In [None]:
# Baseline: what the model predicts for the clean image before any attack.
predictions = model.predict(preprocess(image_array), verbose=0)
print("Predicted class:", decode_predictions(predictions, top=1)[0][0][1])
print("Predicted class index:", predictions.argmax(axis=1)[0])
print("Predicted class probability:", predictions.max(axis=1)[0])

In [None]:
# Single DeepFool run. The target comes from the section's TARGET_CLASS
# constant so the attack and the report printed afterwards cannot drift apart.
early_stopping = EarlyStopping(trigger="misclassification", confidence=0.2)
preprocessing_fn = PreprocessingFromFunction.create(preprocess)

attacker = DeepFoolAttack(
    model=model,
    preprocessing_fn=preprocessing_fn,
    epsilon=EPSILON,
    overshoot=OVERSHOOT,
    callbacks=[early_stopping],
    efficient_mode=EFFICIENT_MODE,
)

noise, noise_meta = attacker.attack(
    sample=image_array,
    target_class=TARGET_CLASS,
    on_original=ON_ORIGINAL,
    epochs=EPOCHS,
)

# The plot shows the preprocessed image; the report cell likewise adds the
# noise to preprocess(image_array).
Plotting.plot_images_and_noise(preprocessing_fn.preprocess(image_array).numpy(),
                               noise,
                               config={
                                   "title": f"Deep Fool (Overshoot: {OVERSHOOT})",
                               })

In [None]:
# Summarise the single run. The noise is added to the preprocessed image here,
# not to the raw pixel array.
print(
    f"Overshoot: {OVERSHOOT}\n"
    f"Original class: {model.predict(preprocess(image_array), verbose=0).argmax(axis=1)[0]}\n"
    f"Target class: {TARGET_CLASS}\n"
    f"Adversarial class: {model.predict(preprocess(image_array) + noise, verbose=0).argmax(axis=1)[0]}\n"
    f"Adversarial class confidence: {model.predict(preprocess(image_array) + noise, verbose=0).max(axis=1)[0]}\n"
    f"Noise min: {np.min(noise)}\n"
    f"Noise max: {np.max(noise)}\n"
)

In [None]:
# Repeat DeepFool for every value in OVERSHOOT_VALS, collecting the noise, the
# resulting prediction and a text report per run, then plot them all.
noises = []
predicted_classes = []
stats = []

for o_val in OVERSHOOT_VALS:
    early_stopping = EarlyStopping(trigger="misclassification", confidence=0.2)
    preprocessing_fn = PreprocessingFromFunction.create(preprocess)

    # overshoot must be the swept value; passing the OVERSHOOT constant would
    # run the same attack on every iteration while labelling it with o_val.
    attacker = DeepFoolAttack(
        model=model,
        preprocessing_fn=preprocessing_fn,
        epsilon=EPSILON,
        overshoot=o_val,
        callbacks=[early_stopping],
        efficient_mode=EFFICIENT_MODE,
    )

    noise, noise_meta = attacker.attack(
        sample=image_array,
        target_class=TARGET_CLASS,
        on_original=ON_ORIGINAL,
        epochs=EPOCHS
    )

    stats.append(
        f"Overshoot: {o_val}\n"
        f"Original class: {np.argmax(model.predict(preprocess(image_array), verbose=0), axis=1)[0]}\n"
        f"Target class: {TARGET_CLASS}\n"
        f"Adversarial class: {np.argmax(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0]}\n"
        f"Adversarial class confidence: {np.max(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0]}\n"
        f"Noise min: {np.min(noise)}\n"
        f"Noise max: {np.max(noise)}\n"
    )

    noises.append(noise)
    predicted_classes.append(np.argmax(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0])

for n, pc, s, o_val in zip(noises, predicted_classes, stats, OVERSHOOT_VALS):
    print(s)
    Plotting.plot_images_and_noise(preprocessing_fn.preprocess(image_array).numpy(),
                                   n,
                                   config={
                                       "title": f"Deep Fool (Overshoot: {o_val}, predicted class: {pc})",
                                   })

## Smooth Fool

In [None]:
# SmoothFool section settings.
IMAGE = "panda.jpg"
EPSILON = 2                             # the attack cells below pass epsilon=EPSILON; defined here (same value as the BIM/PGD section) so this section runs on its own
OVERSHOOT = 0.1
SIGMA = 0.5
SIGMA_VALS = [0.1, 0.25, 0.5, 0.8]
KERNEL_SIZE = 3
KERNEL_SIZE_VALS = [3, 5, 7, 9, 11]
TARGET_CLASS = 924
TARGET_CLASS_CONFIDENCE = 0.3
ON_ORIGINAL = False                     # Smooth Fool does not support on_original=True
VERBOSE = 3
EPOCHS = 20
EFFICIENT_MODE = 20

In [None]:
# Locate the image selected by IMAGE inside IMAGES_DIR and load its pixels.
image_path = os.path.join(IMAGES_DIR, IMAGE)
image_array = get_image_array(image_path)

In [None]:
# Baseline: what the model predicts for the clean image before any attack.
predictions = model.predict(preprocess(image_array), verbose=0)
print("Predicted class:", decode_predictions(predictions, top=1)[0][0][1])
print("Predicted class index:", predictions.argmax(axis=1)[0])
print("Predicted class probability:", predictions.max(axis=1)[0])

In [None]:
# Single SmoothFool run. The target comes from the section's TARGET_CLASS
# constant, and the plot is titled with the run's settings like the plots of
# the other attacks.
early_stopping = EarlyStopping(trigger="misclassification", confidence=0.2)
preprocessing_fn = PreprocessingFromFunction.create(preprocess)

attacker = SmoothFoolAttack(
    model=model,
    preprocessing_fn=preprocessing_fn,
    epsilon=EPSILON,
    overshoot=OVERSHOOT,
    sigma=SIGMA,
    kernel_size=KERNEL_SIZE,
    callbacks=[early_stopping],
    efficient_mode=EFFICIENT_MODE,
)

noise, noise_meta = attacker.attack(
    sample=image_array,
    target_class=TARGET_CLASS,
    epochs=EPOCHS,
)

# The plot shows the preprocessed image; the report cell likewise adds the
# noise to preprocess(image_array).
Plotting.plot_images_and_noise(preprocessing_fn.preprocess(image_array).numpy(),
                               noise,
                               config={
                                   "title": f"Smooth Fool (Overshoot: {OVERSHOOT}, Sigma: {SIGMA}, Kernel size: {KERNEL_SIZE})",
                               })

In [None]:
# Summarise the single run. The noise is added to the preprocessed image here,
# not to the raw pixel array.
print(
    f"Overshoot: {OVERSHOOT}\n"
    f"Sigma: {SIGMA}\n"
    f"Kernel size: {KERNEL_SIZE}\n"
    f"Original class: {model.predict(preprocess(image_array), verbose=0).argmax(axis=1)[0]}\n"
    f"Target class: {TARGET_CLASS}\n"
    f"Adversarial class: {model.predict(preprocess(image_array) + noise, verbose=0).argmax(axis=1)[0]}\n"
    f"Adversarial class confidence: {model.predict(preprocess(image_array) + noise, verbose=0).max(axis=1)[0]}\n"
    f"Noise min: {np.min(noise)}\n"
    f"Noise max: {np.max(noise)}\n"
)

In [None]:
# Repeat SmoothFool for every value in SIGMA_VALS (kernel size fixed at
# KERNEL_SIZE), collecting the noise, the resulting prediction and a text
# report per run, then plot them all.
noises = []
predicted_classes = []
stats = []

for s_val in SIGMA_VALS:
    early_stopping = EarlyStopping(trigger="misclassification", confidence=0.2)
    preprocessing_fn = PreprocessingFromFunction.create(preprocess)

    attacker = SmoothFoolAttack(
        model=model,
        preprocessing_fn=preprocessing_fn,
        epsilon=EPSILON,
        overshoot=OVERSHOOT,
        sigma=s_val,
        kernel_size=KERNEL_SIZE,
        callbacks=[early_stopping],
        efficient_mode=EFFICIENT_MODE,
    )

    noise, noise_meta = attacker.attack(
        sample=image_array,
        target_class=TARGET_CLASS,
        epochs=EPOCHS,
    )

    # The report shows the sigma of this run (s_val), not the single-run constant.
    stats.append(
        f"Overshoot: {OVERSHOOT}\n"
        f"Sigma: {s_val}\n"
        f"Kernel size: {KERNEL_SIZE}\n"
        f"Original class: {np.argmax(model.predict(preprocess(image_array), verbose=0), axis=1)[0]}\n"
        f"Target class: {TARGET_CLASS}\n"
        f"Adversarial class: {np.argmax(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0]}\n"
        f"Adversarial class confidence: {np.max(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0]}\n"
        f"Noise min: {np.min(noise)}\n"
        f"Noise max: {np.max(noise)}\n"
    )

    noises.append(noise)
    predicted_classes.append(np.argmax(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0])

for n, pc, s, sig in zip(noises, predicted_classes, stats, SIGMA_VALS):
    print(s)
    # The title reports OVERSHOOT; no epsilon loop variable exists in this section.
    Plotting.plot_images_and_noise(preprocessing_fn.preprocess(image_array).numpy(),
                                   n,
                                   config={
                                       "title": f"Smooth Fool (Overshoot: {OVERSHOOT}, Sigma: {sig}, Kernel size: {KERNEL_SIZE}, predicted class: {pc})",
                                   })

In [None]:
# Repeat SmoothFool for every value in KERNEL_SIZE_VALS (sigma fixed at SIGMA),
# collecting the noise, the resulting prediction and a text report per run,
# then plot them all.
noises = []
predicted_classes = []
stats = []
for K_val in KERNEL_SIZE_VALS:
    early_stopping = EarlyStopping(trigger="misclassification", confidence=0.2)
    preprocessing_fn = PreprocessingFromFunction.create(preprocess)

    attacker = SmoothFoolAttack(
        model=model,
        preprocessing_fn=preprocessing_fn,
        epsilon=EPSILON,
        overshoot=OVERSHOOT,
        sigma=SIGMA,
        kernel_size=K_val,
        callbacks=[early_stopping],
        efficient_mode=EFFICIENT_MODE,
    )

    noise, noise_meta = attacker.attack(
        sample=image_array,
        target_class=TARGET_CLASS,
        epochs=EPOCHS,
    )

    # The report shows the kernel size of this run (K_val), not the single-run constant.
    stats.append(
        f"Overshoot: {OVERSHOOT}\n"
        f"Sigma: {SIGMA}\n"
        f"Kernel size: {K_val}\n"
        f"Original class: {np.argmax(model.predict(preprocess(image_array), verbose=0), axis=1)[0]}\n"
        f"Target class: {TARGET_CLASS}\n"
        f"Adversarial class: {np.argmax(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0]}\n"
        f"Adversarial class confidence: {np.max(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0]}\n"
        f"Noise min: {np.min(noise)}\n"
        f"Noise max: {np.max(noise)}\n"
    )

    noises.append(noise)
    predicted_classes.append(np.argmax(model.predict(preprocess(image_array) + noise, verbose=0), axis=1)[0])

for n, pc, s, ker in zip(noises, predicted_classes, stats, KERNEL_SIZE_VALS):
    print(s)
    # The title reports OVERSHOOT; no epsilon loop variable exists in this section.
    Plotting.plot_images_and_noise(preprocessing_fn.preprocess(image_array).numpy(),
                                   n,
                                   config={
                                       "title": f"Smooth Fool (Overshoot: {OVERSHOOT}, Sigma: {SIGMA}, Kernel size: {ker}, predicted class: {pc})",
                                   })

# Blackbox Attacks

## Setup

In [None]:
# From here on IMAGES_DIR points at the digit images instead of the animal images.
IMAGES_DIR = os.path.join("FLAIRS-38-Material", "data", "digits")     # Directory containing the images
MODEL_DIR = os.path.join("FLAIRS-38-Material", "models")               # Directory containing the model
DIGIT = 1                  # sub-directory of IMAGES_DIR the image is read from
IMAGE_NAME = "1.png"       # file name inside that sub-directory

In [None]:
# Replace the ImageNet classifier with the digit classifier stored in MODEL_DIR.
# NOTE: `model` and `input_shape` are rebound here. preprocess() reads
# `input_shape` at call time, so from this cell on it resizes to 28x28.
model = tf.keras.models.load_model(os.path.join(MODEL_DIR, "mnist_digits.h5"))
input_shape = (28, 28)

def preprocess_input(x):
    """Prepare an RGB image tensor for the digit classifier.

    Resizes ``x`` to the first two entries of the global ``input_shape``,
    converts it to a single grayscale channel, casts to float32 and divides
    by 255.0. This definition shadows the Keras ``preprocess_input`` imported
    earlier, so ``preprocess`` uses it from now on.
    """
    resized = tf.image.resize(x, input_shape[:2])
    gray = tf.image.rgb_to_grayscale(resized)
    return tf.cast(gray, tf.float32) / 255.0

def decode_predictions(preds, top=1):
    """Return the ``top`` best-scoring digits of a 1-D score vector.

    The result is ordered from highest to lowest score. Each entry is a
    one-element list holding a ``(label, index, score)`` tuple, where the
    label is ``"digit_<index>"`` and the score is a Python float.
    """
    ranked = np.argsort(preds)[::-1][:top]
    return [[(f"digit_{idx}", idx, float(preds[idx]))] for idx in ranked]

In [None]:
def blackbox_predict_array(image_array):
    """Return the model's scores for one image as a squeezed NumPy array.

    The image goes through ``preprocess`` and the global ``model`` is called
    directly on the result; size-1 axes are removed from the output.
    """
    scores = model(preprocess(image_array))
    return np.squeeze(scores.numpy())

def blackbox_predict_label(image_array):
    """Return only the index of the highest-scoring class for one image, as an int."""
    return int(blackbox_predict_array(image_array).argmax())

In [None]:
# Digit images are stored one directory per digit: IMAGES_DIR/<DIGIT>/<IMAGE_NAME>.
image_path = os.path.join(IMAGES_DIR, str(DIGIT), IMAGE_NAME)
image_array = get_image_array(image_path)

In [None]:
# Full score vector the black-box model returns for the clean image.
blackbox_predict_array(image_array)

In [None]:
# Predicted label only, which is all a label-only attacker gets to see.
blackbox_predict_label(image_array)

In [None]:
# Top prediction of the clean image as a (label, index, score) entry.
decode_predictions(blackbox_predict_array(image_array))

## ZOO Attack

In [None]:
EPSILON = 1                      # default probe step `eps` of the gradient estimators below
N_SAMPLES_FOR_GRADIENT = 200     # default number of random probes per gradient estimate
EPOCHS = 100                     # default max_iters of zoo_attack
LEARNING_RATE = 1                # default step size `lr` of zoo_attack

In [None]:
def finite_difference_gradient(predict_fn, x, eps=EPSILON, samples=N_SAMPLES_FOR_GRADIENT):
    """Estimate the gradient of the predicted class's score using only model queries.

    The class that ``predict_fn`` scores highest at ``x`` is tracked. For each
    of ``samples`` random unit-norm Gaussian directions, the tracked score is
    queried at ``x + eps * d`` and ``x - eps * d`` (both clipped to [0, 255]);
    the central difference, weighted by the direction, is accumulated.

    Returns:
        float32 array shaped like ``x``: the average of the weighted directions.
    """
    tracked = np.argmax(predict_fn(x))
    total = np.zeros_like(x, dtype=np.float32)
    for _ in range(samples):
        direction = np.random.normal(size=x.shape).astype(np.float32)
        direction /= np.linalg.norm(direction)
        ahead = predict_fn(np.clip(x + eps * direction, 0, 255))[tracked]
        behind = predict_fn(np.clip(x - eps * direction, 0, 255))[tracked]
        total += (ahead - behind) / (2 * eps) * direction
    return total / samples

def nes_gradient(predict_fn, x, eps=EPSILON, samples=N_SAMPLES_FOR_GRADIENT):
    """Estimate the gradient of the predicted class's score from antithetic noise samples.

    For each of ``samples`` Gaussian noise draws (normalised to unit norm),
    the score of the class predicted at ``x`` is queried at ``x + eps * noise``
    and ``x - eps * noise`` (both clipped to [0, 255]); the central difference
    times the noise is summed and the sum is divided by ``samples``.

    NOTE(review): as written this performs the same computation as
    finite_difference_gradient — confirm whether a different estimator was intended.

    Returns:
        float32 array shaped like ``x``.
    """
    label = np.argmax(predict_fn(x))
    accumulated = np.zeros_like(x, dtype=np.float32)
    remaining = samples
    while remaining > 0:
        perturbation = np.random.normal(size=x.shape).astype(np.float32)
        perturbation /= np.linalg.norm(perturbation)
        score_plus = predict_fn(np.clip(x + eps * perturbation, 0, 255))[label]
        score_minus = predict_fn(np.clip(x - eps * perturbation, 0, 255))[label]
        slope = (score_plus - score_minus) / (2 * eps)
        accumulated += slope * perturbation
        remaining -= 1
    return accumulated / samples

def zoo_attack(x, y, predict_fn, max_iters=EPOCHS, lr=LEARNING_RATE, grad_fn=finite_difference_gradient):
    """Untargeted score-based attack that steps against the sign of an estimated gradient.

    Args:
        x: clean image array; it is copied, not modified.
        y: label the model assigns to the clean image; the attack stops as
            soon as the prediction differs from it.
        predict_fn: callable returning the score vector for an image.
        max_iters: maximum number of update steps.
        lr: step size applied to the sign of the estimated gradient.
        grad_fn: gradient estimator called as ``grad_fn(predict_fn, adv)``.

    Returns:
        The perturbation ``adv - x`` reached when the attack stopped.
    """
    adv = x.copy().astype(np.float32)
    for step in range(max_iters):
        grads = grad_fn(predict_fn, adv)
        adv = np.clip(adv - lr * np.sign(grads), 0, 255)

        # Query the model once per step; both the progress line and the
        # stopping test use the same scores.
        scores = predict_fn(adv)
        adv_class = np.argmax(scores)
        adv_confidence = np.max(scores)

        if step % 10 == 0:
            print(f"Iter: {step}, Adversarial class: {adv_class}, confidence: {adv_confidence}")
        if adv_class != y:
            print(f"Iter: {step}, Adversarial class: {adv_class}, confidence: {adv_confidence}")
            break
    return adv - x

In [None]:
# Reload the clean digit image so the attacks below start from unmodified pixels.
image_path = os.path.join(IMAGES_DIR, str(DIGIT), IMAGE_NAME)
image_array = get_image_array(image_path)

### Finite Difference ZOO

In [None]:
# Attack the clean image, using its current predicted label as the label to escape.
finite_diff_noise = zoo_attack(
    image_array,
    blackbox_predict_label(image_array),
    blackbox_predict_array,
    grad_fn=finite_difference_gradient,
)

In [None]:
# Show the clean image next to the perturbation found by the attack.
Plotting.plot_images_and_noise(
    image_array,
    finite_diff_noise,
    config={"title": "Zoo Attack (Finite Difference Gradient)"},
)

In [None]:
# Summarise the finite-difference ZOO run. Both noise statistics are taken
# from finite_diff_noise, not from the `noise` variable of the white-box section.
print(
    f"Epsilon: {EPSILON}\n"
    f"N Samples For Gradient: {N_SAMPLES_FOR_GRADIENT}\n"
    f"Learning Rate: {LEARNING_RATE}\n"
    f"Original class: {np.argmax(blackbox_predict_array(image_array), axis=0)}\n"
    f"Adversarial class: {np.argmax(blackbox_predict_array(image_array+finite_diff_noise), axis=0)}\n"
    f"Adversarial class confidence: {np.max(blackbox_predict_array(image_array+finite_diff_noise), axis=0)}\n"
    f"Noise min: {np.min(finite_diff_noise)}\n"
    f"Noise max: {np.max(finite_diff_noise)}\n"
    )

### NES ZOO

In [None]:
# Same attack loop as above, with nes_gradient as the gradient estimator.
nes_noise = zoo_attack(
    image_array,
    blackbox_predict_label(image_array),
    blackbox_predict_array,
    grad_fn=nes_gradient,
)

In [None]:
# Show the clean image next to the perturbation found by the attack.
Plotting.plot_images_and_noise(
    image_array,
    nes_noise,
    config={"title": "Zoo Attack (NES)"},
)

In [None]:
# Summarise the NES ZOO run: settings, predictions before and after adding
# the noise, and the range of the noise.
print(
    f"Epsilon: {EPSILON}\n"
    f"N Samples For Gradient: {N_SAMPLES_FOR_GRADIENT}\n"
    f"Learning Rate: {LEARNING_RATE}\n"
    f"Original class: {blackbox_predict_array(image_array).argmax(axis=0)}\n"
    f"Adversarial class: {blackbox_predict_array(image_array+nes_noise).argmax(axis=0)}\n"
    f"Adversarial class confidence: {blackbox_predict_array(image_array+nes_noise).max(axis=0)}\n"
    f"Noise min: {np.min(nes_noise)}\n"
    f"Noise max: {np.max(nes_noise)}\n"
)

## Square Attack

In [None]:
EPSILON = 16     # default `eps` of square_attack: magnitude added to or subtracted from each pixel of a patch
EPOCHS = 100     # default max_iters of square_attack

In [None]:
def square_attack(x, y, predict_fn, eps=EPSILON, max_iters=EPOCHS):
    """Untargeted attack that perturbs random square patches until the label changes.

    Each iteration first checks the current prediction and stops if it already
    differs from ``y``. Otherwise a square of random side length and position
    is chosen and every pixel in it is shifted by ``+eps`` or ``-eps`` at
    random, clipped to [0, 255]. Patches accumulate across iterations.

    Args:
        x: clean 2-D image array; it is copied, not modified.
        y: label the model assigns to the clean image.
        predict_fn: callable returning the score vector for an image.
        eps: magnitude of the per-pixel shift inside a patch.
        max_iters: maximum number of patches to apply.

    Returns:
        The accumulated perturbation ``adv - x``.
    """
    adv = x.copy().astype(np.float32)
    h, w = x.shape
    for step in range(max_iters):
        # Query the model once per iteration; both the progress line and the
        # stopping test use the same scores.
        scores = predict_fn(adv)
        adv_class = np.argmax(scores)
        adv_confidence = np.max(scores)

        if step % 10 == 0:
            print(f"Iter: {step}, Adversarial class: {adv_class}, confidence: {adv_confidence}")
        if adv_class != y:
            print(f"Iter: {step}, Adversarial class: {adv_class}, confidence: {adv_confidence}")
            break

        # Random square: side length, then top-left corner, then +/-1 signs.
        s = np.random.randint(1, min(h, w) // 2)
        x0 = np.random.randint(0, h - s)
        y0 = np.random.randint(0, w - s)
        noise = np.random.randint(0, 2, size=(s, s)) * 2 - 1
        patch = adv[x0:x0+s, y0:y0+s] + eps * noise
        adv[x0:x0+s, y0:y0+s] = np.clip(patch, 0, 255)
    return adv - x

In [None]:
# Attack the clean image, using its current predicted label as the label to escape.
square_attack_noise = square_attack(
    image_array,
    blackbox_predict_label(image_array),
    blackbox_predict_array,
)

In [None]:
# Show the clean image next to the perturbation found by the attack.
Plotting.plot_images_and_noise(
    image_array,
    square_attack_noise,
    config={"title": "Square Attack"},
)

In [None]:
# Summarise the square attack: predictions before and after adding the noise,
# and the range of the noise.
print(
    f"Epsilon: {EPSILON}\n"
    f"Original class: {blackbox_predict_array(image_array).argmax(axis=0)}\n"
    f"Adversarial class: {blackbox_predict_array(image_array+square_attack_noise).argmax(axis=0)}\n"
    f"Adversarial class confidence: {blackbox_predict_array(image_array+square_attack_noise).max(axis=0)}\n"
    f"Noise min: {np.min(square_attack_noise)}\n"
    f"Noise max: {np.max(square_attack_noise)}\n"
)

## Label Only Attack

In [None]:
EPSILON = 1              # default `eps` of label_only_attack: magnitude written to each selected pixel
EPOCHS = 1000            # default max_iters of label_only_attack
PIXELS_PER_ITER = 84     # default number of random pixel positions drawn per candidate

In [None]:
def label_only_attack(x, y, predict_fn, max_iters=EPOCHS, eps=EPSILON, pixels_per_iter=PIXELS_PER_ITER):
    """Random-search attack that only observes the predicted label.

    Each iteration draws a fresh sparse perturbation of the clean image:
    ``pixels_per_iter`` random positions are set to ``+eps`` or ``-eps``.
    Candidates do not accumulate; the first one whose label differs from
    ``y`` is returned.

    Args:
        x: clean 2-D image array; it is copied, not modified.
        y: label the model assigns to the clean image.
        predict_fn: callable returning the predicted label for an image.
        max_iters: maximum number of candidates to try.
        eps: magnitude written to each selected pixel.
        pixels_per_iter: number of random positions drawn per candidate.

    Returns:
        A float32 perturbation shaped like ``x``: ``candidate - x`` for the
        first successful candidate, or all zeros if none succeeded.
    """
    adv = x.copy().astype(np.float32)

    # Nothing to do if the model already disagrees with y.
    if predict_fn(adv) != y:
        return adv - x

    h, w = x.shape
    for _ in range(max_iters):
        # The buffer is float32 like `adv`. If it inherited an unsigned integer
        # dtype from x (e.g. uint8 image data — TODO confirm), writing -eps
        # would wrap around or raise instead of storing a negative value.
        noise = np.zeros_like(adv)
        for _pixel in range(pixels_per_iter):
            row = np.random.randint(0, h)
            col = np.random.randint(0, w)
            noise[row, col] = np.random.choice([-eps, eps])

        candidate = np.clip(adv + noise, 0, 255)
        if predict_fn(candidate) != y:
            return candidate - x

    # No candidate changed the label: report an empty perturbation with the
    # same dtype as the other return paths.
    return np.zeros_like(adv)


In [None]:
# Only the label function is handed to the attack; it never sees scores.
label_attack_noise = label_only_attack(
    image_array,
    blackbox_predict_label(image_array),
    blackbox_predict_label,
)

In [None]:
# Show the clean image next to the perturbation found by the attack.
Plotting.plot_images_and_noise(
    image_array,
    label_attack_noise,
    config={"title": "Label Only Attack"},
)

In [None]:
# Summarise the label-only attack: predictions before and after adding the
# noise, and the range of the noise.
print(
    f"Epsilon: {EPSILON}\n"
    f"Pixels per Iter: {PIXELS_PER_ITER}\n"
    f"Original class: {blackbox_predict_array(image_array).argmax(axis=0)}\n"
    f"Adversarial class: {blackbox_predict_array(image_array+label_attack_noise).argmax(axis=0)}\n"
    f"Adversarial class confidence: {blackbox_predict_array(image_array+label_attack_noise).max(axis=0)}\n"
    f"Noise min: {np.min(label_attack_noise)}\n"
    f"Noise max: {np.max(label_attack_noise)}\n"
)