In [None]:
# Shell escape: clone the companion repository into the current working directory.
# NOTE(review): presumably this repo supplies the `attacks` package imported
# further down (attacks.fgsm / attacks.bim) — confirm against the repo layout.
!git clone https://github.com/seba20-0/Adversarial-Attacks-on-YOLO.git

In [None]:
# Line magic: switch the kernel's working directory to the cloned repository.
# NOTE(review): presumably needed so that `from attacks...` imports resolve
# relative to the repo root — confirm.
%cd Adversarial-Attacks-on-YOLO/

# Adversarial Attacks on Pretrained Vision Models

In this notebook, I will demonstrate how to perform adversarial attacks on a pretrained vision model without retraining it. The workflow will be as follows:

1. **Model Selection**: Load a pretrained vision model (e.g., ResNet, VGG) from a popular library such as PyTorch or TensorFlow.
2. **Dataset**: Use images from the ImageNet dataset, focusing on a specific class for targeted attacks.
3. **Prediction**: Pass the selected images through the model to obtain baseline predictions.
4. **Adversarial Attack**: Apply an adversarial attack method (FGSM and BIM in this notebook; PGD is a closely related alternative) to the images of the chosen class.
5. **Evaluation**: Compare the model's predictions on clean vs. adversarial images to assess the effectiveness of the attack.

No model training will be performed; the focus is solely on testing and attacking the pretrained model using ImageNet class images.

In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Load pretrained VGG16 model.
# The `weights=` enum API needs torchvision >= 0.13; the ImageNet checkpoint is
# downloaded and cached the first time this runs.
model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Set model to evaluation mode (VGG16's classifier contains dropout, which must
# be disabled for deterministic predictions).
model.eval()

In [None]:
from PIL import Image
from torchvision import transforms
import json
import requests

# Load and preprocess the image
# TODO: set img_path to the image you want to attack. With the empty default,
# Image.open raises FileNotFoundError.
img_path = ''
image = Image.open(img_path).convert('RGB')


# transform the image: resize, center-crop, ToTensor, and normalize using the
# ImageNet mean and std (the same statistics the visualisation cells use to
# un-normalise tensors for display).
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# preprocess and add batch dimension -> shape (1, 3, 224, 224)
input_tensor = preprocess(image).unsqueeze(0)

# Inference (no gradients needed for a plain forward pass)
with torch.no_grad():
    output = model(input_tensor)
    # argmax instead of `_, predicted = output.max(1)` so IPython's `_`
    # (last-output variable) is not overwritten.
    predicted = output.argmax(dim=1)




In [None]:
# Download ImageNet class labels (plain text, one class name per line; the line
# index is the class index used to look up model predictions).
LABELS_URL = 'https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt'
labels_response = requests.get(LABELS_URL, timeout=30)  # don't hang forever on a dead connection
labels_response.raise_for_status()  # fail loudly rather than parse an HTTP error page as labels
labels = labels_response.text.splitlines()

In [None]:
import matplotlib.pyplot as plt

# Draw the loaded picture on the current axes, without ticks or frame
ax = plt.gca()
ax.imshow(image)
ax.set_axis_off()
ax.set_title('Input Image')
plt.show()

# Report the human-readable name of the top-1 prediction
predicted_name = labels[predicted.item()]
print('Predicted class:', predicted_name)

In [None]:
# Get top 5 predictions and their probabilities
TOP_K = 5
with torch.no_grad():
    # Softmax over the class dimension turns the raw logits into probabilities.
    probs = torch.softmax(output, dim=1)
    # Both results have shape (batch, TOP_K), sorted by descending probability.
    top5_probs, top5_indices = torch.topk(probs, k=TOP_K, dim=1)

# Display top 5 predicted classes and probabilities (batch element 0)
for i in range(TOP_K):
    idx = top5_indices[0, i].item()
    prob = top5_probs[0, i].item()
    print(f"{i+1}. {labels[idx]} ({prob:.4f})")

## FGSM

In [None]:
from attacks.fgsm import FGSM
import numpy as np


# Prepare label for untargeted attack (use predicted label)
# TODO: `_` is an exercise placeholder — replace it with the tensor holding the
# clean prediction (the inference cell stores that in `predicted`).
label_tensor = _

# Instantiate FGSM
# NOTE(review): the plotting code below un-normalises `adv_image`, so the attack
# presumably works on normalised inputs and epsilon is in those units — confirm
# against attacks/fgsm.py.
epsilon = 0.4
# TODO: placeholder — construct the FGSM attack object here. Its constructor
# signature is defined in attacks/fgsm.py and is not visible from this notebook.
fgsm = _

# Generate adversarial example
# TODO: placeholder — pass the arguments `attack` expects (presumably the input
# batch and `label_tensor`; verify against attacks/fgsm.py).
adv_image = fgsm.attack(_)

# ImageNet channel statistics (the values this cell previously hard-coded three
# times); used to map normalised tensors back to displayable RGB.
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])


def to_hwc_numpy(tensor):
    """Squeeze size-1 dims from `tensor` and return it as a channels-last numpy array."""
    return tensor.squeeze().detach().cpu().numpy().transpose(1, 2, 0)


# Display adversarial image
adv_img_np = (to_hwc_numpy(adv_image) * imagenet_std + imagenet_mean).clip(0, 1)  # Unnormalize

# Calculate perturbation
# Scale by std only: the mean cancels out in the difference of two normalised images.
perturbation = (to_hwc_numpy(adv_image - input_tensor) * imagenet_std).clip(-0.5, 0.5)

# imshow treats an (H, W, 3) float array as RGB: `cmap` is ignored and values
# outside [0, 1] are clipped, so negative noise used to render as black.
# Rescale symmetrically so zero perturbation maps to mid-grey (0.5) and the
# largest magnitude maps to 0 or 1. The noise is amplified for visibility.
perturbation_max_abs = np.abs(perturbation).max()
if perturbation_max_abs > 0:
    perturbation_vis = 0.5 + perturbation / (2 * perturbation_max_abs)
else:
    # Attack produced no change; show a uniform grey panel instead of dividing by zero.
    perturbation_vis = np.full_like(perturbation, 0.5)

# Show original image (kept in `orig_img_np`; later cells reuse it)
orig_img_np = (to_hwc_numpy(input_tensor) * imagenet_std + imagenet_mean).clip(0, 1)

fig, axs = plt.subplots(1, 3, figsize=(15, 5))
axs[0].imshow(orig_img_np)
axs[0].set_title('Original Image')
axs[0].axis('off')

axs[1].imshow(perturbation_vis)
axs[1].set_title('Perturbation (Noise)')
axs[1].axis('off')

axs[2].imshow(adv_img_np)
axs[2].set_title('Adversarial Image')
axs[2].axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Predict the label of the adversarial image
with torch.no_grad():
    # get the model output on adv_image
    adv_output = model(adv_image)
    # extract the predicted class; argmax avoids overwriting IPython's `_`
    adv_predicted = adv_output.argmax(dim=1)

# Show adversarial image with predicted label
plt.imshow(adv_img_np)
plt.axis('off')
plt.title(f'Adversarial Image\nPredicted: {labels[adv_predicted.item()]}')
plt.show()

In [None]:
# Find the ImageNet class index for 'balloon'
target_class_name = 'balloon'
# list.index raises ValueError if the name is not present in `labels`.
target_class_idx = labels.index(target_class_name)  # Get index for balloon
# Wrapped in a list, so this is a one-element 1-D tensor.
target_label_tensor = torch.tensor([target_class_idx])

# Instantiate FGSM for targeted attack
# Rebinds epsilon (the untargeted FGSM cell set it to 0.4).
epsilon = 0.1

# instantiate fgsm with targeted as True
# TODO: `_` is an exercise placeholder — construct the targeted FGSM object here.
# The constructor signature lives in attacks/fgsm.py and is not visible from
# this notebook.
fgsm_targeted = _

# Generate targeted adversarial example from target_label_tensor
# TODO: placeholder — presumably a call to `fgsm_targeted.attack(...)` with the
# input batch and `target_label_tensor`; verify against attacks/fgsm.py.
adv_image_targeted = _

# Display targeted adversarial image
# Drop the batch dim, move channels last, then undo the ImageNet normalisation
# so the values are displayable RGB in [0, 1].
adv_img_targeted_np = adv_image_targeted.squeeze().detach().cpu().numpy().transpose(1, 2, 0)
adv_img_targeted_np = adv_img_targeted_np * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]  # Unnormalize
adv_img_targeted_np = adv_img_targeted_np.clip(0, 1)

plt.imshow(adv_img_targeted_np)
plt.axis('off')
# Build the title from target_class_name so it stays correct if the target
# changes (the old literal hard-coded 'balloon ' with a stray space).
plt.title(f'Targeted Adversarial Image ({target_class_name})')
plt.show()

# Predict the label of the targeted adversarial image
with torch.no_grad():
    adv_output_targeted = model(adv_image_targeted)
    # argmax avoids overwriting IPython's `_` last-output variable
    adv_predicted_targeted = adv_output_targeted.argmax(dim=1)

print(f"Predicted class for targeted attack: {labels[adv_predicted_targeted.item()]}")

## BIM

In [None]:
# Targeted BIM attack to force prediction as 'balloon'
from attacks.bim import BIM

# BIM parameters
# NOTE(review): how these are consumed depends on attacks/bim.py, which is not
# visible here. Presumably epsilon is the total perturbation budget, alpha the
# per-iteration step size and num_iterations the number of steps — confirm.
epsilon = 0.1
# NOTE(review): alpha equals epsilon, so a single step could already use the
# whole budget if alpha is the step size — confirm this is intended.
alpha = 0.1
num_iterations = 60
targeted = True


# Instantiate BIM for targeted attack
# TODO: `_` is an exercise placeholder — construct the BIM object here from the
# parameters above; the constructor signature lives in attacks/bim.py.
bim_targeted = _

# Generate targeted adversarial example using target_label_tensor
# TODO: placeholder — presumably a call on `bim_targeted` with the input batch
# and `target_label_tensor`; verify the method name and arguments in attacks/bim.py.
adv_image_bim = _

# ImageNet channel statistics used to undo the input normalisation for display.
bim_mean = np.array([0.485, 0.456, 0.406])
bim_std = np.array([0.229, 0.224, 0.225])

# Display targeted adversarial image
adv_img_bim_np = adv_image_bim.squeeze().detach().cpu().numpy().transpose(1, 2, 0)
adv_img_bim_np = (adv_img_bim_np * bim_std + bim_mean).clip(0, 1)  # Unnormalize

# Show original image, perturbation, and BIM adversarial image side by side
perturbation_bim = (adv_image_bim - input_tensor).squeeze().detach().cpu().numpy().transpose(1, 2, 0)
# Scale by std only: the mean cancels out in the difference of two normalised images.
perturbation_bim = (perturbation_bim * bim_std).clip(-0.5, 0.5)

# imshow treats an (H, W, 3) float array as RGB: `cmap` is ignored and values
# outside [0, 1] are clipped, so negative noise used to render as black.
# Rescale symmetrically so zero perturbation maps to mid-grey (0.5). The noise
# is amplified for visibility.
perturbation_bim_max_abs = np.abs(perturbation_bim).max()
if perturbation_bim_max_abs > 0:
    perturbation_bim_vis = 0.5 + perturbation_bim / (2 * perturbation_bim_max_abs)
else:
    # No change at all; show a uniform grey panel instead of dividing by zero.
    perturbation_bim_vis = np.full_like(perturbation_bim, 0.5)

fig, axs = plt.subplots(1, 3, figsize=(15, 5))
# `orig_img_np` is the un-normalised clean image computed in the FGSM cell.
axs[0].imshow(orig_img_np)
axs[0].set_title('Original Image')
axs[0].axis('off')

axs[1].imshow(perturbation_bim_vis)
axs[1].set_title('Perturbation (Noise)')
axs[1].axis('off')

axs[2].imshow(adv_img_bim_np)
axs[2].set_title('BIM Adversarial Image')
axs[2].axis('off')

plt.tight_layout()
plt.show()

# Predict the label of the BIM adversarial image
with torch.no_grad():
    adv_output_bim = model(adv_image_bim)
    # argmax avoids overwriting IPython's `_` last-output variable
    adv_predicted_bim = adv_output_bim.argmax(dim=1)

print(f"Predicted class for BIM targeted attack: {labels[adv_predicted_bim.item()]}")