In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn.functional as F
import torchvision
from torchvision import transforms
import urllib

### Load a Sample Image

In [None]:
# Open the sample image inside a context manager so the underlying file
# handle is released as soon as the pixels have been read.
with Image.open("sloth.jpg") as sloth_file:
    # Force 3-channel RGB: the normalization applied later uses three
    # per-channel means/stds, which would fail on grayscale, CMYK or RGBA
    # input. For an image that is already RGB this is a plain copy.
    # 224x224 is the size every later cell feeds to the model.
    image_sloth = sloth_file.convert("RGB").resize((224, 224))
plt.imshow(image_sloth)

### Convert the Image to a PyTorch Tensor

In [None]:
# Two-stage preprocessing, kept as separate transforms because later cells
# add a perturbation between the two stages:
#   1. transform_tensor: PIL image -> tensor
#   2. transform_norm:   per-channel standardization with the constants below
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
transform_tensor = transforms.Compose([transforms.ToTensor()])
transform_norm = transforms.Compose(
    [transforms.Normalize(mean=channel_mean, std=channel_std)]
)

# Standardize the clean image and add a leading batch dimension for the model.
sloth_normalized = transform_norm(transform_tensor(image_sloth))
image_tensor = sloth_normalized.float().unsqueeze(0)

### Classify the image using a pre-trained model

In [None]:
# Load MobileNetV2 with pre-trained weights and put it in inference mode.
model = torchvision.models.mobilenet_v2(pretrained=True)
model.eval()

# This forward pass is only used to read off the prediction for the clean
# image, so there is no reason to record an autograd graph for it.
with torch.no_grad():
    true_prediction = model(image_tensor)

### Check the Prediction Output

In [None]:
# Build a lookup from the CSV rows; dict() over `.values` means each row is
# expected to be a (class index, label text) pair.
imagenet_labels = dict(pd.read_csv('../mobilenet-feat/imagenet_labels.csv').values)

# Human-readable name of the class with the highest raw score.
top_logit_class = true_prediction.max(dim=1).indices.item()
print(imagenet_labels[top_logit_class])

# Softmax turns the raw scores into probabilities; keep the winning
# probability and its class index (the latter is reused by the attack cells).
true_probabilities = F.softmax(true_prediction, dim=1)
true_confidence, true_pred_label = true_probabilities.max(dim=1)
true_loss = F.cross_entropy(true_prediction, true_pred_label).item()

print(f"Prediction confidence = {true_confidence.detach().numpy()}")
print(f"Class label in ImageNet = {true_pred_label.detach().numpy()}")
print(f"Loss = {true_loss}")

## Creating an adversarial example
This can be done by carefully modifying the image so that the change is imperceptible to the human eye but still confuses the model.

In [None]:
# The perturbation to be learned: same shape as the un-normalized image
# tensor, starting at zero (i.e. no change to the image).
delta = torch.zeros_like(transform_tensor(image_sloth), requires_grad=True)
# Only `delta` is handed to the optimizer; the model itself is never updated.
optimizer = torch.optim.SGD([delta], lr=.9)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Freeze the network weights: the attack only needs gradients with respect
# to `delta`. Without this every backward() also computes weight gradients
# that are never zeroed and never used.
model.requires_grad_(False)
print(device)

# Largest absolute change allowed per tensor element; the attack loops clamp
# `delta` to [-epsilon, epsilon] after every optimizer step.
epsilon = 1./255

In [None]:
# Untargeted attack: gradient steps on `delta` that *maximize* the
# cross-entropy of the originally predicted class (hence the minus sign,
# since the optimizer minimizes).

# Loop-invariant pieces: the un-normalized clean image, and the true label
# moved to the model's device (cross_entropy needs logits and target on the
# same device, otherwise this cell fails when CUDA is available).
clean_image = transform_tensor(image_sloth)
true_label_on_device = true_pred_label.to(device)

for it in range(51):
    image_tensor = transform_norm(clean_image + delta).float().unsqueeze(0)
    prediction = model(image_tensor.to(device))
    loss = -F.cross_entropy(prediction, true_label_on_device)
    if it % 10 == 0:
        print(f"iteration {it}, loss = {loss.item()}")

    optimizer.zero_grad()
    # The graph is rebuilt from scratch on every iteration, so there is
    # nothing to retain between backward passes.
    loss.backward()
    optimizer.step()
    # Project back into the allowed perturbation range.
    with torch.no_grad():
        delta.clamp_(-epsilon, epsilon)

# Re-evaluate with the final perturbation: inside the loop `prediction` is
# computed *before* the last update, so it would otherwise lag one step
# behind the `delta` that later cells visualize.
with torch.no_grad():
    image_tensor = transform_norm(clean_image + delta).float().unsqueeze(0)
    prediction = model(image_tensor.to(device))

print("True class probability:", F.softmax(prediction, dim=1)[0, true_pred_label.item()].item())

### Given the optimized noise, the class with the highest prediction score:

In [None]:
# Report what the model now predicts for the perturbed image.
max_class = prediction.max(dim=1)[1].item()
print(imagenet_labels[max_class])

confidence, pred_label = F.softmax(prediction, dim=1).max(dim=1)
# `prediction` lives on the model's device; tensors must be detached and
# moved to the CPU before conversion to NumPy, otherwise this fails on CUDA.
print(f"Prediction confidence = {confidence.detach().cpu().numpy()}")
print(f"Class label in ImageNet = {pred_label.detach().cpu().numpy()}")
print(f"Loss = {F.cross_entropy(prediction, pred_label).item()}")

### The updated image:

In [None]:
# Rebuild the perturbed image in un-normalized pixel space and reorder the
# axes with transpose(1, 2, 0) so matplotlib can display it.
new_image_sloth = transform_tensor(image_sloth) + delta
new_image_sloth = new_image_sloth.squeeze().detach().cpu().numpy().transpose(1, 2, 0)
# Adding delta can push values slightly outside the displayable range.
new_image_sloth = np.clip(new_image_sloth, 0, 1)
noise = delta.squeeze().detach().cpu().numpy().transpose(1, 2, 0)

# Titles: label of the clean prediction vs. label of the attacked prediction,
# truncated to 20 characters.
clean_title = imagenet_labels[true_prediction.max(dim=1)[1].item()][:20] + "..."
attacked_title = imagenet_labels[prediction.max(dim=1)[1].item()][:20] + "..."

fig, ax = plt.subplots(1, 3, figsize=(15, 5))
ax[0].imshow(image_sloth)
ax[0].set_title(clean_title)
ax[1].imshow(new_image_sloth)
ax[1].set_title(attacked_title)
# The amplified noise spans negative values and can exceed 1; clip explicitly
# to the float image range instead of relying on imshow's implicit clipping
# (which emits a warning). Negative perturbations therefore render as black.
ax[2].imshow(np.clip(noise * 200, 0, 1))
ax[2].set_title("Added noise x 200");

In [None]:
# Compare the largest raw pixel value with the largest perturbation magnitude.
# Note the scales differ: the first is taken from the PIL image's pixel array,
# the second from `delta`, which is clamped to +/- epsilon.
raw_peak = np.array(image_sloth).max()
noise_peak = np.abs(noise).max()
print(f"Max pixel value in the raw image: {raw_peak}")
print(f"Max pixel value in the noise image: {noise_peak}")

## Adversarial Attack

### Let's turn our SLOTH into a KOALA!
The index of the koala in the ImageNet class list is 105.

In [None]:
# Targeted attack: push the prediction away from the true class and towards
# a chosen target class at the same time.

# ImageNet index of the target class. The notebook's goal is "sloth -> koala"
# and koala is index 105 (the previous value, 388, is a different class).
fake_label = torch.LongTensor([105])
# Start again from a zero perturbation with a fresh optimizer.
delta = torch.zeros_like(transform_tensor(image_sloth), requires_grad=True)
optimizer = torch.optim.SGD([delta], lr=.009)

# Loop-invariant pieces: the un-normalized clean image, and both labels moved
# to the model's device (cross_entropy needs logits and targets on the same
# device, otherwise this cell fails when CUDA is available).
clean_image = transform_tensor(image_sloth)
true_label_on_device = true_pred_label.to(device)
fake_label_on_device = fake_label.to(device)

for it in range(51):
    image_tensor = transform_norm(clean_image + delta).float().unsqueeze(0)
    prediction = model(image_tensor.to(device))
    # Minimizing this raises the loss on the true class and lowers it on the target.
    loss = (
        -F.cross_entropy(prediction, true_label_on_device)
        + F.cross_entropy(prediction, fake_label_on_device)
    )
    if it % 10 == 0:
        print(f"iteration {it}, loss = {loss.item()}")

    optimizer.zero_grad()
    # The graph is rebuilt from scratch on every iteration, so there is
    # nothing to retain between backward passes.
    loss.backward()
    optimizer.step()
    # Project back into the allowed perturbation range.
    with torch.no_grad():
        delta.clamp_(-epsilon, epsilon)

# Re-evaluate with the final perturbation: inside the loop `prediction` is
# computed *before* the last update, so it would otherwise lag one step
# behind the `delta` that later cells visualize.
with torch.no_grad():
    image_tensor = transform_norm(clean_image + delta).float().unsqueeze(0)
    prediction = model(image_tensor.to(device))

In [None]:
# Report what the model predicts after the targeted attack.
max_class = prediction.max(dim=1)[1].item()
print(imagenet_labels[max_class])

confidence, pred_label = F.softmax(prediction, dim=1).max(dim=1)
# `prediction` lives on the model's device; tensors must be detached and
# moved to the CPU before conversion to NumPy, otherwise this fails on CUDA.
print(f"Prediction confidence = {confidence.detach().cpu().numpy()}")
print(f"Class label in ImageNet = {pred_label.detach().cpu().numpy()}")
print(f"Loss = {F.cross_entropy(prediction, pred_label).item()}")

### Compare the attacked image with the raw image

In [None]:
# Rebuild the attacked image in un-normalized pixel space and reorder the
# axes with transpose(1, 2, 0) so matplotlib can display it.
new_image_sloth = transform_tensor(image_sloth) + delta
new_image_sloth = new_image_sloth.squeeze().detach().cpu().numpy().transpose(1, 2, 0)
# Adding delta can push values slightly outside the displayable range.
new_image_sloth = np.clip(new_image_sloth, 0, 1)
noise = delta.squeeze().detach().cpu().numpy().transpose(1, 2, 0)

# Titles: label of the clean prediction vs. label of the attacked prediction,
# truncated to 20 characters.
clean_title = imagenet_labels[true_prediction.max(dim=1)[1].item()][:20] + "..."
attacked_title = imagenet_labels[prediction.max(dim=1)[1].item()][:20] + "..."

fig, ax = plt.subplots(1, 3, figsize=(15, 5))
ax[0].imshow(image_sloth)
ax[0].set_title(clean_title)
ax[1].imshow(new_image_sloth)
ax[1].set_title(attacked_title)
# The amplified noise spans negative values and can exceed 1; clip explicitly
# to the float image range instead of relying on imshow's implicit clipping
# (which emits a warning). Negative perturbations therefore render as black.
ax[2].imshow(np.clip(noise * 200, 0, 1))
ax[2].set_title("Added noise x 200");

##### References: 
https://adversarial-ml-tutorial.org/

https://openai.com/blog/adversarial-example-research/