# Adversarial attacks on GoogleNet
The goal of this notebook is to download a pretrained GoogleNet model for classifying CIFAR-10 images, test it on our dataset, then generate adversarial examples and see whether they fool the GoogleNet model. We'll then fine-tune the GoogleNet model on these adversarial images to see whether that makes the network robust against them, and what it costs in accuracy.

The pretrained model and the `CIFAR10_Module` class are provided by Huy Phan's [PyTorch_CIFAR10](https://github.com/huyvnphan/PyTorch_CIFAR10) repository.

In [11]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
from PIL import Image
import imageio
# Model path:
# NOTE(review): PATH is not referenced by any cell shown in this notebook; the model-loading
# cell reads '../Models/googlenet.pt' instead — confirm which checkpoint file is intended.
PATH = '../Models/googlenet_cifar10.pth'

In [2]:
# Number of images per batch for both loaders.
batchsize = 1

# Per-channel statistics used for normalization. These are the same values as
# CIFAR10_Module.mean / CIFAR10_Module.std, i.e. the statistics the pretrained model's own
# data loaders apply, so images fed to the network here are scaled the way it expects.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)])

# Training split (downloaded into ../Data on first use), reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(root='../Data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchsize,
                                          shuffle=True, num_workers=2)

# Test split, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(root='../Data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batchsize,
                                         shuffle=False, num_workers=2)

# Human-readable names for the ten class indices.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [None]:
class CIFAR10_Module(pl.LightningModule):
    """Lightning wrapper that trains and evaluates a CIFAR-10 classifier.

    The wrapped network is obtained from ``get_classifier``; the hooks below provide the
    loss, the logged metrics, the optimizer/scheduler pair and the CIFAR-10 data loaders.
    """

    def __init__(self, hparams, pretrained=False):
        """Store the hyper-parameters, build the classifier and cache the dataset sizes.

        Args:
            hparams: Object exposing the ``classifier``, ``data_dir``, ``batch_size``,
                ``learning_rate``, ``weight_decay`` and ``max_epochs`` attributes read
                by the methods of this class.
            pretrained: Forwarded unchanged to ``get_classifier``.
        """
        super().__init__()
        self.hparams = hparams
        self.criterion = torch.nn.CrossEntropyLoss()
        # Per-channel statistics handed to transforms.Normalize by the loaders below.
        self.mean = [0.4914, 0.4822, 0.4465]
        self.std = [0.2023, 0.1994, 0.2010]
        self.model = get_classifier(hparams.classifier, pretrained)
        # Dataset sizes feed the LR schedule and the epoch-level validation averages.
        train_loader = self.train_dataloader()
        self.train_size = len(train_loader.dataset)
        val_loader = self.val_dataloader()
        self.val_size = len(val_loader.dataset)

    def forward(self, batch):
        """Score one ``(images, labels)`` batch.

        Returns:
            ``(loss, accuracy)``: the criterion's loss for the batch and the fraction of
            samples whose highest-scoring class equals the label.
        """
        images, labels = batch
        predictions = self.model(images)
        loss = self.criterion(predictions, labels)
        _, predicted = torch.max(predictions, 1)
        n_correct = torch.sum(predicted == labels.data).float()
        accuracy = n_correct / batch[0].size(0)
        return loss, accuracy

    def training_step(self, batch, batch_nb):
        """Return the loss to optimize for one batch, plus loss/accuracy log entries."""
        train_loss, train_accuracy = self.forward(batch)
        return {
            'loss': train_loss,
            'log': {'loss/train': train_loss, 'accuracy/train': train_accuracy},
        }

    def validation_step(self, batch, batch_nb):
        """Return the summed loss and the count of correct predictions for one batch."""
        mean_loss, accuracy = self.forward(batch)
        n_samples = batch[0].size(0)
        # Turn the per-batch means back into sums so the epoch hook can divide by the
        # dataset size.
        return {'loss/val': mean_loss * n_samples, 'corrects': accuracy * n_samples}

    def validation_epoch_end(self, outputs):
        """Average the per-batch sums from ``validation_step`` over ``self.val_size``."""
        summed_loss = torch.stack([step['loss/val'] for step in outputs]).sum()
        summed_corrects = torch.stack([step['corrects'] for step in outputs]).sum()
        epoch_loss = summed_loss / self.val_size
        epoch_accuracy = summed_corrects / self.val_size
        return {
            'val_loss': epoch_loss,
            'log': {'loss/val': epoch_loss, 'accuracy/val': epoch_accuracy},
        }

    def test_step(self, batch, batch_nb):
        """Testing uses exactly the same per-batch computation as validation."""
        return self.validation_step(batch, batch_nb)

    def test_epoch_end(self, outputs):
        """Report the accuracy as a percentage rounded to two decimals."""
        epoch_summary = self.validation_epoch_end(outputs)
        fraction_correct = epoch_summary['log']['accuracy/val']
        percent = round((100 * fraction_correct).item(), 2)
        return {'progress_bar': {'Accuracy': percent}}

    def configure_optimizers(self):
        """Create Nesterov-momentum SGD and a one-cycle LR schedule stepped every batch."""
        hp = self.hparams
        optimizer = torch.optim.SGD(self.parameters(), lr=hp.learning_rate,
                                    weight_decay=hp.weight_decay, momentum=0.9, nesterov=True)
        # The training loader drops the last partial batch, hence the floor division.
        one_cycle = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=hp.learning_rate,
            steps_per_epoch=self.train_size // hp.batch_size,
            epochs=hp.max_epochs,
        )
        scheduler = {'scheduler': one_cycle, 'interval': 'step', 'name': 'learning_rate'}
        return [optimizer], [scheduler]

    def train_dataloader(self):
        """Shuffled training loader with random-crop and horizontal-flip augmentation."""
        augmentations = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std),
        ]
        dataset = CIFAR10(root=self.hparams.data_dir, train=True,
                          transform=transforms.Compose(augmentations))
        return DataLoader(dataset, batch_size=self.hparams.batch_size, num_workers=4,
                          shuffle=True, drop_last=True, pin_memory=True)

    def val_dataloader(self):
        """Loader over the ``train=False`` split with normalization only (no augmentation)."""
        preprocessing = [
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std),
        ]
        dataset = CIFAR10(root=self.hparams.data_dir, train=False,
                          transform=transforms.Compose(preprocessing))
        return DataLoader(dataset, batch_size=self.hparams.batch_size, num_workers=4,
                          pin_memory=True)

    def test_dataloader(self):
        """The test loader is the validation loader."""
        return self.val_dataloader()

In [4]:
# Load the pretrained GoogLeNet checkpoint.
# NOTE(security): torch.load unpickles arbitrary Python objects, so only point it at
# checkpoint files that come from a trusted source.

# Device shared by the model and by the input tensors built in the cells below.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# map_location lets a checkpoint that was saved on a GPU machine load on a CPU-only one.
gnet = torch.load('../Models/googlenet.pt', map_location=device)

if isinstance(gnet, nn.Module):
    # The model is only used for inference here: switch layers such as dropout and
    # batch-norm to their evaluation behaviour so predictions are deterministic.
    gnet = gnet.to(device).eval()

In [5]:
def predict_image(network, img):
    """
    Classify a single image with `network`.

    Input:
        network: Callable model; it is called on a 1-image batch and its output is
            squeezed (assumes it returns one score per class — TODO confirm).
        img: The image itself (anything `transforms.ToTensor` accepts, e.g. the array
            returned by `imageio.imread`) — NOT a file path.
    Outputs: Predicted image class, probability assigned by network to top class

    NOTE(review): only `ToTensor` is applied here, without the `Normalize` step used by the
    data loaders — confirm that this is the input scaling the network expects.
    """
    img_transform = transforms.Compose([transforms.ToTensor()])
    input_tensor = img_transform(img).float().unsqueeze(0).to(device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = network(input_tensor).squeeze()
    # Normalize over the class axis explicitly; nn.Softmax() without `dim` relies on a
    # deprecated implicit choice of dimension and emits a warning.
    class_probas = F.softmax(outputs, dim=-1).cpu().numpy()
    idx = np.argmax(class_probas)
    # `class_names` is expected to be a module-level sequence indexed by class id.
    img_class = class_names[idx]
    proba = class_probas[idx]
    return img_class, proba

In [None]:
def get_adversarial_image(network, img_tuple, epsilon=0.01):
    """
    Build an adversarial version of an image with the fast gradient sign method:
    every pixel is moved by a fixed step in the direction that increases the
    classification loss for the image's true label.

    Input:
        network: Model called on a 1-image batch; must return class scores usable by
            `nn.CrossEntropyLoss`.
        img_tuple: `(img_file, label)` pair. `img_file` is read with `imageio.imread`;
            `label` is a key of the module-level `labels_class` mapping.
        epsilon: Step size as a fraction of the 0-255 pixel range.
    Outputs: Perturbed image as a uint8 array with the same layout as the image read
        from disk (assumed height x width x channels — TODO confirm for non-RGB files).
    """
    img_file, label = img_tuple
    img = imageio.imread(img_file)
    img_transform = transforms.Compose([transforms.ToTensor()])
    input_tensor = img_transform(img).float().unsqueeze(0).to(device)
    # Track gradients with respect to the pixels themselves.
    input_tensor.requires_grad = True
    outputs = network(input_tensor)
    # Format label: map it to the index the network uses for that class.
    class_name = labels_class[label]
    class_idx = class_names.index(class_name)
    target = torch.tensor(class_idx).unsqueeze(0).to(device)
    # Get loss gradient with regard to image pixels.
    loss = nn.CrossEntropyLoss()(outputs, target)
    # autograd.grad returns only d(loss)/d(input) and leaves the `.grad` of the network's
    # parameters untouched; loss.backward() would accumulate into them on every call and
    # leak into a later optimizer step.
    img_gradient, = torch.autograd.grad(loss, input_tensor)
    gradient_signs = torch.sign(img_gradient).cpu().numpy().squeeze()
    # Match shape of image (channels last in this case)
    gradient_signs = np.transpose(gradient_signs, axes=[1, 2, 0])
    # astype(np.int16) truncates toward zero, so the step is int(255 * epsilon) pixel
    # levels (2 for the default epsilon); an epsilon below 1/255 yields no change at all.
    pixel_changes = (gradient_signs * 255 * epsilon).astype(np.int16)
    # Add in int16 so the sum cannot wrap around before it is clipped back to 0-255.
    changed_img = img.astype(np.int16) + pixel_changes
    adv_img = np.clip(changed_img, 0, 255).astype(np.uint8)
    return adv_img

In [13]:
# Sanity-check the loaded network on a single image.
# NOTE(review): `img` is not defined in any cell shown above this one. The recorded OSError
# output suggests it held a tuple starting with a tensor when this cell was last run —
# confirm, and define `img` explicitly before this call.
predict_image(gnet, img)

OSError: Cannot understand given URI: (tensor([[[ 0.2392,  0.2471,  0.2941,  ...,  0.0745, -0.0....

In [None]:
# Predict on the same image samples as above.
# Draws a 5 x 10 grid; each panel shows one sample image, with the predicted class and the
# probability of that class as its x-label.
fig, ax = plt.subplots(nrows=5, ncols=10, figsize=(24, 18))
# ax.flat walks the grid row by row, i.e. panel i sits at row i // 10, column i % 10.
for i, panel in enumerate(ax.flat):
    # Each entry of image_samples is indexable; element 0 is the image file to read.
    img = imageio.imread(image_samples[i][0])
    panel.imshow(img)
    # Use `gnet`, the model loaded earlier in this notebook (`net` is not defined in any
    # cell shown here).
    predicted_class, predicted_proba = predict_image(gnet, img)
    panel.set_xlabel(f"{predicted_class}: {predicted_proba:.6f}")
    # Tick marks carry no information for image panels.
    panel.set_xticks([])
    panel.set_yticks([])