In [2]:
# [CELL ID] 1

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

[CELL ID] 2
## From adversarial examples to training robust models

In the previous notebooks, we focused on methods for solving the maximization problem over perturbations; that is, on finding the solution to the problem
\begin{equation}
\DeclareMathOperator*{\maximize}{maximize}
\maximize_{\|\delta\| \leq \epsilon} \ell(h_\theta(x + \delta), y).
\end{equation}

In this notebook, we will focus on training a robust classifier. More precisely, we aim to solve the following min-max problem, known as adversarial training:
\begin{equation}
\DeclareMathOperator*{\minimize}{minimize}
\minimize_\theta \frac{1}{|S|} \sum_{x,y \in S} \max_{\|\delta\| \leq \epsilon} \ell(h_\theta(x + \delta), y).
\end{equation}
The order of the min-max operations is important here.  Specifically, the max is inside the minimization, meaning that the adversary (trying to maximize the loss) gets to "move" _second_.  We assume, essentially, that the adversary has full knowledge of the classifier parameters $\theta$, and that they get to specialize their attack to whatever parameters we have chosen in the outer minimization. The goal of the robust optimization formulation, therefore, is to ensure that the model cannot be attacked _even if_ the adversary has full knowledge of the model.  Of course, in practice we may want to make assumptions about the power of the adversary, but it can be difficult to pin down a precise definition of what we mean by the "power" of the adversary, so extra care should be taken in evaluating models against possible "realistic" adversaries.

## Exercise 1
1. Train a robust classifier using Adversarial Training
2. Evaluate your classifier on natural and adversarial examples
3. Analyze the results and draw conclusions

### Loading MNIST dataset (train set and test set)

In [3]:
# [CELL ID] 3

# load MNIST dataset
def load_mnist(split, batch_size):
  """Return a DataLoader over MNIST images converted to tensors.

  Args:
    split: 'train' selects the training set; any other value selects the
      test set.
    batch_size: number of examples per batch.

  The data is downloaded to ./data if needed. Only the training split is
  shuffled.
  """
  is_train = (split == 'train')
  mnist = datasets.MNIST(
      "./data",
      train=is_train,
      download=True,
      transform=transforms.ToTensor(),
  )
  return DataLoader(mnist, batch_size=batch_size, shuffle=is_train)

# Mini-batch size shared by the training and test loaders.
batch_size = 100
# Build both loaders in the same order as before: train first, then test.
train_loader, test_loader = (
    load_mnist(split_name, batch_size) for split_name in ('train', 'test')
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


## Implementing FGSM and PGD

In [None]:
# [CELL ID] 4

class FastGradientSignMethod:
  """Single-step Fast Gradient Sign Method (FGSM) attack.

  Args:
    model: callable mapping a batch of inputs to class scores (logits).
    eps: magnitude added to (or subtracted from) every input coordinate.
  """

  def __init__(self, model, eps):
    self.model = model
    self.eps = eps

  def compute(self, x, y):
    """Construct FGSM adversarial examples for the batch x with labels y.

    Returns x + eps * sign(d loss / d x), detached from the autograd graph.
    The result is not clipped to any valid input range.

    The attack has no side effects: x is left untouched and nothing is
    accumulated into the .grad of the model parameters.
    """
    # Differentiate w.r.t. a detached view so the caller's tensor is never
    # mutated (no requires_grad flip, no x.grad accumulating across calls).
    x_in = x.detach().requires_grad_(True)

    # cross-entropy between the model scores and the true labels
    loss = nn.CrossEntropyLoss()(self.model(x_in), y)

    # autograd.grad returns only d(loss)/d(x_in); unlike loss.backward() it
    # does not write into the parameters' .grad, so an optimizer that shares
    # the model never sees stale gradients coming from the attack.
    grad, = torch.autograd.grad(loss, x_in)

    # step of size eps in the direction that increases the loss
    return x.detach() + self.eps * grad.sign()


class ProjectedGradientDescent:
  """Iterative projected-gradient attack inside an L-infinity ball.

  Args:
    model: callable mapping a batch of inputs to class scores (logits).
    eps: radius of the L-infinity ball the perturbation is clamped to.
    alpha: step size multiplying the gradient at every iteration.
    num_iter: number of ascent steps.
  """

  def __init__(self, model, eps, alpha, num_iter):
    self.model = model
    self.eps = eps
    self.alpha = alpha
    self.num_iter = num_iter

  def compute(self, x, y):
    """Return x plus the perturbation found after num_iter ascent steps.

    The perturbation starts at zero. Each step moves it along the raw
    gradient of the cross-entropy loss (not its sign), so a useful alpha
    depends on the gradient scale. It is then clamped to [-eps, eps].
    The result is not clipped to any valid input range.

    Nothing is accumulated into the .grad of the model parameters.
    """
    # loss whose gradient w.r.t. the perturbation drives the attack
    loss_func = nn.CrossEntropyLoss()

    # perturbation initialised to 0 with the same shape as x
    delta = torch.zeros_like(x, requires_grad=True)

    for _ in range(self.num_iter):
      # loss of the model on the currently perturbed input
      loss = loss_func(self.model(x + delta), y)

      # Gradient w.r.t. delta only: unlike loss.backward(), this leaves the
      # parameters' .grad untouched, so the attack cannot leak gradients
      # into a training step that shares the model.
      grad, = torch.autograd.grad(loss, delta)

      # Update delta in place without recording the update in the graph.
      with torch.no_grad():
        # gradient ascent step on the loss
        delta += self.alpha * grad
        # projection back onto the L-infinity ball of radius eps
        delta.clamp_(-self.eps, self.eps)

    return x + delta.detach() # return the modified (attacked) image


In [None]:
# [CELL ID] 5

class ConvModel(torch.nn.Module):
  """Small convolutional classifier for 1x28x28 inputs with 10 classes.

  Four 3x3 convolutions with ReLU (two of them with stride 2, reducing
  28x28 -> 14x14 -> 7x7) are followed by two fully connected layers.
  forward returns raw class scores (no softmax), which is the input
  nn.CrossEntropyLoss expects.
  """

  def __init__(self):
    super(ConvModel, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
        nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),  # 28 -> 14
        nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
        nn.Conv2d(64, 64, 3, padding=1, stride=2), nn.ReLU(),  # 14 -> 7
    )
    self.classifier = nn.Sequential(
        nn.Linear(7 * 7 * 64, 100), nn.ReLU(),
        nn.Linear(100, 10),
    )

  def forward(self, x):
    """Map a batch of shape (N, 1, 28, 28) to scores of shape (N, 10)."""
    feats = self.features(x)
    # flatten everything except the batch dimension
    return self.classifier(torch.flatten(feats, 1))

In [None]:
# [CELL ID] 6

def adversarial_train_model(model, criterion, optimizer, loader, attack):
  """Train the model for one pass over loader on adversarial examples.

  For every batch, attack.compute(x, y) builds the adversarial inputs and
  the model takes one optimizer step on the loss measured on those inputs.

  Args:
    model: nn.Module to train (switched to train mode).
    criterion: loss taking (scores, labels).
    optimizer: optimizer over the model parameters.
    loader: iterable of (inputs, labels) batches.
    attack: object exposing compute(x, y) -> adversarial inputs.

  Returns:
    (error_rate, mean_loss) measured on the adversarial inputs during the
    pass, or (nan, nan) if the loader yields no example.
  """
  model.train()
  # Batches are moved to wherever the model lives (CPU or GPU).
  first_param = next(model.parameters(), None)

  loss_sum, n_wrong, n_seen = 0.0, 0, 0
  for x, y in loader:
    if first_param is not None:
      x, y = x.to(first_param.device), y.to(first_param.device)

    # inner maximisation: worst-case inputs for the current parameters
    x_adv = attack.compute(x, y)

    # Clear gradients only after the attack has run, so anything the attack
    # may have accumulated on the parameters is discarded before the step.
    optimizer.zero_grad()
    scores = model(x_adv)
    loss = criterion(scores, y)
    loss.backward()
    optimizer.step()

    batch_n = y.size(0)
    loss_sum += loss.item() * batch_n
    n_wrong += (scores.argmax(dim=1) != y).sum().item()
    n_seen += batch_n

  if n_seen == 0:
    return float('nan'), float('nan')
  return n_wrong / n_seen, loss_sum / n_seen
    
# adversarial training with PGD
model = ConvModel()
# Use the GPU when one is available instead of failing on CPU-only machines.
if torch.cuda.is_available():
  model = model.cuda()

# define your loss
criterion = nn.CrossEntropyLoss()

# define the optimizer
opt = optim.SGD(model.parameters(), lr=0.1)

# define the attack
# NOTE: ProjectedGradientDescent steps along the raw gradient (not its sign)
# of a loss averaged over the batch, so alpha has to be large for the
# perturbation to reach the eps bound. These values are a starting point;
# tune them for your model.
attack = ProjectedGradientDescent(model, eps=0.1, alpha=1e4, num_iter=10)

adversarial_train_model(model, criterion, opt, train_loader, attack)

In [None]:
# [CELL ID] 7

def eval_model(model, loader, attack=None):
  """Return the accuracy of the model over loader, as a float in [0, 1].

  Args:
    model: nn.Module to evaluate. It is put in eval mode for the duration
      of the call and its previous train/eval mode is restored afterwards.
    loader: iterable of (inputs, labels) batches.
    attack: optional object exposing compute(x, y). When given, accuracy is
      measured on attack.compute(x, y) instead of the natural inputs.

  Returns nan if the loader yields no example.
  """
  was_training = model.training
  model.eval()
  # Batches are moved to wherever the model lives (CPU or GPU).
  first_param = next(model.parameters(), None)

  n_correct, n_seen = 0, 0
  for x, y in loader:
    if first_param is not None:
      x, y = x.to(first_param.device), y.to(first_param.device)

    # The attack needs input gradients, so it runs outside torch.no_grad().
    if attack is not None:
      x = attack.compute(x, y)

    with torch.no_grad():
      predictions = model(x).argmax(dim=1)

    n_correct += (predictions == y).sum().item()
    n_seen += y.size(0)

  model.train(was_training)
  if n_seen == 0:
    return float('nan')
  return n_correct / n_seen

# PGD adversary used to measure robust accuracy.
# NOTE: ProjectedGradientDescent steps along the raw gradient (not its sign),
# hence the large alpha. These values are a starting point; tune as needed.
attack = ProjectedGradientDescent(model, eps=0.1, alpha=1e4, num_iter=20)

# Print both results: a bare expression would only display the last one.
print("natural examples:", eval_model(model, test_loader))
print("adversarial examples:", eval_model(model, test_loader, attack))