In [8]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn.functional as F

import matplotlib.pyplot as plt
import numpy as np

## 1. Chargement de la base de données

In [4]:
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 11305704.62it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 53742543.84it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 11935618.78it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 6149299.15it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



## 2. Modèle de prédiction

In [10]:
from maxout import MaxoutCNN

maxout_network = MaxoutCNN()
print(maxout_network)

MaxoutCNN(
  (architecture): Sequential(
    (0): Conv2d(1, 12, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): Maxout()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Dropout2d(p=0.4, inplace=False)
    (4): Conv2d(6, 32, kernel_size=(5, 5), stride=(1, 1))
    (5): Maxout()
    (6): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (7): Dropout2d(p=0.4, inplace=False)
    (8): Flatten(start_dim=1, end_dim=-1)
    (9): Linear(in_features=800, out_features=240, bias=True)
    (10): Maxout()
    (11): Dropout(p=0.4, inplace=False)
    (12): Linear(in_features=120, out_features=168, bias=True)
    (13): Maxout()
    (14): Dropout(p=0.4, inplace=False)
  )
  (classifier): Linear(in_features=84, out_features=10, bias=True)
)


## 3. Défintion de la fonction de coût
$$\tilde{J}(\theta, x, y) = \alpha J(\theta, x, y) + (1 - \alpha) J(\theta, x + \epsilon \cdot \text{sign}(\nabla_x J(\theta, x, y)))$$

On définit dans un premier temps une loss de base $$J(\theta, x, y)$$. Comme rien n'est précisé dans l'article, on choisit la cross-entropy.

In [15]:
# Fonction de perte standard
def loss_fn(model, x, y):
    output = model(x)
    return F.cross_entropy(output, y)

# Fonction de perte adversariale
def adversarial_loss_fn(model, x, y, epsilon, alpha):
    # Calcul de la perte standard
    standard_loss = loss_fn(model, x, y)
    
    # Génération de l'exemple adverse
    x_adv = x + epsilon * torch.sign(torch.autograd.grad(standard_loss, x, create_graph=True)[0])
    
    # Calcul de la perte sur l'exemple adverse
    adversarial_loss = loss_fn(model, x_adv, y)
    
    # Combinaison des deux pertes
    return alpha * standard_loss + (1 - alpha) * adversarial_loss

## 4. Entraînemnt d'un CNN sans maxout 

On définit un CNN similaire avec à la place des maxout, des relu. On entraîne ce CNN sur la base de données MNIST.

In [11]:
class CNN(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(CNN, self).__init__()
        self.architecture = nn.Sequential(
            nn.Conv2d(1, 6, 5,padding=2),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.AvgPool2d(kernel_size=2,stride=2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )

        self.classifier = nn.Linear(84, 10)

    def forward(self, x):
        x = self.architecture(x)
        return self.classifier(x)

cnn_network = CNN()
print(cnn_network)

CNN(
  (architecture): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (4): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (5): ReLU()
    (6): Dropout(p=0.5, inplace=False)
    (7): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (8): Flatten(start_dim=1, end_dim=-1)
    (9): ReLU()
    (10): Dropout(p=0.5, inplace=False)
    (11): Linear(in_features=400, out_features=120, bias=True)
    (12): ReLU()
    (13): Dropout(p=0.5, inplace=False)
    (14): Linear(in_features=120, out_features=84, bias=True)
    (15): ReLU()
    (16): Dropout(p=0.5, inplace=False)
  )
  (classifier): Linear(in_features=84, out_features=10, bias=True)
)


## 5. Entrainement et Attaque sur le CNN

Définition des paramètres utiles à l'entrainement.

In [13]:
lr = 0.01
momentum = 0.9
batch_size = 64
num_epochs = 100
sgd= optim.SGD(cnn_network.parameters(), lr=lr, momentum=momentum)


train_loader = DataLoader(training_data, batch_size=batch_size,shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [16]:


# Loop over the dataset multiple times
for epoch in range(num_epochs):
    # Loop over each batch of data
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs.requires_grad = True

        # Zero the parameter gradients
        sgd.zero_grad()

        # Forward pass
        outputs = cnn_network(inputs)

        # Calculate loss
        loss = adversarial_loss_fn(cnn_network, inputs, labels, epsilon=0.1, alpha=0.5)

        # Backward pass and optimization
        loss.backward()
        sgd.step()

print('Finished Training')

RuntimeError: One of the differentiated Tensors does not require grad