In [7]:
# @title Réseau Maxout avec l'ajout de la fonction L1 Weight Decay
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset

def FGSM(model, images, labels, epsilon):
    """Craft adversarial examples with the Fast Gradient Sign Method.

    Each input is shifted by ``epsilon`` in the direction of the sign of the
    cross-entropy loss gradient taken with respect to that input, and the
    result is clamped to the ``[0, 1]`` range.

    Args:
        model: Callable mapping a batch of images to class logits.
        images: Batch of input images (floating-point tensor). It is not
            modified; a detached copy is used internally.
        labels: Integer class labels for ``images``. They are moved to the
            device of the model output before the loss is computed.
        epsilon: Magnitude of the perturbation.

    Returns:
        A tensor with the same shape as ``images`` holding the adversarial
        examples. It is detached from the autograd graph.
    """
    # Work on a private leaf copy: the caller's tensor is left untouched and
    # non-leaf inputs (whose requires_grad flag cannot be set) are accepted.
    inputs = images.detach().clone().requires_grad_(True)

    # Gradients are required here even if the caller is inside torch.no_grad().
    with torch.enable_grad():
        outputs = model(inputs)
        labels = labels.to(outputs.device)
        loss = nn.CrossEntropyLoss()(outputs, labels)

        # Differentiate with respect to the inputs only, so the model's
        # parameter .grad buffers are neither cleared nor populated.
        (input_grad,) = torch.autograd.grad(loss, inputs)

    # Build the adversarial perturbation and keep pixels in the valid range.
    perturbation = epsilon * input_grad.sign()
    adversarial_images = torch.clamp(inputs.detach() + perturbation, 0, 1)
    return adversarial_images

# Maxout layer
class Maxout(nn.Module):
    """Linear layer followed by a max over groups of ``num_pieces`` units.

    The linear map produces ``out_features * num_pieces`` values per sample;
    consecutive runs of ``num_pieces`` values along the last axis form one
    group, and the layer outputs the maximum of each group.
    """

    def __init__(self, in_features, out_features, num_pieces):
        super().__init__()
        self.num_pieces = num_pieces
        self.linear = nn.Linear(in_features, out_features * num_pieces)

    def forward(self, x):
        """Return the per-group maximum of the linear projection of ``x``."""
        projected = self.linear(x)
        batch_size = projected.size(0)
        # Shape (batch, groups, pieces): the last axis holds competing pieces.
        grouped = projected.view(batch_size, -1, self.num_pieces)
        best, _ = grouped.max(dim=-1)
        return best

# Model class (deep network built from maxout layers)
class DeepMaxoutNN(nn.Module):
    """Two stacked Maxout layers followed by a 10-way linear classifier.

    Inputs are flattened to vectors of ``28 * 28`` values before the first
    layer; the output holds 10 unnormalized class scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.maxout1 = Maxout(28 * 28, 256, 4)  # Maxout with 4 pieces
        self.maxout2 = Maxout(256, 128, 4)
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        """Flatten ``x`` and pass it through both maxout layers and the head."""
        flat = x.view(-1, 28 * 28)
        hidden = self.maxout2(self.maxout1(flat))
        return self.fc(hidden)

# Load the MNIST data (by default only the digits 3 and 7)
def load_data(digits=(3, 7), batch_size=64):
    """Build a shuffled DataLoader over the MNIST training images of ``digits``.

    Args:
        digits: Iterable of digit labels to keep. Defaults to ``(3, 7)``.
        batch_size: Number of samples per batch. Defaults to 64.

    Returns:
        A ``DataLoader`` over the subset of the MNIST training split whose
        label is one of ``digits``. Labels are left unchanged.
    """
    transform = transforms.Compose([transforms.ToTensor()])
    full_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

    # Select the wanted labels with one vectorized mask rather than indexing
    # the targets tensor element by element in a Python loop.
    targets = torch.as_tensor(full_dataset.targets)
    mask = torch.zeros_like(targets, dtype=torch.bool)
    for digit in digits:
        mask |= targets == digit
    indices = torch.nonzero(mask).flatten().tolist()
    filtered_dataset = Subset(full_dataset, indices)

    train_loader = DataLoader(filtered_dataset, batch_size=batch_size, shuffle=True)
    return train_loader

# Training on clean (non-adversarial) data
def train_on_normal_data(model, train_loader, optimizer, epochs=10, l1_lambda=0.0):
    """Train ``model`` with cross-entropy, optionally adding an L1 penalty.

    Args:
        model: Network to train. When ``l1_lambda > 0`` it must expose
            ``model.maxout1.linear.weight``.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer already bound to the parameters of ``model``.
        epochs: Number of passes over ``train_loader``.
        l1_lambda: Coefficient of the L1 penalty applied to the first layer's
            weights only. ``0.0`` disables the penalty.

    Prints the mean per-batch loss after every epoch. The printed value
    includes the L1 penalty when it is enabled, and is ``nan`` for an epoch
    that yielded no batch.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()  # built once instead of once per batch

    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels.to(outputs.device))

            # L1 regularization on the weights of the first layer only
            if l1_lambda > 0.0:
                l1_norm = model.maxout1.linear.weight.abs().sum()
                loss = loss + l1_lambda * l1_norm

            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Counting batches avoids needing len() and dividing by zero when the
        # loader is empty.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {mean_loss}")

# Evaluation on adversarial data
def test_on_adversarial_data(model, train_loader, epsilon):
    """Measure the accuracy of ``model`` on FGSM-perturbed batches.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        epsilon: Perturbation magnitude forwarded to ``FGSM``.

    Returns:
        Accuracy in percent over all samples seen, or ``0.0`` when the loader
        yields no sample. The value is also printed.
    """
    model.eval()

    correct = 0
    total = 0

    for images, labels in train_loader:
        # Hand FGSM a private copy so the loader's tensors stay untouched.
        images = images.clone().detach()
        adversarial_images = FGSM(model, images, labels, epsilon)

        # No autograd graph is needed to score the perturbed inputs.
        with torch.no_grad():
            outputs = model(adversarial_images.detach())
        _, predicted = torch.max(outputs, 1)
        labels = labels.to(predicted.device)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    print(f'Accuracy on adversarial examples: {accuracy}%')
    return accuracy

# Main function
def main():
    """Train the maxout network with L1 weight decay, then attack it with FGSM.

    The adversarial accuracy is measured on the same loader that was used
    for training.
    """
    # Hyperparameters
    epsilon = 0.025    # magnitude of the adversarial perturbations
    epochs = 10        # number of training epochs
    l1_lambda = 0.01   # L1 regularization coefficient

    train_loader = load_data()

    model = DeepMaxoutNN()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    train_on_normal_data(
        model,
        train_loader,
        optimizer,
        epochs=epochs,
        l1_lambda=l1_lambda,
    )

    test_on_adversarial_data(model, train_loader, epsilon)

# Run the main function when this file is executed as a script
if __name__ == "__main__":
    main()



Epoch [1/10], Loss: 14.278231424154695
Epoch [2/10], Loss: 10.164316496898218
Epoch [3/10], Loss: 10.107071507837354
Epoch [4/10], Loss: 10.029570677845749
Epoch [5/10], Loss: 9.975398515917592
Epoch [6/10], Loss: 10.034902267849322
Epoch [7/10], Loss: 9.92331791415657
Epoch [8/10], Loss: 9.897075156575626
Epoch [9/10], Loss: 9.889410598990844
Epoch [10/10], Loss: 9.873956041237742
Accuracy on adversarial examples: 94.27234591803807%


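On observe un immense gain de robustesse par rapport au réseau sans régularisation L1 : avec epsilon = 0,025 et lambda = 0,01, la précision sur les exemples adversariaux (mesurée ici sur les données d'entraînement) atteint 94,27 %. Cependant, comme indiqué dans l'article, si l'on choisit un lambda supérieur à epsilon, le réseau perd en performances.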