## Perturbation adversariale par rotation de x dans la direction du gradient

In [3]:
# @title version test, attaque adversarial par rotation de x dans la direction du gradient
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader, Subset
from torch.autograd import Variable

# Définition de l'attaque adversarial
def rotate_by_gradient(model, images, labels, epsilon):
    images.requires_grad = True

    outputs = model(images)
    loss = nn.CrossEntropyLoss()(outputs, labels)
    model.zero_grad()
    loss.backward()

    gradients = images.grad

    perturbation = epsilon * gradients / gradients.norm(p=2, dim=(1, 2, 3), keepdim=True)
    adversarial_images = images + perturbation
    adversarial_images = torch.clamp(adversarial_images, 0, 1)

    return adversarial_images


# Classe du modèle (shallow softmax classifier)
class ShallowSoftmaxClassifier(nn.Module):
    def __init__(self):
        super(ShallowSoftmaxClassifier, self).__init__()
        self.fc = nn.Linear(28 * 28, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28) 
        x = self.fc(x)
        return x
    
# Charger les données MNIST
def load_data():
    transform = transforms.Compose([transforms.ToTensor()])
    full_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    train_loader = DataLoader(full_dataset, batch_size=64, shuffle=True)
    return train_loader

# Entraînement sur des données normales
def train_on_normal_data(model, train_loader, optimizer, epochs=10):
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = model(images)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader)}")

# Test sur des données normales
def test_on_normal_data(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    incorrect_probs = []

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            probs = nn.Softmax(dim=1)(outputs)
            _, predicted = torch.max(outputs, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            for i in range(labels.size(0)):
                if predicted[i] != labels[i]:
                    incorrect_probs.append(probs[i, predicted[i]].item())

    accuracy = 100 * correct / total
    avg_incorrect_prob = np.mean(incorrect_probs) * 100 if incorrect_probs else 0
    print(f"Accuracy on clean data: {accuracy}%")

    
    if incorrect_probs:
        avg_incorrect_prob = np.mean(incorrect_probs) * 100
        print(f'Average probability assigned to incorrect predictions (on clean data): {avg_incorrect_prob:.2f}%')
    else:
        avg_incorrect_prob = 0
        print('No incorrect predictions to calculate average probability on clean data.')
    return accuracy, avg_incorrect_prob

# Test sur des données adversariales
def test_on_adversarial_data(model, train_loader, epsilon):
    model.eval()

    correct = 0
    total = 0
    incorrect_probs = []

    for images, labels in train_loader:
    
        images = images.clone().detach().requires_grad_(True)

        adversarial_images = rotate_by_gradient(model, images, labels, epsilon)

        outputs = model(adversarial_images)
        probs = nn.Softmax(dim=1)(outputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        for i in range(labels.size(0)):
            if predicted[i] != labels[i]:
                incorrect_probs.append(probs[i, predicted[i]].item())

    accuracy = 100 * correct / total
    print(f'Accuracy on adversarial examples: {accuracy}%')

    if incorrect_probs:
        avg_incorrect_prob = np.mean(incorrect_probs) * 100
        print(f'Average probability assigned to incorrect predictions (on adversarial data): {avg_incorrect_prob:.2f}%')
    else:
        avg_incorrect_prob = 0
        print('No incorrect predictions to calculate average probability on adversarial data.')
    return accuracy, avg_incorrect_prob


# Fonction principale
def main():
    # Paramètres
    epsilon = 0.5  # Magnitude des perturbations adversariales
    epochs = 20  # Nombre d'époques d'entraînement

    train_loader = load_data()
    test_loader = load_data()

    model = ShallowSoftmaxClassifier()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    print("\n--- Training the model ---")
    train_on_normal_data(model, train_loader, optimizer, epochs)

    print("\n--- Testing on clean data ---")
    clean_accuracy, clean_avg_incorrect_prob = test_on_normal_data(model, test_loader)

    print("\n--- Testing on adversarial data ---")
    adversarial_accuracy, adversarial_avg_incorrect_prob = test_on_adversarial_data(model, test_loader, epsilon)

    print("\n--- Comparison ---")
    print(f"Clean Data: Accuracy = {clean_accuracy:.2f}%, Avg Confidence on Incorrect = {clean_avg_incorrect_prob:.2f}%")
    print(f"Adversarial Data: Accuracy = {adversarial_accuracy:.2f}%, Avg Confidence on Incorrect = {adversarial_avg_incorrect_prob:.2f}%")

# Exécuter la fonction principale
if __name__ == "__main__":
    main()



--- Training the model ---
Epoch [1/20], Loss: 0.48311883651180815
Epoch [2/20], Loss: 0.33656363570486814
Epoch [3/20], Loss: 0.3140984530975697
Epoch [4/20], Loss: 0.3020510265250196
Epoch [5/20], Loss: 0.2942419885429365
Epoch [6/20], Loss: 0.2884822124833745
Epoch [7/20], Loss: 0.28421286724682554
Epoch [8/20], Loss: 0.28113810275631673
Epoch [9/20], Loss: 0.27834817867225675
Epoch [10/20], Loss: 0.27526939525676053
Epoch [11/20], Loss: 0.2733153218049993
Epoch [12/20], Loss: 0.2714766095330847
Epoch [13/20], Loss: 0.26983637878222505
Epoch [14/20], Loss: 0.2684981512394287
Epoch [15/20], Loss: 0.26724203368986466
Epoch [16/20], Loss: 0.26556795752490125
Epoch [17/20], Loss: 0.26436674071035027
Epoch [18/20], Loss: 0.2638104881829163
Epoch [19/20], Loss: 0.2623897121468587
Epoch [20/20], Loss: 0.26208111754199587

--- Testing on clean data ---
Accuracy on clean data: 92.89666666666666%
Average probability assigned to incorrect predictions (on clean data): 63.68%

--- Testing on ad