In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# MNIST train and test splits, stored under ../data (downloaded if missing);
# every image goes through transforms.ToTensor().
mnist_train = datasets.MNIST(
    root="../data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = datasets.MNIST(
    root="../data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Batches of 100; only the training loader shuffles.
train_loader = DataLoader(dataset=mnist_train, batch_size=100, shuffle=True)
test_loader = DataLoader(dataset=mnist_test, batch_size=100, shuffle=False)

# Pick the compute device. The notebook prefers the second GPU ("cuda:1"), but
# that index only exists on multi-GPU hosts: fall back to "cuda:0" when a single
# GPU is present and to the CPU when CUDA is unavailable.
if not torch.cuda.is_available():
    device = torch.device("cpu")
elif torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
else:
    device = torch.device("cuda:0")

# Seed PyTorch's global RNG so runs are repeatable.
torch.manual_seed(0)

def fgsm(model, X, y, epsilon=0.1):
    """Construct FGSM adversarial perturbations for the examples X.

    Takes one signed-gradient step of size ``epsilon`` in the direction that
    increases the cross-entropy loss of ``model`` on ``(X, y)``.

    Args:
        model: callable mapping a batch of inputs to class scores.
        X: input batch (tensor).
        y: labels for ``X`` in a form accepted by ``nn.CrossEntropyLoss``.
        epsilon: magnitude of every non-zero entry of the perturbation.

    Returns:
        A detached tensor shaped like ``X`` equal to ``epsilon * sign(grad)``,
        where ``grad`` is the loss gradient with respect to the input.
    """
    # Differentiate through an explicit zero perturbation added to X.
    delta = torch.zeros_like(X, requires_grad=True)

    loss = nn.CrossEntropyLoss()(model(X + delta), y)

    # Ask autograd for d(loss)/d(delta) only. loss.backward() would also
    # accumulate into every model parameter's .grad as a side effect.
    (grad,) = torch.autograd.grad(loss, delta)

    return epsilon * grad.detach().sign()



def pgd_l0(model, X, y, epsilon=0.1, alpha=0.01, num_iter=20, randomize=False):
    """Construct sparse (L0-style) adversarial perturbations for the examples X.

    Runs ``num_iter`` signed-gradient ascent steps of size ``alpha`` on the
    cross-entropy loss, keeping the perturbation inside ``[0, 1]`` after each
    step, and finally zeroes every entry smaller than ``epsilon`` so that only
    the large changes survive.

    Args:
        model: callable mapping a batch of inputs to class scores.
        X: input batch (tensor).
        y: labels for ``X`` in a form accepted by ``nn.CrossEntropyLoss``.
        epsilon: entries of the final perturbation below this value are set
            to zero; also the half-width of the random start.
        alpha: step size of each signed-gradient update.
        num_iter: number of ascent steps. With ``0`` the initial perturbation
            is returned unchanged.
        randomize: start from a uniform draw in ``[-epsilon, epsilon)`` instead
            of zeros. Negative entries are clipped to 0 by the first step.

    Returns:
        A detached tensor shaped like ``X``. When ``num_iter > 0`` every entry
        is either 0 or lies in ``[epsilon, 1]``.
    """
    if randomize:
        delta = torch.rand_like(X, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(X, requires_grad=True)

    loss_fn = nn.CrossEntropyLoss()

    for _ in range(num_iter):
        loss = loss_fn(model(X + delta), y)

        # Gradient w.r.t. delta only; loss.backward() would also accumulate
        # into the model parameters' .grad as a side effect.
        (grad,) = torch.autograd.grad(loss, delta)

        # Signed ascent step, then keep the perturbation inside [0, 1].
        delta.data = (delta.data + alpha * grad.sign()).clamp(0, 1)

    # Sparsify once, after the ascent. Thresholding inside the loop erased every
    # update whenever alpha < epsilon (the defaults), so the perturbation could
    # never grow and the function always returned zeros.
    if num_iter > 0:
        delta.data = (delta.data >= epsilon).float() * delta.data

    return delta.detach()

In [None]:
def epoch_adversarial(loader, model, attack, opt=None, **kwargs):
    """Run one adversarial pass over ``loader``, training when ``opt`` is given.

    For every batch, ``attack(model, X, y, **kwargs)`` supplies a perturbation
    and the model is evaluated on the perturbed inputs. If an optimizer is
    passed, one optimisation step is taken on that adversarial loss.

    Returns:
        ``(error_rate, mean_loss)``, both divided by ``len(loader.dataset)``.
    """
    criterion = nn.CrossEntropyLoss()
    err_sum = 0.
    loss_sum = 0.

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Build the perturbation against the current model, then score the
        # perturbed batch.
        perturbation = attack(model, inputs, labels, **kwargs)
        logits = model(inputs + perturbation)
        batch_loss = criterion(logits, labels)

        if opt:
            opt.zero_grad()
            batch_loss.backward()
            opt.step()

        _, predicted = logits.max(dim=1)
        err_sum += (predicted != labels).sum().item()
        # The loss is a batch mean; scale it back up to a per-batch sum.
        loss_sum += batch_loss.item() * inputs.shape[0]

    n_examples = len(loader.dataset)
    return err_sum / n_examples, loss_sum / n_examples

In [None]:
def pgd_linf(model, X, y, epsilon=0.1, alpha=0.01, num_iter=20, randomize=False):
    """Construct L-infinity PGD adversarial perturbations for the examples X.

    Runs ``num_iter`` signed-gradient ascent steps of size ``alpha`` on the
    cross-entropy loss, clamping the perturbation to ``[-epsilon, epsilon]``
    after every step.

    Args:
        model: callable mapping a batch of inputs to class scores.
        X: input batch (tensor).
        y: labels for ``X`` in a form accepted by ``nn.CrossEntropyLoss``.
        epsilon: bound on the absolute value of every perturbation entry.
        alpha: step size of each signed-gradient update.
        num_iter: number of ascent steps.
        randomize: start from a uniform draw in ``[-epsilon, epsilon)`` instead
            of zeros.

    Returns:
        A detached tensor shaped like ``X`` with entries in
        ``[-epsilon, epsilon]``.
    """
    if randomize:
        delta = torch.rand_like(X, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(X, requires_grad=True)

    loss_fn = nn.CrossEntropyLoss()

    for _ in range(num_iter):
        loss = loss_fn(model(X + delta), y)

        # Gradient w.r.t. delta only; loss.backward() would also accumulate
        # into the model parameters' .grad as a side effect.
        (grad,) = torch.autograd.grad(loss, delta)

        # Signed ascent step followed by clamping to the epsilon bound.
        delta.data = (delta.data + alpha * grad.sign()).clamp(-epsilon, epsilon)

    return delta.detach()