In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import LightSource

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# download MNIST training and testing datasets, then prepare corresponding dataloaders (batch size = 100)
mnist_train = datasets.MNIST("./data", train=True, download=True, transform=transforms.ToTensor())
mnist_test = datasets.MNIST("./data", train=False, download=True, transform=transforms.ToTensor())
train_loader = DataLoader(mnist_train, batch_size = 100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size = 100, shuffle=False)




Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 76510044.58it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 4330605.38it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 110558739.59it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 14487094.12it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [27]:
# initialize the CNN architecture with 4 convolutional layers and 2 MLP layers for standard training
torch.manual_seed(0)

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)

model_cnn = nn.Sequential(nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),
                          nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(64, 64, 3, padding=1, stride=2), nn.ReLU(),
                          Flatten(),
                          nn.Linear(7*7*64, 100), nn.ReLU(),
                          nn.Linear(100, 10)).to(device)

In [7]:
#### Your task: complete the following function
def pgd(model, X, y, epsilon=0.1, alpha=0.02, num_iter=10, randomize=False):
    """ Construct PGD adversarial examples for the example (X,y)"""

    # delta stores the generated perturbation and updates its value iteratively
    delta = torch.zeros_like(X,requires_grad=True).to(device)
    criterion = nn.CrossEntropyLoss()
    #optimizer = torch.optim.SGD([delta], lr= alpha)
    for t in range(num_iter):
        #optimizer.zero_grad()
        pred = model((X + delta).to(device))
        output =  nn.CrossEntropyLoss()(pred.to(device),torch.tensor(y).to(device))
        output.backward()
        delta.data = delta.data + delta.grad.detach()
        delta.data = delta.data.clamp(-epsilon,epsilon)
        delta.grad.zero_()


    return delta


In [23]:
#### Your task: complete the following functions
def epoch(loader, model, opt=None):
    """Standard training/evaluation epoch over the dataset"""
    loss = 0
    if(opt!= None):
      for (i,j) in (loader):
            opt.zero_grad()
            pred = model(i.to(device))
            loss =  nn.CrossEntropyLoss()(pred,torch.LongTensor(j).to(device))
            # Backward pass to compute the gradient
            with(torch.enable_grad()):
              loss.backward()
            # Clip the gradient to the range [-epsilon, epsilon]
            opt.step()

    errors = 0
    loss = 0
    for (i,j) in (loader):
      pred = model(i.to(device))
      loss +=  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device)).item()
      predictions = torch.argmax(pred, dim = 1)  # Assuming binary classification
      errors += (predictions != j.to(device)).sum().item()
    return  errors/len(loader.dataset)*100 , loss


def epoch_adv(loader, model, attack, opt=None, **kwargs):
    """Adversarial training/evaluation epoch over the dataset"""
    loss = 0
    errors = 0
    tot_l = 0
    for (i,j) in (loader):
            delta = pgd(model, i.to(device) ,torch.tensor(j).to(device), num_iter = 10)
            pred = model(i.to(device) + delta)
            loss =  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device))
            if (opt != None):
                opt.zero_grad()
                loss.backward()
                opt.step()
            tot_l += loss
            errors += (torch.argmax(pred, dim = 1) != j.to(device)).sum().item()

    return errors/len(loader.dataset)*100  , tot_l


In [28]:
# specify the optimizer as SGD
opt = optim.SGD(model_cnn.parameters(), lr=1e-1)

# standard training
for t in range(5):
    train_err, train_loss = epoch(train_loader, model_cnn, opt)
    test_err, test_loss = epoch(test_loader, model_cnn)
    adv_err, adv_loss = epoch_adv(test_loader, model_cnn, pgd)

    print(*("{:.6f}".format(i) for i in (train_err, test_err, adv_err)), sep="\t")

# save the standard trained model for further evaluation
torch.save(model_cnn.state_dict(), "model_cnn.pt")

  loss +=  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device)).item()
  delta = pgd(model, i.to(device) ,torch.tensor(j).to(device), num_iter = 10)
  output =  nn.CrossEntropyLoss()(pred.to(device),torch.tensor(y).to(device))
  loss =  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device))


3.170000	3.070000	4.930000
1.798333	2.290000	4.150000
1.163333	1.540000	3.200000
1.035000	1.630000	3.520000
0.756667	1.360000	2.870000


In [25]:
# use the same CNN architecture for robust training
model_cnn_robust = nn.Sequential(nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
                                 nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),
                                 nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
                                 nn.Conv2d(64, 64, 3, padding=1, stride=2), nn.ReLU(),
                                 Flatten(),
                                 nn.Linear(7*7*64, 100), nn.ReLU(),
                                 nn.Linear(100, 10)).to(device)

In [26]:
# specify the optimizer as SGD
opt = optim.SGD(model_cnn_robust.parameters(), lr=1e-1)

# PGD-based adversarial training
for t in range(5):
    train_err, train_loss = epoch_adv(train_loader, model_cnn_robust, pgd, opt)
    test_err, test_loss = epoch(test_loader, model_cnn_robust)
    adv_err, adv_loss = epoch_adv(test_loader, model_cnn_robust, pgd)

    print(*("{:.6f}".format(i) for i in (train_err, test_err, adv_err)), sep="\t")

# save the standard trained model for further evaluation
torch.save(model_cnn_robust.state_dict(), "model_cnn_robust.pt")

  delta = pgd(model, i.to(device) ,torch.tensor(j).to(device), num_iter = 10)
  output =  nn.CrossEntropyLoss()(pred.to(device),torch.tensor(y).to(device))
  loss =  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device))
  loss +=  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device)).item()


17.101667	2.410000	3.320000
2.976667	1.710000	2.460000
2.221667	1.360000	2.160000
1.690000	1.180000	1.890000
1.401667	1.190000	1.810000


In [29]:
# load the standard trained and adversarially trained models
model_cnn.load_state_dict(torch.load("model_cnn.pt"))
model_cnn_robust.load_state_dict(torch.load("model_cnn_robust.pt"))

<All keys matched successfully>

In [30]:
def fgsm(model, X, y, epsilon=0.1):
    """ Construct FGSM adversarial examples for the example (X,y)"""
    delta = torch.zeros_like(X, requires_grad=True)
    loss = nn.CrossEntropyLoss()(model(X + delta), y)
    loss.backward()
    return epsilon * delta.grad.detach().sign()

In [31]:
# clean performance (no attack)
print("clean:", "{:.4f}".format(epoch(test_loader, model_cnn)[0]),
      "{:.4f}".format(epoch(test_loader, model_cnn_robust)[0]))

# evaluate both models using FGSM attack
print("FGSM: ", "{:.4f}".format(epoch_adv(test_loader, model_cnn, fgsm)[0]),
      "{:.4f}".format(epoch_adv(test_loader, model_cnn_robust, fgsm)[0]))

# evaluate both models using PGD attack
print("PGD (10 iter):", "{:.4f}".format(epoch_adv(test_loader, model_cnn, pgd, num_iter=10)[0]),
      "{:.4f}".format(epoch_adv(test_loader, model_cnn_robust, pgd, num_iter=10)[0]))

  loss +=  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device)).item()


clean: 1.3600 1.1900


  delta = pgd(model, i.to(device) ,torch.tensor(j).to(device), num_iter = 10)
  output =  nn.CrossEntropyLoss()(pred.to(device),torch.tensor(y).to(device))
  loss =  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device))


FGSM:  2.8700 1.8100
PGD (10 iter): 2.8700 1.8100


In [None]:
#### Your task (bonus): develop an attack method to achieve an attack success rate as high as possible. You can modify the following function if needed.

# You can try out some of the attack methods introduced in Lectures 3-4 or develop your unique creative attack.
# In principle, the performance of your attack should be better than FGSM or PGD, 10 iter;
# The higher attack success rates you can achieve, the higher credits you may receive.

def my_attack(model, X, y, epsilon=0.1):
  """ Construct adversarial examples for the example (X,y)"""

  return

In [None]:
print("My Attack: ", "{:.4f}".format(epoch_adv(test_loader, model_cnn, my_attack)[0]),
      "{:.4f}".format(epoch_adv(test_loader, model_cnn_robust, my_attack)[0]))