# Boilerplate

Packae installation, loading, and dataloaders. There's also a simple model defined. You can change it your favourite architecture if you want.

In [1]:
!pip install tensorboardX

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time

from torchvision import datasets, transforms
from tensorboardX import SummaryWriter

use_cuda = False
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 64

np.random.seed(42)
torch.manual_seed(42)


## Dataloaders
train_dataset = datasets.MNIST('mnist_data/', train=True, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))
test_dataset = datasets.MNIST('mnist_data/', train=False, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Simple NN. You can change this if you want.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(28*28, 200)
        self.fc2 = nn.Linear(200,10)

    def forward(self, x):
        x = x.view((-1, 28*28))
        x = F.relu(self.fc(x))
        x = self.fc2(x)
        return x

class Normalize(nn.Module):
    def forward(self, x):
        return (x - 0.1307)/0.3081

# Add the data normalization as a first "layer" to the network
# this allows us to search for adverserial examples to the real image, rather than
# to the normalized image
model = nn.Sequential(Normalize(), Net())

model = model.to(device)
model.train()

Sequential(
  (0): Normalize()
  (1): Net(
    (fc): Linear(in_features=784, out_features=200, bias=True)
    (fc2): Linear(in_features=200, out_features=10, bias=True)
  )
)

# Implement the Attacks

Functions are given a simple useful signature that you can start with. Feel free to extend the signature as you see fit.

You may find it useful to create a 'batched' version of PGD that you can use to create the adversarial attack.

In [2]:
# The last argument 'targeted' can be used to toggle between a targeted and untargeted attack.
def fgsm(model, x, target, eps, targeted=True):
    x.requires_grad_()
    adv_x = x
    L = nn.CrossEntropyLoss()
    loss = L(model(adv_x), target)
    loss.backward(retain_graph=True)
    if(targeted):
        adv_x = x - eps*x.grad.sign();
    else:
        adv_x = x + eps*x.grad.sign();
    return torch.clamp(adv_x, min=0, max=1)


def pgd_untargeted(model, x, label, k, eps, eps_step):
    adv_x = x
    adv_x.requires_grad_()
    for i in range(k):
        adv_x.retain_grad()
        adv_x = fgsm(model, adv_x, label, eps_step, targeted=False)
        adv_x = torch.clamp(x + torch.clamp(adv_x - x, max=eps), min=0, max=1)
        new_class = model(adv_x).argmax(dim=1).item()
        if(new_class != label):
            return adv_x
    return adv_x
        
def pgd_batch(model, xs, labels, k, eps, eps_step):
    adv_x = xs
    adv_x.requires_grad_()
    for i in range(k):
        adv_x.retain_grad()
        adv_x = fgsm(model, adv_x, labels, eps_step, targeted=False)
        adv_x = torch.clamp(xs + torch.clamp(adv_x - xs, max=eps), min=0, max=1)
    return adv_x

# Implement Adversarial Training

In [3]:
def train_model(model, num_epochs, enable_defense=True):
    learning_rate = 0.0001

    opt = optim.Adam(params=model.parameters(), lr=learning_rate)

    ce_loss = torch.nn.CrossEntropyLoss()

    tot_steps = 0

    for epoch in range(1,num_epochs+1):
        t1 = time.time()
        for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
            if enable_defense:
                model.eval()
                x_batch = pgd_batch(model, x_batch, y_batch, 10, .06, .01)
                model.train()

            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            tot_steps += 1
            opt.zero_grad()
            out = model(x_batch)
            batch_loss = ce_loss(out, y_batch)
            batch_loss.backward()
            opt.step()

        tot_test, tot_acc = 0.0, 0.0
        for batch_idx, (x_batch, y_batch) in enumerate(test_loader):
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            out = model(x_batch)
            pred = torch.max(out, dim=1)[1]
            acc = pred.eq(y_batch).sum().item()
            tot_acc += acc
            tot_test += x_batch.size()[0]
        t2 = time.time()

        print('Epoch %d: Accuracy %.5lf [%.2lf seconds]' % (epoch, tot_acc/tot_test, t2-t1))

# Study Accuracy, Quality, etc.

Compare the various results and report your observations on the submission.

In [4]:
# Your code here
#train_model(model, 5)
train_model(model, 5, False)

Epoch 1: Accuracy 0.91930 [7.61 seconds]
Epoch 2: Accuracy 0.93590 [8.33 seconds]
Epoch 3: Accuracy 0.94730 [8.27 seconds]
Epoch 4: Accuracy 0.95470 [7.74 seconds]
Epoch 5: Accuracy 0.95820 [7.57 seconds]


In [5]:
def test_model_pgd(model, k, eps, eps_step):
    t1 = time.time()
    tot_test, tot_acc = 0.0, 0.0
    for batch_idx, (x_batch, y_batch) in enumerate(test_loader):
        x_batch = pgd_batch(model, x_batch, y_batch, k, eps, eps_step)
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        out = model(x_batch)
        pred = torch.max(out, dim=1)[1]
        acc = pred.eq(y_batch).sum().item()
        tot_acc += acc
        tot_test += x_batch.size()[0]
    t2 = time.time()

    print('Accuracy %.5lf [%.2lf seconds]' % (tot_acc/tot_test, t2-t1))

In [6]:
model.eval()
test_model_pgd(model, 20, .1, .02)

Accuracy 0.00030 [21.83 seconds]


In [7]:
test_model_pgd(model, 8, .07, .01)

Accuracy 0.52290 [5.43 seconds]


In [None]:
test_model_pgd(model, 6, .05, .01)

In [8]:
def test_model_fgsm(model, eps):
    t1 = time.time()
    tot_test, tot_acc = 0.0, 0.0
    for batch_idx, (x_batch, y_batch) in enumerate(test_loader):
        x_batch = fgsm(model, x_batch, y_batch, eps, targeted=False)
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        out = model(x_batch)
        pred = torch.max(out, dim=1)[1]
        acc = pred.eq(y_batch).sum().item()
        tot_acc += acc
        tot_test += x_batch.size()[0]
    t2 = time.time()

    print('Accuracy %.5lf [%.2lf seconds]' % (tot_acc/tot_test, t2-t1))

In [9]:
test_model_fgsm(model, .1)

Accuracy 0.27650 [1.20 seconds]


In [10]:
test_model_fgsm(model, .07)

Accuracy 0.58090 [1.18 seconds]


In [None]:
test_model_fgsm(model, .05)

In [None]:
from matplotlib import pyplot as plt
import numpy as np
for batch_idx, (x_batch, y_batch) in enumerate(test_loader):
    #x_batch = pgd_batch(model, x_batch, y_batch, 20, .1, .02)
    first_image = x_batch[3].detach().numpy()
    pixels = first_image.reshape((28, 28))
    plt.imshow(pixels, cmap='gray')
    plt.show()
    break