# Boilerplate

Package installation, loading, and dataloaders. There's also a simple model defined. You can change it your favourite architecture if you want.

In [41]:
#!pip install tensorboardX
#!pip install torchvision 

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
#!pip install matplotlib
import matplotlib.pyplot as plt

from torchvision import datasets, transforms
# from tensorboardX import SummaryWriter

use_cuda = False
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 64

np.random.seed(42)
torch.manual_seed(42)


## Dataloaders
train_dataset = datasets.MNIST('mnist_data/', train=True, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))
test_dataset = datasets.MNIST('mnist_data/', train=False, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Simple NN. You can change this if you want. If you change it, mention the architectural details in your report.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(28*28, 200)
        self.fc2 = nn.Linear(200,10)

    def forward(self, x):
        x = x.view((-1, 28*28))
        x = F.relu(self.fc(x))
        x = self.fc2(x)
        return x

class Normalize(nn.Module):
    def forward(self, x):
        return (x - 0.1307)/0.3081

# Add the data normalization as a first "layer" to the network
# this allows us to search for adverserial examples to the real image, rather than
# to the normalized image
model = nn.Sequential(Normalize(), Net())

model = model.to(device)
model.train()

Sequential(
  (0): Normalize()
  (1): Net(
    (fc): Linear(in_features=784, out_features=200, bias=True)
    (fc2): Linear(in_features=200, out_features=10, bias=True)
  )
)

# Implement the Attacks

Functions are given a simple useful signature that you can start with. Feel free to extend the signature as you see fit.

You may find it useful to create a 'batched' version of PGD that you can use to create the adversarial attack.

In [42]:
# The last argument 'targeted' can be used to toggle between a targeted and untargeted attack.
def fgsm(model, x, eps,y,T=True):
    x.requires_grad_()
    adv_x = x
    L = nn.CrossEntropyLoss()
    loss = L(model(adv_x), y)
    loss.backward(retain_graph=True)
    adv_x = x + eps*x.grad.sign();
    return torch.clamp(adv_x, min=0, max=1)
    
def pgd_untargeted(model, x, y, k, eps, eps_step):
    model.eval()
    adv_x = x
    adv_x.requires_grad_()

    for i in range(k):
        adv_x.retain_grad()
        adv_x = fgsm(model, adv_x, eps_step,y,T=False)
        adv_x = torch.clamp(adv_x - x, -eps, eps)
        adv_x = torch.clamp(x + adv_x, 0, 1)
    return adv_x



    

# Implement Adversarial Training

In [43]:
def train_model(model, num_epochs, enable_defense=True, attack='pgd', eps=0.1):
    # TODO: implement this function that trains a given model on the MNIST dataset.
    # this is a general-purpose function for both standard training and adversarial training.
    # (toggle enable_defense parameter to switch between training schemes)
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    criterion = nn.CrossEntropyLoss()
    tot_test=0.0
    tot_acc =0.0
    for epoch in range(1,num_epochs+1):
        running_loss = 0.0
        for batch_idx, (x, y) in enumerate(train_loader):   
            if enable_defense:
                x = pgd_untargeted(model, x, y, 10, eps, 0.01)
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            x, y = x.to(device), y.to(device)
            out = model(x)
            pred = torch.max(out, dim=1)[1]
            acc = pred.eq(y).sum().item()
            tot_acc += acc
            tot_test += x.size()[0]
            
        print('Epoch %d: Accuracy %.5lf [%.2lf seconds]' % (epoch, tot_acc/tot_test))


In [44]:
def test_model_on_attacks(model, attack='pgd', eps=0.1):
    # TODO: implement this function to test the robust accuracy of the given model
    # use pgd_untargeted() within this function
    
    model.eval()
    tot_test=0.0
    tot_acc =0.0
    for batch_idx, (x, y) in enumerate(train_loader):
        x = pgd_untargeted(model, x, y, 10, eps, 0.01)
        
        out = model(x)
        
        pred = torch.max(out, dim=1)[1]
        acc = pred.eq(y).sum().item()
        tot_acc += acc
        tot_test += x.size()[0]
    
    print('Robust Accuracy %.5lf' % (100*(tot_acc/tot_test)))
    

# Study Accuracy, Quality, etc.

Compare the various results and report your observations on the submission.

In [45]:
## train the original model
model = nn.Sequential(Normalize(), Net())
model = model.to(device)
model.train()

train_model(model, 5, False)
torch.save(model.state_dict(), 'weights.pt')

Epoch 1: Accuracy 0.87160 [5.42 seconds]
Epoch 2: Accuracy 0.89998 [4.68 seconds]
Epoch 3: Accuracy 0.91427 [4.20 seconds]
Epoch 4: Accuracy 0.92370 [3.55 seconds]
Epoch 5: Accuracy 0.93073 [3.48 seconds]


In [49]:
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load('weights.pt'))

model.eval()
tot_test=0
tot_acc =0
for batch_idx, (x, y) in enumerate(train_loader):


    x, y = x.to(device), y.to(device)
    out = model(x)
    
    pred = torch.max(out, dim=1)[1]
    acc = pred.eq(y).sum().item()
    tot_acc += acc
    tot_test += x.size()[0]

print('Standard Accuracy %.5lf' % (100*(tot_acc/tot_test)))


  model.load_state_dict(torch.load('weights.pt'))


Standard Accuracy 96.32667


In [46]:
## PGD attack
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load('weights.pt'))

for eps in [0.05, 0.1, 0.15, 0.2]:
    test_model_on_attacks(model, attack='pgd', eps=eps)

  model.load_state_dict(torch.load('weights.pt'))


Accuracy 0.72942 [55.67 seconds]
Accuracy 0.19707 [54.36 seconds]
Accuracy 0.19707 [54.25 seconds]
Accuracy 0.19707 [54.23 seconds]


In [47]:
## PGD based adversarial training
model = nn.Sequential(Normalize(), Net())
eps = 0.1
train_model(model, 5, True, 'pgd', eps)
torch.save(model.state_dict(), f'weights_AT_{eps}.pt')

Epoch 1: Accuracy 0.55807 [63.63 seconds]
Epoch 2: Accuracy 0.63928 [64.61 seconds]
Epoch 3: Accuracy 0.68357 [64.11 seconds]
Epoch 4: Accuracy 0.71281 [65.55 seconds]
Epoch 5: Accuracy 0.73442 [64.02 seconds]


In [48]:
## PGD attack
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load(f'weights_AT_0.1.pt'))

for eps in [0.05, 0.1, 0.15, 0.2]:
    test_model_on_attacks(model, attack='pgd', eps=eps)

  model.load_state_dict(torch.load(f'weights_AT_0.1.pt'))


Accuracy 0.90988 [54.68 seconds]
Accuracy 0.82735 [56.16 seconds]
Accuracy 0.82735 [56.89 seconds]
Accuracy 0.82735 [57.45 seconds]
