# Boilerplate

Package installation, loading, and dataloaders. There's also a simple model defined. You can change it your favourite architecture if you want.

In [1]:
# !pip install tensorboardX

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
import matplotlib.pyplot as plt

from torchvision import datasets, transforms
# from tensorboardX import SummaryWriter

use_cuda = False
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 64

np.random.seed(42)
torch.manual_seed(42)


## Dataloaders
train_dataset = datasets.MNIST('mnist_data/', train=True, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))
test_dataset = datasets.MNIST('mnist_data/', train=False, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Simple NN. You can change this if you want. If you change it, mention the architectural details in your report.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(28*28, 50)
        self.fc2 = nn.Linear(50,50)
        self.fc3 = nn.Linear(50,50)
        self.fc4 = nn.Linear(50,10)

    def forward(self, x):
        x = x.view((-1, 28*28))
        x = F.relu(self.fc(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

class Normalize(nn.Module):
    def forward(self, x):
        return (x - 0.1307)/0.3081

# Add the data normalization as a first "layer" to the network
# this allows us to search for adverserial examples to the real image, rather than
# to the normalized image
model = nn.Sequential(Normalize(), Net())

model = model.to(device)
model.train()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1133)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:11<00:00, 829615.72it/s] 


Extracting mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1133)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1040309.29it/s]


Extracting mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1133)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 8672496.99it/s]


Extracting mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1133)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 1271221.72it/s]

Extracting mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw






Sequential(
  (0): Normalize()
  (1): Net(
    (fc): Linear(in_features=784, out_features=50, bias=True)
    (fc2): Linear(in_features=50, out_features=50, bias=True)
    (fc3): Linear(in_features=50, out_features=50, bias=True)
    (fc4): Linear(in_features=50, out_features=10, bias=True)
  )
)

# Implement Standard Training

In [2]:
def train_model(model, num_epochs):
    # TODO: implement this function that trains a given model on the MNIST dataset.
    # this is a general-purpose function for both standard training and adversarial training.
    # (toggle enable_defense parameter to switch between training schemes)
    model.train()
    optimizer = optim.SGD(model.parameters())

    for epoch in range(num_epochs):


        for data, label in train_loader:
            data = data.to(device)
            label = label.to(device)


            #standard training
            optimizer.zero_grad()
            out = model(data)
            loss = F.cross_entropy(out, label)

            loss.backward()
            optimizer.step()


In [47]:
def get_worst_case_logits(data, label, model, eps_test):
    lb = data - eps_test
    ub = data + eps_test

    #propogate through each layer based on linear or ReLU
    for layer in model.children():
        lb = layer(lb)
        ub = layer(ub)

    true_classes = label.unsqueeze(1) == torch.arange(10).to(device)
    # print("true classes", true_classes)
    logits = torch.where(true_classes, lb, ub)
    # print("logits", logits)
    return logits

In [38]:
def train_model_IBP(model, num_epochs):
    #  trains a given model on the MNIST dataset.
    model.train()
    optimizer = optim.SGD(model.parameters())

    # setup eps_train and k steps for each epoch
    eps_test = 0 
    eps_test_step = .1 / num_epochs
    
    k = 1
    k_step = -0.5 / num_epochs


    for epoch in range(num_epochs):

        for data, label in train_loader:
            data = data.to(device)
            label = label.to(device)

            optimizer.zero_grad()
            out = model(data)
            z_hat = get_worst_case_logits(data, label, model, eps_test)

            #TODO edit loss function
            loss_fit = F.cross_entropy(out, label)
            loss_spec = F.cross_entropy(z_hat, label) 
            # print("loss_fit:", loss_fit, "loss_spec:", loss_spec)
            loss = (k*loss_fit) + ((1-k)*loss_spec)
            loss.backward()
            optimizer.step()
        
        eps_test += eps_test_step
        k += k_step

In [5]:
#Interval analysis
def interval_analysis(model, input, eps):

    lb = input - eps
    ub = input + eps

    lb = torch.clamp(lb, 0, 1)
    ub = torch.clamp(ub, 0, 1)
    
    lb_out = model(lb)
    ub_out = model(ub)

    return lb_out, ub_out

In [6]:
def test_model(model):
    # TODO: implement this function to test the robust accuracy of the given model
    # use pgd_untargeted() within this function

    model.eval()

    correct, total = 0, 0

    for data, label in test_loader:
        data = data.to(device)
        label = label.to(device)

        out = model(data)
        _, predicted = torch.max(out.data, 1)
        
        # print(label.size(0))
        # print(data.size(0))
        # print(out.size(0))
        total += label.size(0)
        # print(predicted)
        correct += (predicted == label).sum().item()

    print("accuracy", 100 * correct / total)
    

In [7]:
def test_robustness(model):

    model.eval()

    for eps in [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]:
        correct, total = 0, 0

        for data, label in test_loader:
            data = data.to(device)
            label = label.to(device)

            out_lb, out_ub = interval_analysis(model, data, eps)
            out_lb, out_ub = out_lb.argmax(dim=1), out_ub.argmax(dim=1) #choose class for each image
            

            total += label.size(0)
            correct += (out_lb == out_ub).sum().item()
        print("eps:", eps)
        print("percent robust", 100 * correct / total)
        print()
    
        

# Study Accuracy, Quality, etc.

Compare the various results and report your observations on the submission.

In [8]:
# The last argument 'targeted' can be used to toggle between a targeted and untargeted attack.
def fgsm(model, x, y, eps):
    #TODO: implement this as an intermediate step of PGD
    # Notes: put the model in eval() mode for this function
    model.eval()                   

    x.requires_grad = True

    #get gradient loss
    output = model(x)
    loss = F.cross_entropy(output,y)
    model.zero_grad()
    loss.backward()

    sign_x = x.grad.sign()

    #get eta with e * sign(loss grad) 
    n = eps * sign_x

    x_prime = x + n
    x_prime = torch.clamp(x_prime,0,1)

    return x_prime

def pgd_untargeted(model, x, y, k, eps, eps_step):
    #TODO: implement this 
    # Notes: put the model in eval() mode for this function
    # x: input image
    # y: ground truth label for x
    # k: steps of FGSM
    # eps: projection region for PGD (note the need for normalization before projection, as eps values are for inputs in [0,1])
    # eps_step: step for one iteration of FGSM
    model.eval()

    x_init = x.clone().detach()
    ball_max = x_init + eps
    ball_min = x_init - eps

    for _ in range(k):

        x.requires_grad = True
        x_new = fgsm(model, x, y, eps_step)

        x_new = torch.clamp(x_new, ball_min, ball_max)
        x_new = torch.clamp(x_new, 0, 1)

        x = x_new.detach()

    return x

In [9]:
def test_model_on_attacks(model, attack='pgd', k=10, eps=0.1):
    # TODO: implement this function to test the robust accuracy of the given model
    # use pgd_untargeted() within this function

    eps_step = eps/k
    model.eval()

    correct, correct_second, total = 0, 0, 0

    for data, label in test_loader:
        data = data.to(device)
        label = label.to(device)

        if attack == 'pgd':
            #TODO implement
            adversarial_data = pgd_untargeted(model, data, label, k, eps, eps_step)

            full_data = torch.cat((data, adversarial_data))
            label = torch.cat((label, label))
        else:
            full_data = data

        out = model(full_data)
        _, predicted = torch.max(out.data, 1)
        
        # print(label.size(0))
        # print(data.size(0))
        # print(out.size(0))
        total += label.size(0)
        # print(predicted)
        correct += (predicted[:data.size(0)] == label[:data.size(0)]).sum().item()
        if attack == 'pgd':
            correct_second += (predicted[data.size(0):] == label[:data.size(0)]).sum().item()

    if attack == 'pgd':
        print("for eps", eps)
        print("robust accuracy", 100 * (correct+correct_second) / total)
        print("standard accuracy", 100 * 2*correct / total)
        print("adversarial accuracy", 100 * 2*correct_second / total)
    else:
        print("accuracy", 100 * correct / total)
    

In [34]:
#Interval analysis
def interval_analysis(model, input, eps):

    lb = input - eps
    ub = input + eps

    lb = torch.clamp(lb, 0, 1)
    ub = torch.clamp(ub, 0, 1)
    
    lb_out = model(lb)
    ub_out = model(ub)

    return lb_out, ub_out

In [35]:
def test_robustness(model):

    model.eval()

    for eps in [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]:
        correct, total = 0, 0

        for data, label in test_loader:
            data = data.to(device)
            label = label.to(device)

            out_lb, out_ub = interval_analysis(model, data, eps)
            out_lb, out_ub = out_lb.argmax(dim=1), out_ub.argmax(dim=1) #choose class for each image
            

            total += label.size(0)
            correct += (out_lb == out_ub).sum().item()
        print("eps:", eps, "percent robust", 100 * correct / total)

    

In [44]:
## train the original model
model = nn.Sequential(Normalize(), Net())
model = model.to(device)
model.train()

start = time.time()
train_model_IBP(model, 15)
end = time.time()
print("IBP train time: ", end - start)

torch.save(model.state_dict(), 'weights_IBP.pt')

IBP train time:  176.51938009262085


In [13]:
model = nn.Sequential(Normalize(), Net())
model = model.to(device)
model.train()

start = time.time()
train_model(model, 15)
end = time.time()
print("standard train time: ", end - start)


torch.save(model.state_dict(), 'weights.pt')

standard train time:  148.0978078842163


In [46]:

model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load('weights.pt'))

test_model(model)

model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load('weights_IBP.pt'))

test_model(model)

accuracy 89.91
accuracy 89.87


In [15]:
## robust test
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load('weights.pt'))

for eps in [.05]:# [0.05, 0.1, 0.15, 0.2]:
    test_model_on_attacks(model, attack='pgd', k=10, eps=eps)

for eps 0.05
robust accuracy 76.84
standard accuracy 89.91
adversarial accuracy 63.77


In [41]:
## robust test
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load('weights_IBP.pt'))

for eps in [.05]:# [0.05, 0.1, 0.15, 0.2]:
    test_model_on_attacks(model, attack='pgd', k=10, eps=eps)

for eps 0.05
robust accuracy 74.67
standard accuracy 89.16
adversarial accuracy 60.18


In [42]:
#robustness test
test_robustness(model)

    

eps: 0.01 percent robust 99.12
eps: 0.02 percent robust 98.4
eps: 0.03 percent robust 97.72
eps: 0.04 percent robust 97.01
eps: 0.05 percent robust 96.25
eps: 0.06 percent robust 95.49
eps: 0.07 percent robust 94.72
eps: 0.08 percent robust 93.93
eps: 0.09 percent robust 93.08
eps: 0.1 percent robust 92.2
