# Boilerplate

Package installation, loading, and dataloaders. There's also a simple model defined. You can change it your favourite architecture if you want.

In [2]:
# !pip install tensorboardX

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
import matplotlib.pyplot as plt

from torchvision import datasets, transforms
# from tensorboardX import SummaryWriter

use_cuda = False
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 64

np.random.seed(42)
torch.manual_seed(42)


## Dataloaders
train_dataset = datasets.MNIST('mnist_data/', train=True, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))
test_dataset = datasets.MNIST('mnist_data/', train=False, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class ThreeLayerFC(nn.Module):
    def __init__(self):
        super(ThreeLayerFC, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 50)
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 50)
        self.fc4 = nn.Linear(50, 10)

    def forward(self, x):
        x = x.view((-1, 28 * 28))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

    def interval_bound_propagation(self, L, U):
        L = L.view(-1, 28 * 28)
        U = U.view(-1, 28 * 28)
        L_fc1 = self.fc1(L)
        U_fc1 = self.fc1(U)

        L_fc1 = F.relu(L_fc1)
        U_fc1 = F.relu(U_fc1)

        L_fc2 = self.fc2(L_fc1)
        U_fc2 = self.fc2(U_fc1)

        L_fc2 = F.relu(L_fc2)
        U_fc2 = F.relu(U_fc2)

        L_fc3 = self.fc3(L_fc2)
        U_fc3 = self.fc3(U_fc2)

        L_fc3 = F.relu(L_fc3)
        U_fc3 = F.relu(U_fc3)

        L_fc4 = self.fc4(L_fc3)
        U_fc4 = self.fc4(U_fc3)

        return L_fc4, U_fc4

model = ThreeLayerFC().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 22658199.35it/s]


Extracting mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 624143.76it/s]


Extracting mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 5573767.26it/s]


Extracting mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2579624.75it/s]

Extracting mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw






# Implement Adversarial Training

In [3]:
def train_model(model, num_epochs, enable_defense=True, attack='pgd', eps=0.1):
    # TODO: implement this function that trains a given model on the MNIST dataset.
    # this is a general-purpose function for both standard training and adversarial training.
    # (toggle enable_defense parameter to switch between training schemes)
    model.train()
    loss = nn.CrossEntropyLoss()

    # If defense is enabled, add adversial examples to the training set
    for epoch in range(num_epochs):
      print(f'Epoch {epoch + 1}:')
      correct = 0
      total_samples = 0
      for i, data in enumerate(train_loader):
        inputs, labels = data

        if enable_defense:
          inputs = pgd_untargeted(model, inputs, labels, 10, eps, 0.01)

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        cost = loss(outputs, labels)
        cost.backward()

        optimizer.step()


In [None]:
def test_model_on_attacks(model, attack='pgd', eps=0.1):
    # TODO: implement this function to test the robust accuracy of the given model
    # use pgd_untargeted() within this function
    model.eval()
    correct = 0
    total = 0
    for x, y in test_loader:
      images = pgd_untargeted(model, x, y, 10, eps, 0.01)
      labels = y
      outputs = model(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

    print(f'Robust accuracy: {(100 * correct / total):2f}%')



# Study Accuracy, Quality, etc.

Compare the various results and report your observations on the submission.

Train the model using MNIST dataset.

In [4]:
train_model(model, 5, False)
torch.save(model.state_dict(), 'weights.pt')

Epoch 1:
Epoch 2:
Epoch 3:
Epoch 4:
Epoch 5:


Print standard accuracy

In [30]:
correct = 0
total = 0
with torch.no_grad():
  for data in test_loader:
    images, labels = data

    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    # the class with the highest energy is what we choose as prediction
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    del images, labels, outputs

print(f'Standard accuracy: {100 * correct // total}%')

Standard accuracy: 92%


In [17]:
epsilons = np.linspace(0.01, 0.1, 10)

total_samples = len(test_loader.dataset)
robustness_results = torch.zeros(len(epsilons))
model.eval()

for j, epsilon in enumerate(epsilons):
    correct_robust = 0

    for i, (data, target) in enumerate(test_loader):
        data = data.view(-1, 28, 28)  # reshape input
        original_output = model(data).argmax(dim=1) # original prediction

        # Define the lower and upper bounds for the L-infinity ball
        L_input = torch.clamp(data - epsilon, 0, 1)
        U_input = torch.clamp(data + epsilon, 0, 1)

        L_out, U_out = model.interval_bound_propagation(L_input, U_input)

        lower_bound_class = L_out.argmax(dim=1)
        upper_bound_class = U_out.argmax(dim=1)

        if (lower_bound_class == original_output).all() and (upper_bound_class == original_output).all():
          correct_robust += 1

    robustness_results[j] = correct_robust / total_samples
    print(f"Epsilon: {epsilon.item():.2f}, Robustness: {robustness_results[j]*100:.2f}%")

Epsilon: 0.01, Robustness: 0.89%
Epsilon: 0.02, Robustness: 0.54%
Epsilon: 0.03, Robustness: 0.39%
Epsilon: 0.04, Robustness: 0.28%
Epsilon: 0.05, Robustness: 0.15%
Epsilon: 0.06, Robustness: 0.12%
Epsilon: 0.07, Robustness: 0.07%
Epsilon: 0.08, Robustness: 0.05%
Epsilon: 0.09, Robustness: 0.01%
Epsilon: 0.10, Robustness: 0.01%
