### Imports

In [None]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, utils
from torch.utils.data import TensorDataset, DataLoader
import torch.backends.cudnn as cudnn
import time
from pathlib import Path
import os

from art.attacks.evasion import FastGradientMethod, CarliniL2Method, CarliniLInfMethod, ZooAttack
from art.estimators.classification import PyTorchClassifier
from art.utils import load_mnist

from quant_mnist_model import *
from QuantModules import *
from _quantUtils import *
from _utils import train, test


# Render matplotlib figures inline in the notebook, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format='retina'
# Prefer the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Switch consulted (together with torch.cuda.is_available()) when `device` is rebuilt
# in the model-loading cell further down.
use_cuda = True

In [None]:
# Fetch MNIST through ART; min_/max_ are reused later as the classifier's clip_values.
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Exchange axes 1 and 3 and cast to float32 for PyTorch.
# NOTE(review): assuming load_mnist returns (N, H, W, C), swapaxes(1, 3) yields (N, C, W, H),
# i.e. the two spatial axes are exchanged as well. np.transpose(x, (0, 3, 1, 2)) would keep the
# image orientation - confirm which layout the pretrained weights were trained on before changing.
x_train, x_test = (
    np.swapaxes(images, 1, 3).astype(np.float32) for images in (x_train, x_test)
)

# Wrap both splits as tensor datasets with their batch loaders (128 for train, 1000 for test).
train_dataset = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train))
test_dataset = TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test))
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=1000)

### Load pretrained quantized model

In [None]:
# Checkpoint holding the pretrained weights (a state_dict) of the 4-bit MNIST model.
PATH = 'mnist_4bit.pth'

# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

# `classifier` initially names the model class (presumably pulled in by the star imports),
# but this cell rebinds it to the ART wrapper below, and later cells rely on that name.
# Keep a handle on the model class so the cell can be re-run without restarting the kernel;
# otherwise a second run would try to call the wrapper instance and fail.
if not isinstance(classifier, PyTorchClassifier):
    model_class = classifier

# Initialize models.
net = model_class().to(device)

# Load pre-trained model
net.load_state_dict(torch.load(PATH, map_location='cpu'))
# The network is only used for inference and attacks here, so put it in evaluation mode.
net.eval()

# Load loss and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=.01, weight_decay=1e-4)

# Make a classifier wrapper!
# clip_values reuses the min_/max_ returned by load_mnist in the data-loading cell.
classifier = PyTorchClassifier(
    model=net,
    clip_values=(min_, max_),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)


# Test model
# Labels are decoded with argmax along axis 1 before comparing with the predicted classes.
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {} %".format(accuracy * 100))

### FGSM attack

In [None]:
# Values passed as the `eps` argument of FastGradientMethod, one attack run per value.
epsilons = [.1, .2, .3]

for e in epsilons:
    # Craft adversarial versions of the whole test set with this eps.
    adv_crafter = FastGradientMethod(classifier, eps=e)
    x_test_adv = adv_crafter.generate(x=x_test)

    # Accuracy (in percent) of the wrapped model on the adversarial inputs;
    # labels are decoded with argmax along axis 1.
    predictions = classifier.predict(x_test_adv)
    is_correct = np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)
    accuracy = 100.*(np.sum(is_correct) / len(y_test))
    print("Epsilon: {}   Test Accuracy = {}".format(e, accuracy))

### C&W attack

In [None]:
# Generate adversarial samples
def CarliniL2(classifier, x_test, init_const=0.01):
    """Generate untargeted CarliniL2Method adversarial samples for ``x_test``.

    Args:
        classifier: Model wrapper handed to the attack as ``classifier``.
        x_test: Inputs passed to the attack's ``generate`` call.
        init_const: Value forwarded as the attack's ``initial_const``.

    Returns:
        Whatever ``CarliniL2Method.generate`` returns for ``x_test``.
    """
    # Fixed settings of the attack; only the initial constant is exposed to callers.
    attack_config = dict(
        classifier=classifier,
        confidence=0.0,
        targeted=False,
        learning_rate=0.01,
        binary_search_steps=9,
        initial_const=init_const,
        batch_size=1,
    )
    return CarliniL2Method(**attack_config).generate(x=x_test)

def CarliniLInf(classifier, x_test, epsilon=0.3):
    """Generate untargeted CarliniLInfMethod adversarial samples for ``x_test``.

    Args:
        classifier: Model wrapper handed to the attack as ``classifier``.
        x_test: Inputs passed to the attack's ``generate`` call.
        epsilon: Value forwarded as the attack's ``eps``.

    Returns:
        Whatever ``CarliniLInfMethod.generate`` returns for ``x_test``.
    """
    # Fixed settings of the attack; only eps is exposed to callers.
    attack_config = dict(
        classifier=classifier,
        confidence=0.0,
        targeted=False,
        learning_rate=0.01,
        eps=epsilon,
        batch_size=128,
    )
    return CarliniLInfMethod(**attack_config).generate(x=x_test)

In [None]:
# Craft C&W L2 adversarial samples against the quantized model, then score the same model on them.
# Recorded runtimes: 4bit=23min, 3bit=33min, 2bit=30min.

n = 200  # number of leading test samples to attack
x_test_adv_c2 = CarliniL2(classifier, x_test[:n])

# Accuracy (in percent) on the adversarial samples; labels are decoded with argmax along axis 1.
predictions = classifier.predict(x_test_adv_c2)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(y_test[:n], axis=1)
acc = 100*(np.sum(predicted_labels == true_labels) / n)

print(' Accuracy:', acc)

In [None]:
# Attack the first n test samples with the C&W L-inf helper (recorded runtime: 18s).
n = 1000
x_test_adv_cinf = CarliniLInf(classifier, x_test[:n])

# Accuracy (in percent) on the adversarial samples; labels are decoded with argmax along axis 1.
predictions = classifier.predict(x_test_adv_cinf)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(y_test[:n], axis=1)
acc = 100*(np.sum(predicted_labels == true_labels) / n)

print(' Accuracy:', acc)

### ZOO attack

In [None]:
# ZOO attack is a gradient-free attack
def ZOO(classifier, x_test):
    """Generate ZooAttack adversarial samples for ``x_test``.

    Args:
        classifier: Model wrapper handed to the attack as ``classifier``.
        x_test: Inputs passed to the attack's ``generate`` call.

    Returns:
        Whatever ``ZooAttack.generate`` returns for ``x_test``.
    """
    # Fixed settings of the attack; every other option keeps the library default.
    attack_config = dict(
        classifier=classifier,
        learning_rate=1,
        binary_search_steps=3,
        initial_const=1,
        variable_h=0.1,
    )
    return ZooAttack(**attack_config).generate(x=x_test)

In [None]:
# Attack the first n test samples with the ZOO helper.
n = 200
x_test_adv_z = ZOO(classifier, x_test[:n])

# Accuracy (in percent) on the adversarial samples; labels are decoded with argmax along axis 1.
predictions = classifier.predict(x_test_adv_z)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(y_test[:n], axis=1)
acc = 100*(np.sum(predicted_labels == true_labels) / n)

print(' Accuracy:', acc)