### Imports

In [None]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, utils
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
import time
import h5py
from pathlib import Path
import os
import matplotlib.pylab as pl

from art.attacks import FastGradientMethod
from art.attacks import CarliniL2Method, CarliniLInfMethod
from art.classifiers.pytorch import PyTorchClassifier
from art.utils import load_mnist

%config InlineBackend.figure_format='retina'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
%matplotlib inline

In [None]:
# Fetch MNIST through ART; besides the two splits it also returns the pixel
# value bounds (min_, max_) that are later handed to the classifier wrappers.
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Exchange axes 1 and 3 and cast to float32 before building tensors.
# NOTE(review): presumably this converts channels-last images to the
# channels-first layout Conv2d expects; swapaxes also exchanges the two
# spatial axes - confirm the checkpoint was trained with the same layout.
x_train, x_test = (
    np.swapaxes(images, 1, 3).astype(np.float32) for images in (x_train, x_test)
)

# Wrap each split as a TensorDataset of (images, labels).
train_dataset, test_dataset = (
    TensorDataset(torch.Tensor(images), torch.Tensor(labels))
    for images, labels in ((x_train, y_train), (x_test, y_test))
)

# Loaders: mini-batches for training, large batches for bulk evaluation and a
# one-sample-at-a-time loader for per-sample activation recording.
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=1000)
test_dataloader_single = DataLoader(test_dataset, batch_size=1)

In [None]:
# Perturbation budgets (eps) swept by the FGSM evaluation.
epsilons = [1e-6, 0.1, 0.2, 0.3]
# Checkpoint file whose state dict is loaded into the models.
pretrained_model = "MNIST_vanilla_model_01.pth"
# Request the GPU; it is only used when CUDA is actually available.
use_cuda = True

### Classifier

In [None]:
class Classifier(nn.Module):
    """Small CNN (2 conv + 3 fully connected layers) producing 10 logits.

    Besides the regular ``forward`` pass the model offers two variants that
    hook into the 512-unit post-ReLU output of ``fc1`` (the input of ``fc2``):

    * ``forwardDetect`` records those activations in ``self.activations_fc2``.
    * ``forwardMask`` multiplies them element-wise with ``self.mask_fc2``.

    Attributes:
        activations_fc2: list of recorded activation tensors, one per
            ``forwardDetect`` call. Owned by the instance, so two models never
            share (or pollute) each other's recordings.
        mask_fc2: tensor broadcastable to ``(batch, 512)``. It is a plain
            attribute (neither parameter nor buffer), so it is not part of
            ``state_dict`` and existing checkpoints keep loading unchanged.
            It starts as all zeros, i.e. every unit is masked until a real
            mask is assigned.
    """

    def __init__(self):
        super(Classifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 512)
        self.fc2 = nn.Linear(512, 200)
        self.fc3 = nn.Linear(200,10)

        # Per-instance state. These used to be class attributes, which made
        # the (mutable) activation list shared between all instances.
        self.activations_fc2 = []
        self.mask_fc2 = torch.zeros(1, 512)

    def _fc1_features(self, x):
        """Run the shared trunk (conv1 -> conv2 -> flatten -> fc1 -> ReLU)."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        return F.relu(self.fc1(x))

    def forward(self, x):
        """Standard pass with dropout after fc1 and fc2 (active in training mode only).

        Returns:
            Logits of shape ``(batch, 10)``.
        """
        x = self._fc1_features(x)
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, training=self.training)
        x = self.fc3(x)
        return x

    def forwardDetect(self, x):
        """Forward pass that also appends the fc1 activations to ``self.activations_fc2``.

        No dropout is applied after the fully connected layers.

        Returns:
            Logits of shape ``(batch, 10)``.
        """
        x = self._fc1_features(x)
        self.activations_fc2.append(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def forwardMask(self, x):
        """Forward pass with the fc1 activations multiplied by ``self.mask_fc2``.

        The mask is moved to the activations' device first, so a mask created
        on the CPU also works when the model runs on the GPU.

        Returns:
            Logits of shape ``(batch, 10)``.
        """
        x = self._fc1_features(x)
        x = x * self.mask_fc2.to(x.device)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
# Choose the compute device: CUDA only when it was requested and is present.
cuda_available = torch.cuda.is_available()
print("CUDA Available: ", cuda_available)
device = torch.device("cuda" if (use_cuda and cuda_available) else "cpu")

# Build the network on the chosen device.
model = Classifier()
model = model.to(device)

# Restore the pre-trained weights (deserialised on the CPU, then copied into
# the parameters that already live on `device`).
checkpoint = torch.load(pretrained_model, map_location='cpu')
model.load_state_dict(checkpoint)

# Evaluation mode switches the dropout layers off; then show the architecture.
model.eval()
print(model)

# Loss and optimiser required by the ART wrapper.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Wrap the network so ART attacks can query it.
classifier = PyTorchClassifier(
    model=model,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
    clip_values=(min_, max_),
)

# Sanity check on clean data: compare arg-max class indices of predictions and
# labels over the whole test set.
predictions = classifier.predict(x_test)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)
accuracy = np.sum(predicted_classes == true_classes) / len(y_test)
print("Accuracy on benign test examples: {} %".format(accuracy * 100))

### Mask Classifier

In [None]:
class MaskModel(nn.Module):
    """CNN with the same layers as ``Classifier`` whose ``forward`` is always masked.

    The 512-unit post-ReLU output of ``fc1`` is multiplied element-wise with
    ``self.mask_fc2`` before it is fed to ``fc2``; assigning a new mask
    therefore "prunes" units without touching the weights.

    Attributes:
        mask_fc2: tensor broadcastable to ``(batch, 512)``. Defaults to all
            ones (nothing pruned). It is a plain attribute (neither parameter
            nor buffer), so it is not part of ``state_dict`` and existing
            checkpoints keep loading unchanged.
    """

    def __init__(self):
        super(MaskModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 512)
        self.fc2 = nn.Linear(512, 200)
        self.fc3 = nn.Linear(200,10)

        # Per-instance mask (previously a class attribute shared by all models).
        self.mask_fc2 = torch.ones(1, 512)

    def forward(self, x):
        """Masked forward pass.

        The mask is moved to the device of the activations rather than to a
        notebook-global ``device``, so the model works wherever its input and
        parameters live.

        Returns:
            Logits of shape ``(batch, 10)``.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = x * self.mask_fc2.to(x.device)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
# Choose the compute device: CUDA only when it was requested and is present.
cuda_available = torch.cuda.is_available()
print("CUDA Available: ", cuda_available)
device = torch.device("cuda" if (use_cuda and cuda_available) else "cpu")

# Build the maskable network on the chosen device.
mask_model = MaskModel()
mask_model = mask_model.to(device)

# Restore the same pre-trained weights that the plain classifier uses.
mask_checkpoint = torch.load(pretrained_model, map_location='cpu')
mask_model.load_state_dict(mask_checkpoint)

# Evaluation mode switches the dropout layer off; then show the architecture.
mask_model.eval()
print(mask_model)

# Loss and optimiser required by the ART wrapper.
# NOTE: this rebinds the notebook-level `criterion` and `optimizer`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(mask_model.parameters(), lr=0.01, momentum=0.5)

# Wrap the maskable network so ART attacks can query it.
mask_classifier = PyTorchClassifier(
    model=mask_model,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
    clip_values=(min_, max_),
)

# Sanity check on clean data: compare arg-max class indices of predictions and
# labels over the whole test set.
predictions = mask_classifier.predict(x_test)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)
accuracy = np.sum(predicted_classes == true_classes) / len(y_test)
print("Accuracy on benign test examples: {} %".format(accuracy * 100))

### Pruning

In [None]:
# Record the fc1 activations of every test sample, one sample per forward pass.

# Start from an empty buffer: forwardDetect only ever appends, so without this
# reset a second run of the cell would stack duplicate activations on top of
# the ones that are already stored.
model.activations_fc2.clear()

model.eval()  # dropout off while recording
with torch.no_grad():
    for data, target in test_dataloader_single:
        output = model.forwardDetect(data.to(device))

# Same list object as the one held by the model (not a copy).
activations_fc2 = model.activations_fc2

In [None]:
# Average the recorded fc1 activations per unit and plot their distribution.

if not activations_fc2:
    raise RuntimeError("No activations recorded - run the activation collection cell first.")

# Stack the recordings into one CPU tensor of shape (n_records, batch, units).
# The list is only read (newest entry first, i.e. the row order that popping
# from the end would give), not drained, so this cell can be re-run safely.
act = torch.stack([recorded.detach().cpu() for recorded in reversed(activations_fc2)])

# Mean over records and batch -> one value per unit.
mean_activations_fc2 = torch.mean(act,dim=[0,1])
ma = mean_activations_fc2.numpy()

plt.hist(ma,80)
plt.title('Activations FC1')
plt.xlabel('Mean activation')
plt.ylabel('Number of units')
plt.show()

In [None]:
# Build one binary mask per pruning level. Units are pruned in order of
# ascending mean activation (least active first).

masks_fc2 = dict()

# The ranking and the layer width do not depend on the pruning level, so they
# are computed once instead of once per iteration.
idx_ma_fc2 = torch.sort(mean_activations_fc2)[1]
nn_fc2 = mean_activations_fc2.size(0)

# NOTE: the loop values are tenths, not percent: 3 means 30 % of the units.
for percentage in [0, 3, 6, 9]:
    # Number of units to switch off at this level.
    ind_r_fc2 = round((percentage/10)*nn_fc2)

    # Start from "keep everything" and zero out the least active units.
    m_fc2 = torch.ones(mean_activations_fc2.size()).to(device)
    m_fc2[idx_ma_fc2[:ind_r_fc2]] = 0.0

    # The plain model is left holding the most recently built mask.
    model.mask_fc2 = m_fc2
    # Keyed by pruned fraction (0.0, 0.3, 0.6, 0.9).
    masks_fc2[percentage/10] = m_fc2

### FGSM attack

In [None]:
# FGSM robustness sweep: for every pruning mask and every epsilon, craft
# adversarial versions of the first n test images and measure accuracy.
fgsm_prune = dict()
n = 1000

# Ground-truth class indices of the evaluated samples (identical for all runs).
true_classes = np.argmax(y_test[:n], axis=1)

for pg, mask_fc2 in masks_fc2.items():
    print('\n Pruned {}'.format(pg))

    # Activate this pruning level on the model behind mask_classifier.
    mask_model.mask_fc2 = mask_fc2

    accuracies = []
    for e in epsilons:
        adv_crafter = FastGradientMethod(mask_classifier, eps=e)
        x_test_adv = adv_crafter.generate(x=x_test[:n])

        predictions = mask_classifier.predict(x_test_adv)
        n_correct = np.sum(np.argmax(predictions, axis=1) == true_classes)
        accuracy = n_correct / n

        accuracies.append(accuracy)
        print("Epsilon: {}   Test Accuracy = {}".format(e, accuracy))

    # One result record per pruning level.
    results = {'accuracies': accuracies, 'epsilons': epsilons}
    fgsm_prune[pg] = results

### Carlini attack

In [None]:
# Helpers that craft adversarial test examples
def CarliniL2(classifier, x_test, init_const):
    """Craft adversarial examples with ART's ``CarliniL2Method``.

    The attack is configured as untargeted with confidence 0.0, learning rate
    0.01, 2 binary search steps and a batch size of 64.

    Args:
        classifier: classifier object handed to the attack.
        x_test: inputs to perturb.
        init_const: value passed to the attack as ``initial_const``.

    Returns:
        Whatever ``generate`` returns for ``x_test`` (the adversarial inputs).
    """
    attack_settings = dict(
        confidence=0.0,
        targeted=False,
        learning_rate=0.01,
        binary_search_steps=2,
        initial_const=init_const,
        batch_size=64,
    )
    return CarliniL2Method(classifier=classifier, **attack_settings).generate(x=x_test)

def CarliniLInf(classifier, x_test, epsilon):
    """Craft adversarial examples with ART's ``CarliniLInfMethod``.

    The attack is configured as untargeted with confidence 0.0, learning rate
    0.01 and a batch size of 128.

    Args:
        classifier: classifier object handed to the attack.
        x_test: inputs to perturb.
        epsilon: value passed to the attack as ``eps``.

    Returns:
        Whatever ``generate`` returns for ``x_test`` (the adversarial inputs).
    """
    attack_settings = dict(
        confidence=0.0,
        targeted=False,
        learning_rate=0.01,
        eps=epsilon,
        batch_size=128,
    )
    return CarliniLInfMethod(classifier=classifier, **attack_settings).generate(x=x_test)

In [None]:
# Carlini L2 robustness sweep: for every pruning mask and every initial
# constant c, attack the first n test images and measure accuracy.
carlini_prune = dict()
constants = [0.1, 1.0, 5]
n = 1000

# Ground-truth class indices of the evaluated samples (identical for all runs).
true_classes = np.argmax(y_test[:n], axis=1)

for pg, mask_fc2 in masks_fc2.items():
    print('\n Pruned {}'.format(pg))

    # Activate this pruning level on the model behind mask_classifier.
    mask_model.mask_fc2 = mask_fc2

    accuracies = []
    for init in constants:
        x_test_adv = CarliniL2(mask_classifier, x_test[:n], init_const=init)

        predictions = mask_classifier.predict(x_test_adv)
        n_correct = np.sum(np.argmax(predictions, axis=1) == true_classes)
        acc = n_correct / n

        accuracies.append(acc)
        print('c:', init, ' Accuracy:', acc)

    # One result record per pruning level.
    results = {'accuracies': accuracies, 'constants': constants}
    carlini_prune[pg] = results

In [None]:
# Carlini L-inf robustness sweep: for every pruning mask and every epsilon,
# attack the first n test images and measure accuracy.
carlini_inf_prune = dict()
# NOTE: this rebinds the notebook-level `epsilons`; cells executed after this
# one see these values instead of the ones from the configuration cell.
epsilons = [.1, .3, .6]
n = 1000

# Ground-truth class indices of the evaluated samples (identical for all runs).
true_classes = np.argmax(y_test[:n], axis=1)

# Run test for each epsilon and mask PG
for pg, mask_fc2 in masks_fc2.items():
    print('\n Pruned {}'.format(pg))

    # Activate this pruning level on the model behind mask_classifier.
    mask_model.mask_fc2 = mask_fc2

    accuracies = []
    for eps in epsilons:
        x_test_adv = CarliniLInf(mask_classifier, x_test[:n], epsilon=eps)

        predictions = mask_classifier.predict(x_test_adv)
        n_correct = np.sum(np.argmax(predictions, axis=1) == true_classes)
        acc = n_correct / n

        accuracies.append(acc)
        print('eps:', eps, ' Accuracy:', acc)

    # One result record per pruning level.
    results = {'accuracies': accuracies, 'epsilons': epsilons}
    carlini_inf_prune[pg] = results