In [None]:
import matplotlib.pylab as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import glob
import numpy as np
import imageio
import itertools
import foolbox as fb
import foolbox.ext.native as fbn
import time
import torchvision
from model_robust import return_model

In [None]:
# initialisation attack from existing validation images
import eagerpy as ep
import numpy as np

class PrecomputedSamplesAttack:
    """Helper "attack" that supplies starting points for boundary-type attacks.

    Samples are stored together with the class the given model predicts for
    them (see ``feed``). Calling the object returns, for every requested label,
    a randomly chosen stored sample whose predicted class differs from that
    label.
    """

    def __init__(self, model):
        self.model = model
        self.samples = []  # stored inputs, one entry per fed sample
        self.labels = []   # predicted class (int) of the sample at the same index

    def feed(self, inputs):
        """Store every sample in ``inputs`` together with the model's predicted class."""
        response = self.model.forward(inputs).argmax(1)

        for k in range(len(inputs)):
            self.labels.append(int(response[k]))
            self.samples.append(inputs[k])

    def __call__(self, inputs, labels):
        """Return one stored sample per entry of ``labels`` with a different predicted class.

        Args:
            inputs: batch whose shape/type the returned batch mirrors.
            labels: one label per input; the sample picked for position ``k``
                has a stored prediction different from ``labels[k]``.

        Returns:
            The raw tensor (same kind as ``inputs``) filled with stored samples.

        Raises:
            ValueError: if nothing has been fed yet, or if no stored sample has
                a prediction different from one of the labels. The previous
                rejection-sampling loop never terminated in the latter case.
        """
        if not self.labels:
            raise ValueError("no samples stored; call feed() before using the attack")

        inputs = ep.astensor(inputs)
        labels = ep.astensor(labels)
        x = ep.zeros_like(inputs)
        stored_labels = np.asarray(self.labels)

        for k in range(len(labels)):
            label = int(labels[k].numpy())
            # indices of all stored samples the model assigns to another class
            candidates = np.flatnonzero(stored_labels != label)
            if candidates.size == 0:
                raise ValueError(
                    "no stored sample is classified differently from label {}".format(label)
                )
            idx = int(np.random.choice(candidates))
            x.tensor[k] = self.samples[idx]

        return x.tensor


In [None]:
# ---------------------------------------------------------------- paths / device
# NOTE(review): data_dir is not referenced by the dataset below, which uses root='./data'.
data_dir = '../data'
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = 'cpu'

data = dict()

# ---------------------------------------------------------------- experiment knobs
metrics = ['L2']
repetitions = 1
step_scale = 1      # multiplies the number of attack steps configured below
num_batches = 16

# Keyword arguments for the Brendel-Bethge attack configs (commented out below).
# The 'init_attack' string is only a placeholder: the attack cell swaps it for a
# PrecomputedSamplesAttack instance before the attack is run.
BB_static = dict(init_attack='init_attack', steps=int(step_scale * 1000))
BB_lr = [dict(lr=1e-3)]

# One dict per attack. Each metric key maps to a list of tuples
#   (attack class, static kwargs, list of dynamic kwargs[, init kwargs])
# and 'Name' is used in the file name of the saved adversarials.
full_attacks = [
    {
        'L2': [
            (
                fbn.attacks.PGD,
                dict(num_steps=int(1000 * step_scale)),
                [dict(epsilon=8.0/255.0)],
            ),
        ],
        'Name': 'PGDLinf',
    },
               #{'L2': [(fbn.attacks.L2BasicIterativeAttack,{'num_steps' : int(1000 * step_scale)}, [{'epsilon' : 2.0}])], 'Name': 'PGDL2'},
               #{'L2': [(fbn.attacks.L2BrendelBethgeAttack, BB_static, BB_lr)],'Name': 'BBL2'},
               #{'L2': [(fb.attacks.DecoupledDirectionNormL2Attack, {}, {'steps' : int(step_scale * 300), 'quantize' : False})], 'Name': 'DDNL2'},
               #{'L2': [(fb.attacks.AdamL1BasicIterativeAttack, {'random_start' : True, 'iterations' : int(step_scale * 200), 'binary_search' : 10, 'epsilon' : 0.1, 'stepsize' : 0.01}, [{}], {'distance' : fb.distances.MeanAbsoluteDistance})], 'Name': 'PGDL1'},
               #{'L2': [(fbn.attacks.LinfinityBrendelBethgeAttack, BB_static, BB_lr)], 'Name': 'BBLinf'}
]

# ---------------------------------------------------------------- test data
# CIFAR-10 test split, converted with ToTensor and served in fixed order.
test_batch_size = 20

transform_test = transforms.Compose([transforms.ToTensor()])

testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=test_batch_size,
    shuffle=False,
    num_workers=2,
)

In [None]:
namesss = ['convlinearl1k3c32relu']

# For every attack configuration and every model: load the checkpoint, run the
# attack on the first 100 test batches and save, per image, the record
# [adversarial, original, true label, adversarial class or None].
for attacks in full_attacks:
    for namm in namesss:
        model_name = namm
        resnet = return_model(model_name)
        resnet = resnet.to(device)
        resnet.eval()
        # map_location lets a checkpoint written on a GPU machine load on a CPU-only one
        checkpoint = torch.load('./cifar_resnet/ckpt_{}.pth'.format(model_name), map_location=device)
        for i in range(checkpoint['epoch'],checkpoint['epoch']+1):
            resnet.load_state_dict(checkpoint['net'])

            # init foolbox models
            fbn_model = fbn.models.PyTorchModel(resnet, bounds=[0.0, 1.0], device=device)
            fb_model = fb.models.PyTorchModel(resnet, bounds=[0.0, 1.0], device=device, num_classes=10)

            for metric in metrics:
                for attack in attacks[metric]:
                    iteration = 0
                    print(attack)
                    # 3-tuples carry no constructor kwargs, 4-tuples carry init_kwargs
                    if len(attack) == 3:
                        Attack, static_kwargs, dynamic_kwargs = attack
                        native = 'foolbox.ext.native' in Attack.__module__
                        attack = Attack(fbn_model) if native else Attack(fb_model)
                    else:
                        Attack, static_kwargs, dynamic_kwargs, init_kwargs = attack
                        native = 'foolbox.ext.native' in Attack.__module__
                        attack = Attack(fbn_model) if native else Attack(fb_model, **init_kwargs)

                    # Work on a copy: the config dicts (e.g. BB_static) are shared
                    # between attack entries and must not be mutated below.
                    static_kwargs = dict(static_kwargs)

                    name = str(attack.__class__).split('.')[-1].split("'")[0]
                    if metric == 'L2':
                        # NOTE(review): bbattack is not used anywhere in this cell.
                        bbattack = fbn.attacks.L2BrendelBethgeAttack(fbn_model)

                    if native:
                        model = fbn_model
                        print("Native")
                    else:
                        model = fb_model

                    # create init attack if necessary
                    if 'init_attack' in static_kwargs:
                        init_attack = PrecomputedSamplesAttack(fbn_model)

                        # store the model's prediction for every test image
                        for batch in testloader:
                            inputs, labels = batch
                            inputs = inputs.to(device)
                            init_attack.feed(inputs)

                        static_kwargs['init_attack'] = init_attack

                    # perform attack with different arguments
                    img = []
                    for kwarg in dynamic_kwargs:
                        kwargs = {**kwarg, **static_kwargs}
                        print(kwarg)
                        images = []
                        for b, batch in enumerate(testloader):
                            # only the first 100 batches are attacked
                            if b == 100:
                                break
                            check = time.time()
                            inputs, labels = batch
                            inputs = inputs.to(device)
                            labels = labels.to(device)
                            if not native:
                                # non-native attacks are called with numpy arrays
                                inputs = inputs.data.cpu().numpy()
                                labels = labels.data.cpu().numpy()
                            adversarials = attack(
                                inputs,
                                labels,
                                **kwargs
                            )

                            out = model.forward(adversarials)
                            is_adv = out.argmax(1) != labels
                            out_x = model.forward(inputs)
                            is_cor = out_x.argmax(1) == labels

                            # convert everything to numpy before storing
                            if native:
                                out = out.data.cpu().numpy()
                                adversarials = adversarials.data.cpu().numpy()
                                inputs = inputs.data.cpu().numpy()
                                labels = labels.data.cpu().numpy()
                            output = out.argmax(1)
                            for k in range(len(inputs)):
                                x0 = inputs[k]
                                if is_adv[k] and is_cor[k]:
                                    # successful attack on a correctly classified image
                                    images.append([adversarials[k], x0, labels[k], output[k]])
                                else:
                                    # failed attack or misclassified original: zero
                                    # placeholder and None as adversarial class
                                    images.append([np.zeros((3,32,32)), x0, labels[k], None])
                            if b % 10 == 0:
                                print(model_name,'{}/{}'.format(i,checkpoint['epoch']), 'Time: ', time.time() - check)
                        img.append(images)

                    # The records mix arrays, scalars and None, so request an object
                    # array explicitly instead of relying on implicit ragged conversion.
                    np.save("./cifar_resnet/{}_{}_adversarial.npy".format(model_name, attacks['Name']), np.array(img, dtype=object))

            # Drop the foolbox wrappers only after all metrics/attacks for this model
            # are done; deleting them inside the attack loop made a second attack or
            # metric fail with NameError.
            del fb_model
            del fbn_model

In [None]:
class VanillaBackprop():
    """
        Produces gradients generated with vanilla back propagation from the image
    """
    def __init__(self, model):
        self.model = model
        # Set by the backward hook registered in hook_layers().
        self.gradients = None
        # Put model in evaluation mode
        self.model.eval()
        # Hook the first layer to get the gradient
        self.hook_layers()

    def hook_layers(self):
        """Register a backward hook on the first sub-module that records its first input gradient."""
        def hook_function(module, grad_in, grad_out):
            self.gradients = grad_in[0]

        # Wrappers such as DataParallel expose the real network as `.module`;
        # fall back to the model itself so unwrapped models work as well.
        network = getattr(self.model, 'module', self.model)
        first_layer = list(network._modules.items())[0][1]
        first_layer.register_backward_hook(hook_function)

    def generate_gradients(self, input_image, target_class):
        """Return d(score of target_class)/d(input) for the first sample of the batch.

        Args:
            input_image: input batch; gradient tracking is switched on in place.
            target_class: index of the class score to differentiate. ``None``
                selects the highest-scoring class of the first sample.

        Returns:
            numpy array holding the gradient for the first sample (batch axis removed).
        """
        # Forward
        input_image.requires_grad_(True)
        scores = self.model(input_image)
        # Zero grads
        self.model.zero_grad()
        # Target for backprop: honour the requested class instead of always
        # taking the arg-max over the flattened score tensor.
        if target_class is None:
            target_class = int(scores[0].argmax())
        target_score = scores[0, target_class]
        dc_dx = torch.autograd.grad(target_score, input_image)[0]

        return dc_dx[0,...].cpu().numpy()

In [None]:
# Rebuild the CIFAR-10 test loader so that every batch holds a single image.
test_batch_size = 1

transform_test = transforms.Compose([transforms.ToTensor()])

testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=test_batch_size,
    shuffle=False,
    num_workers=2,
)

In [None]:
# For every model: compute a normalised vanilla-gradient saliency map for the
# first 2000 test images (one image per batch) and save them to disk.
for nam in namesss:
    resnet = return_model(nam)
    resnet = torch.nn.DataParallel(resnet)
    resnet = resnet.to(device)
    resnet.eval()
    # map_location lets a checkpoint written on a GPU machine load on a CPU-only one
    checkpoint = torch.load('./cifar_resnet/ckpt_{}.pth'.format(nam), map_location=device)
    for i in range(checkpoint['epoch'],checkpoint['epoch']+1):
        lss = []
        resnet.load_state_dict(checkpoint['net'])
        VBP = VanillaBackprop(resnet)
        for b, batch in enumerate(testloader):
            if b == 2000:
                break
            # Unpack the current batch. Previously `inputs` was never assigned in
            # this loop, so a stale variable from an earlier cell was used instead.
            inputs, labels = batch
            inputs = inputs.to(device)
            inputs.requires_grad_(True)
            out_x = resnet.forward(inputs)
            cat = out_x.argmax(1)
            # explain the class the model itself predicts (batch size is 1)
            vanilla_grads = VBP.generate_gradients(inputs, int(cat.item()))
            # standardise, take magnitudes, then rescale to [0, 1]
            vanilla_grads = (vanilla_grads - vanilla_grads.mean())/vanilla_grads.std()
            vanilla_grads = np.abs(vanilla_grads)
            vanilla_grads = vanilla_grads - vanilla_grads.min()
            vanilla_grads = vanilla_grads/(vanilla_grads.max() - vanilla_grads.min())
            lss.append(vanilla_grads)

        np.save("./cifar_resnet/{}_saliency.npy".format(nam),lss)



In [None]:
# Per model: L2 norm of (adversarial - original) for every successfully attacked image.
distances = {}
import numpy as np
for mm in namesss:
    # SECURITY NOTE: allow_pickle=True unpickles arbitrary objects; only load
    # files written by this notebook.
    a = np.load("./cifar_resnet/{}_PGDLinf_adversarial.npy".format(mm), allow_pickle=True)
    # first (and only) dynamic-kwargs configuration; rows are
    # [adversarial, original, true label, adversarial class or None]
    a = a[0]
    distance = []
    for i in range(a.shape[0]):
        adv = a[i,0]
        original = a[i,1]
        adv_cls = a[i,3]
        # None marks a failed attack; use an identity test because `!= None`
        # is an equality comparison that numpy objects may broadcast.
        if adv_cls is not None:
            dist = np.linalg.norm((adv-original).flatten(), ord=2)
            distance.append(dist)
    distances[mm] = distance

In [None]:
# Report mean and standard deviation of the collected perturbation norms per model.
print('Model,          Mean Norm,       Std Norm')
for key, norms in distances.items():
    norms = np.array(norms)
    mean_value = norms.mean()
    std_value = norms.std()
    print(key, mean_value, std_value)

In [None]:
# Per model: fraction of stored records with a successful attack, reported
# relative to the accuracy stored in that model's checkpoint.
distances = {}
accuracies = {}
import numpy as np
for mm in namesss:
    # SECURITY NOTE: allow_pickle=True unpickles arbitrary objects; only load
    # files written by this notebook.
    a = np.load("./cifar_resnet/{}_PGDLinf_adversarial.npy".format(mm), allow_pickle=True)
    # only the stored accuracy is needed, so keep the checkpoint on the CPU
    checkpoint = torch.load('./cifar_resnet/ckpt_{}.pth'.format(mm), map_location='cpu')
    # rows are [adversarial, original, true label, adversarial class or None]
    a = a[0]
    # None in the last column marks a failed attack
    distance = sum(1 for i in range(a.shape[0]) if a[i,3] is not None)
    distances[mm] = distance/a.shape[0]
    # Remember the accuracy per model; previously every model was normalised by
    # the accuracy of the last checkpoint loaded in the loop.
    # presumably 'acc' is the test accuracy in percent — TODO confirm
    accuracies[mm] = checkpoint['acc']
for key in distances.keys():
    print(key, distances[key]*100/accuracies[key])