# Code
Beware of computation time and file sizes... You can either run the notebook and compute most things yourself, or get a zipped data folder with all the heavy stuff precomputed; in that case the notebook automatically skips some computations and displays the precomputed results instead.

## Imports

In [None]:
# installing required python libraries
# you can run this once and then (re-)comment it
# !pip install -q -r reqs.txt

In [None]:
%matplotlib inline

import os
import wget
from zipfile import ZipFile
import matplotlib.pyplot as plt
import torch
from torchvision.models import resnet34
import numpy as np
from lucent.optvis import render, objectives
from PIL import Image
from lucent.misc.io.showing import _display_html, images
from lucent.modelzoo.util import get_model_layers

import fv_utils as fv
from importlib import reload
_ = reload(fv)

## Files prep

Our project uses the following folder structure. If the cell below fails for some reason, make sure to manually check that you have the same folder structure and manually download any required data like the robust model.

```
PROJECT_ROOT/
|--project.ipynb
|--fv_utils.ipynb
|--reqs.txt
|--data/
|  |--dataset/
|  |  |...imagenet100 data from kaggle
|  |--output/
|  |--model/
|  |  |--resnet34.pth.tar
|  |--label/
|  |  |--Labels100.json
|  |  |--Labels1000.json
|  |  |--100_to_1000.json
|  |--gradient/
|  |  |--robust_data_grad.pt
|  |  |--standard_data_grad.pt
|  |--adversarial_examples/
```

In [None]:
# Project root: the directory the notebook is run from.
# os.getcwd() yields the same path as the IPython-only `%pwd` magic, but also
# works when this code is executed as plain Python.
ROOT = os.getcwd()
print(ROOT)

In [None]:
# Make sure the data folder exists, downloading and unpacking the archive
# when it does not.
data_dir = os.path.join(ROOT, "data")
if not os.path.exists(data_dir):
    # the archive is ~3.8 GB zipped and ~13 GB once extracted
    data_dir_zip = os.path.join(ROOT, "data.zip")
    if not os.path.exists(data_dir_zip):
        print("Downloading", data_dir_zip, "...")
        wget.download(fv.data_folder_url, out=data_dir_zip)
    with ZipFile(data_dir_zip, mode='r') as archive:
        archive.extractall(path=ROOT)

# Sub-folders of data/, in the order they are unpacked just below.
data_folders = [
    "model", "label", "gradient", "output", "dataset", "adversarial_examples"
]
model_dir, label_dir, gradient_dir, output_dir, dataset_dir, advex_dir = (
    os.path.join(data_dir, folder_name) for folder_name in data_folders
)

# Individual files the rest of the notebook relies on.
robust_resnet_34_file = os.path.join(model_dir, "resnet34.pth.tar")
labels100_file = os.path.join(label_dir, "Labels100.json")
labels1000_file = os.path.join(label_dir, "Labels1000.json")
labels_conversion_file = os.path.join(label_dir, "100_to_1000.json")
robust_gradients_file = os.path.join(gradient_dir, "robust_data_grad.pt")
standard_gradients_file = os.path.join(gradient_dir, "standard_data_grad.pt")

# Sanity check: every required path exists and the dataset folder holds
# exactly 100 entries. The listdir() call is only reached when all paths
# exist, because `and` short-circuits.
required_paths = (
    robust_resnet_34_file,
    labels100_file,
    labels1000_file,
    labels_conversion_file,
    robust_gradients_file,
    standard_gradients_file,
    output_dir,
    dataset_dir,
    advex_dir,
)
everything_present = (
    all(os.path.exists(path) for path in required_paths)
    and len(os.listdir(dataset_dir)) == 100
)
print("Ok!" if everything_present else "You seem to be missing some files...")

## Load models

In [None]:
# Run on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Standard (non-robust) model: torchvision's pretrained ResNet-34.
standard_model = resnet34(pretrained=True).to(device).eval()

# Robustly trained model: same architecture, weights from the local checkpoint.
# pretrained=False: load_state_dict() below is strict by default and therefore
# replaces every parameter and buffer, so downloading the ImageNet weights
# first would be wasted work.
robust_model = resnet34(pretrained=False)
states_dict = torch.load(robust_resnet_34_file, map_location=device)
# Checkpoint keys carry one extra leading component (the text before the first
# "."); strip it so the keys match torchvision's ResNet parameter names.
new_dict = {
    key.partition(".")[2]: value
    for key, value in states_dict['model'].items()
}

# copy the checkpoint weights into the freshly built model
robust_model.load_state_dict(new_dict)
_ = robust_model.to(device).eval()

# the checkpoint copies are no longer needed; release their memory
del new_dict, states_dict
fv.cleanMemGPU()

# human-readable names, used in printed messages, plot labels and file names
robust_model.name = "Robust model"
standard_model.name = "Non-robust model"

## Print some models info

In [None]:
# Print a summary of the model's layers via fv.print_model_info.
# Only the standard model is passed: per the original note, the layer
# structure is the same for the robust and non-robust models.
fv.print_model_info(standard_model)

## Visualize some channels

In [None]:
# "layer:channel" identifiers handed to fv.visualize_channels
channels = ["layer1:0", "layer2:2", "layer3:2", "layer3:4", "layer4:2"]
# render the same channels for both models (standard first, then robust)
for vis_model in (standard_model, robust_model):
    fv.visualize_channels(channels, vis_model, output_dir)

## DeepDream objective

### Objective function

In [None]:
from lucent.optvis.objectives import wrap_objective
# DeepDream objective: mean squared activation of a whole layer.
@wrap_objective()
def deepdream(layer_name):
    """Build the DeepDream objective for ``layer_name``.

    The returned callable receives ``model`` (called with a layer name to get
    that layer's activations) and returns the negated mean of the squared
    activations.
    """
    def negative_mean_square(model):
        activations = model(layer_name)
        # negated so that minimizing this value maximizes the activations
        return -(activations ** 2).mean()
    return negative_mean_square

### Parametrization functions

In [None]:
from lucent.optvis import param
def cppn_param_f():
    """Return the CPPN image parametrization built by ``param.cppn(128)``."""
    return param.cppn(128)


def cppn_opt(params):
    """Return an Adam optimizer over ``params`` with learning rate 5e-3.

    A lower learning rate is used for the CPPN parametrization.
    """
    return torch.optim.Adam(params, 5e-3)

### Visualization function

In [None]:
def visualize_deepdream(model, layer, param_f=None, optimizer=None, f_ext=""):
    """Render, or load from cache, the DeepDream image sequence of a layer.

    The sequence is cached in ``output_dir`` under a file name built from
    ``model.name``, ``layer`` and ``f_ext``; when that file exists it is
    loaded instead of being recomputed.

    Args:
        model: model to visualize; must expose a ``name`` attribute.
        layer: layer name passed to the ``deepdream`` objective.
        param_f: optional image parametrization factory for ``render_vis``.
        optimizer: optional optimizer factory for ``render_vis``.
        f_ext: extra tag inserted in the cache file name.

    Returns:
        The sequence array with its second axis dropped, i.e. reshaped from
        ``(d0, d1, d2, d3, d4)`` to ``(d0, d2, d3, d4)``.
    """
    cache_name = "_".join((model.name, layer, f_ext, "deepdream_sequence.pt"))
    cache_path = os.path.join(output_dir, cache_name)

    if not os.path.exists(cache_path):
        # one image is requested per optimization step listed in `thresholds`
        frames = render.render_vis(
            model=model,
            objective_f=deepdream(layer),
            param_f=param_f,
            optimizer=optimizer,
            transforms=[],
            show_inline=False,
            show_image=False,
            thresholds=range(512)
        )
        sequence = np.array(frames)
        torch.save(sequence, cache_path)
    else:
        sequence = torch.load(cache_path)

    dims = sequence.shape
    return sequence.reshape(dims[0], dims[2], dims[3], dims[4])
    

### Visualize

In [None]:
# DeepDream sequences of layer4_2 for both models: first with the CPPN
# parametrization and its dedicated optimizer, then with render_vis defaults.
deepdream_layer = "layer4_2"
cppn_settings = dict(param_f=cppn_param_f, optimizer=cppn_opt, f_ext="cppn")

standard_sequence_cppn = visualize_deepdream(
    standard_model, deepdream_layer, **cppn_settings
)
robust_sequence_cppn = visualize_deepdream(
    robust_model, deepdream_layer, **cppn_settings
)
standard_sequence = visualize_deepdream(standard_model, deepdream_layer)
robust_sequence = visualize_deepdream(robust_model, deepdream_layer)

In [None]:
# Show the four DeepDream sequences in one HTML output, in this order:
# standard (CPPN), standard (default), robust (CPPN), robust (default).
_display_html(
    fv.html_sequence(standard_sequence_cppn) + 
    fv.html_sequence(standard_sequence) + 
    fv.html_sequence(robust_sequence_cppn) + 
    fv.html_sequence(robust_sequence)
)

## Adversarial attack

### Load labels

In [None]:
import json
def _read_json(path):
    """Parse the JSON file at ``path`` and return its content."""
    with open(path) as json_file:
        return json.load(json_file)


# ImageNet100 code -> human-readable label
code_to_label_100 = _read_json(labels100_file)
# model output index (ImageNet1000) -> human-readable label
code_to_label_1000 = _read_json(labels1000_file)
# ImageNet100 code -> ImageNet1000 index
labels_100_to_1000 = _read_json(labels_conversion_file)

def indexFromCode(code):
    """Return the ImageNet1000 entry for an ImageNet100 ``code`` as a tensor.

    The value is looked up in ``labels_100_to_1000``.
    """
    index_1000 = labels_100_to_1000[code]
    return torch.tensor(index_1000)

### Load data

In [None]:
from torch.utils.data import DataLoader
# Only the first 100 examples are requested from the dataset. The loader is
# materialized into a list so that samples can be accessed by index.
preview_dataset = fv.ImageNet100ValDataset(dataset_dir, 100)
dataloader = list(
    DataLoader(preview_dataset, shuffle=False, batch_size=1)
)

### Display function

In [None]:
import torchvision.transforms as T

for sample_idx in range(100):
    # each entry is (raw image, model input, label code) for one sample
    image, features, labels = dataloader[sample_idx]
    image = image[0].squeeze()
    features = features.to(device)
    label = code_to_label_100[labels[0]]

    # predictions of both models, gradients not needed
    with torch.no_grad():
        std_output = standard_model(features)
        rob_output = robust_model(features)
    std_output_label = code_to_label_1000[str(std_output.argmax().item())]
    rob_output_label = code_to_label_1000[str(rob_output.argmax().item())]

    # Intended filter: robust model is right and the standard model disagrees.
    # NOTE(review): the trailing `or True` disables the filter, so the very
    # first sample is always displayed; drop it to search for a disagreement.
    models_disagree = (
        rob_output_label == label and rob_output_label != std_output_label
    )
    if not (models_disagree or True):
        continue

    # display the sample and stop
    image = T.ToPILImage()(image)
    print(f"Correct label   : {label}")
    print(f"Non robust model label : {std_output_label}")
    print(f"Robust model label : {rob_output_label}")
    display(image)
    break
    

### Checking prediction accuracies
This takes some time; you can skip it.

In [None]:
from tqdm import tqdm

# Replace the 100-sample list by a loader over the whole validation set,
# one image per batch, in dataset order.
validation_dataset = fv.ImageNet100ValDataset(dataset_dir)
dataloader = DataLoader(validation_dataset, shuffle=False, batch_size=1)
data_len = len(dataloader)

def acc(model):
    """Return the top-1 accuracy of ``model``, in percent, over ``dataloader``.

    A prediction counts as correct when the ImageNet1000 label of the argmax
    output equals the ImageNet100 label of the sample. The count is divided
    by the module-level ``data_len``.
    """
    hits = 0
    with torch.no_grad():
        for _, data, target in tqdm(dataloader):
            predicted_idx = model(data.to(device)).argmax().item()
            predicted_label = code_to_label_1000[str(predicted_idx)]
            hits += predicted_label == code_to_label_100[target[0]]
    return 100 * hits / data_len

In [None]:
# Previously measured values:
# Accuracy on the test set for robust model: 79.60%
# Accuracy on the test set for standard model: 79.72%
# Set `compute` to True to measure them again (slow, rarely needed).
compute=False
if compute:
    # robust model first, then standard; each result is printed right after
    # it has been computed
    for accuracy_message, evaluated_model in (
        ('Accuracy on the test set for robust model: {:.2f}%', robust_model),
        ('Accuracy on the test set for standard model: {:.2f}%', standard_model),
    ):
        print(accuracy_message.format(acc(evaluated_model)))

### FGSM attack

$$x^{adv} = x + \epsilon \cdot \operatorname{sign}\left(\nabla_x J(x, y_{true})\right)$$

#### Attack function

In [None]:
# Adapted from https://pytorch.org/tutorials/beginner/fgsm_tutorial.html
def fgsm_attack(image, epsilon, data_grad):
    """Return ``image`` shifted by ``epsilon`` along ``data_grad``.

    Args:
        image: input to perturb.
        epsilon: step size of the perturbation.
        data_grad: perturbation direction; callers pass the pre-computed
            sign of the gradient, so no ``sign()`` is applied here.

    Returns:
        ``image + epsilon * data_grad``.
    """
    perturbation = epsilon * data_grad
    return image + perturbation

#### Compute gradients

In [None]:
import torch.nn.functional as F

def compute_gradients(model):
    """Collect the sign of the loss gradient w.r.t. the input of every sample.

    Iterates the module-level ``dataloader`` (one image per batch), runs
    ``model`` on each input and back-propagates the classification loss down
    to the input tensor.

    Args:
        model: network to differentiate; its ``name`` attribute is printed.

    Returns:
        A list with one tensor per dataloader item holding ``sign(dL/dx)``.
    """
    print(model.name)

    data_grads = []

    # Loop over all examples in the data set
    for _, data, target in tqdm(dataloader):
        # Send the input to the device and track gradients on it: the attack
        # needs the gradient with respect to the image, not the weights
        data = data.to(device)
        data.requires_grad = True
        # Forward pass
        output = model(data)
        # Zero all existing gradients
        model.zero_grad()
        # Target class as a 1-element index tensor (ImageNet1000 index)
        label = indexFromCode(target[0]).reshape(-1).to(device)
        # torchvision's ResNet returns raw logits, so cross_entropy
        # (log_softmax + nll_loss) is the matching loss; nll_loss alone
        # expects log-probabilities as input.
        # NOTE(review): gradient files produced with the earlier nll_loss
        # version will differ from what this computes; regenerate them if
        # consistency matters.
        F.cross_entropy(output, label).backward()
        # Collect gradient signs
        data_grads.append(data.grad.detach().sign())

        # enable this at each iteration if you have memory issues (much slower)
        # del data, output
        # fv.cleanMemGPU()

    # finish the progress output with a blank line
    print()
    return data_grads


In [None]:
# Run only once; needs roughly 4 GB of memory at least.
# Set `compute` to True to compute the gradients, or download the precomputed ones.
compute=False
if compute:
    fv.cleanMemGPU()
    # robust model first, then standard; memory is released after each model
    for grad_model, gradients_file in (
        (robust_model, robust_gradients_file),
        (standard_model, standard_gradients_file),
    ):
        grad_data = compute_gradients(grad_model)
        torch.save(grad_data, gradients_file)
        del grad_data
        fv.cleanMemGPU()

#### Test epsilons

In [None]:
# (Re)create the loader over the whole validation set so this section can be
# run on its own: one image per batch, in dataset order.
epsilon_dataset = fv.ImageNet100ValDataset(dataset_dir)
dataloader = DataLoader(epsilon_dataset, shuffle=False, batch_size=1)
data_len = len(dataloader)

In [None]:
def test_epsilons(model, epsilons, data_grads):
    """Measure the accuracy of ``model`` under FGSM for each epsilon.

    Args:
        model: network to attack; its ``name`` attribute is printed.
        epsilons: sequence of perturbation sizes; the last one is printed as
            the "epsilon limit".
        data_grads: gradient signs, indexed like the items of ``dataloader``.

    Returns:
        A list with one accuracy (fraction of ``data_len``) per epsilon.
    """
    with torch.no_grad():
        print(model.name, "with epsilon limit of", epsilons[-1])

        accuracies = []
        eps_len = len(epsilons)
        for eps_number, eps in enumerate(epsilons, start=1):
            hits = 0
            for sample_idx, (_, data, target) in enumerate(dataloader):
                adversarial = fgsm_attack(
                    data.to(device),
                    eps,
                    data_grads[sample_idx].to(device)
                )
                prediction = model(adversarial)
                # correct when both label tables name the same class
                predicted_label = code_to_label_1000[str(prediction.argmax().item())]
                expected_label = code_to_label_100[target[0]]
                hits += predicted_label == expected_label
                # single-line progress indicator, overwritten in place
                print(
                    "Progress:", sample_idx + 1, "/", data_len,
                    "\tEpsilons", eps_number, "/", eps_len, end="\r"
                )
                # uncomment this if memory issue, much slower
                # del adversarial, prediction
                # fv.cleanMemGPU()

            accuracies.append(hits / data_len)

        # move past the progress line
        print()
        return accuracies

In [None]:
# Compute (or load) and plot the accuracy of both models against epsilon.
def plot_epsilons(epsilon_limit, num_samples=50):
    """Plot model accuracies for epsilons between 0 and ``epsilon_limit``.

    Accuracies are cached in ``output_dir`` under a name that depends on
    ``epsilon_limit`` only; when that file exists it is loaded instead of
    re-running the attacks.

    Args:
        epsilon_limit: largest epsilon evaluated.
        num_samples: number of epsilons, spaced logarithmically.
    """
    accuracy_file = os.path.join(
        output_dir, "epsilon_accuracies_{}".format(epsilon_limit)
    )
    # logspace(0, 1) - 1 spans 0..9; rescale it so it ends at epsilon_limit
    unscaled = np.logspace(0, 1, num=num_samples) - 1
    epsilon_range = unscaled / ((1/epsilon_limit) * unscaled[-1])

    def evaluate(model, gradients_file):
        # load one model's gradient signs, run the sweep, free the memory
        gradients = torch.load(gradients_file, map_location="cpu")
        result = test_epsilons(model, epsilon_range, gradients)
        del gradients
        fv.cleanMemGPU()
        return result

    if not os.path.exists(accuracy_file):
        fv.cleanMemGPU()
        # robust model first, standard model second
        torch.save(
            [
                evaluate(robust_model, robust_gradients_file),
                evaluate(standard_model, standard_gradients_file),
            ],
            accuracy_file,
        )

    # first curve is the robust model, any other the standard one
    for curve_idx, accs in enumerate(torch.load(accuracy_file)):
        curve_label = robust_model.name if curve_idx == 0 else standard_model.name
        plt.plot(epsilon_range, accs, label=curve_label)

    plt.title("Model accuracies w.r.t to FGSM attack epsilons")
    plt.xlabel("epsilon")
    plt.ylabel("accuracy")
    plt.xscale("logit")
    # about ten major ticks, taken from the evaluated epsilons
    tick_step = max(len(epsilon_range)//10, 1)
    plt.xticks(ticks=epsilon_range[::tick_step], minor=False)
    plt.tick_params(
        axis="x",
        labelrotation=90,
        grid_color="black",
        grid_alpha=0.2,
        grid_linewidth=1
    )
    plt.grid(visible=True)
    plt.legend()
    plt.show()

In [None]:
# Upper bound(s) for epsilon; one plot is produced per bound.
epsilon_limits = [3e-1]
# By default 50 epsilons are evaluated, spaced logarithmically between 0 and
# the bound (included); change num_samples below to use another count.
# With 50 samples this needs a GPU and a lot of time.
compute=False
if compute:
    for upper_bound in epsilon_limits:
        plot_epsilons(upper_bound, num_samples=50)

#### Display attack

In [None]:
# (Re)create the loader over the whole validation set for the attack display:
# one image per batch, in dataset order.
attack_dataset = fv.ImageNet100ValDataset(dataset_dir)
dataloader = DataLoader(attack_dataset, shuffle=False, batch_size=1)
data_len = len(dataloader)

In [None]:
def attack(model, epsilon, grad_sign, advex_number=10, image_id=None):
    """Collect FGSM adversarial examples for ``model``.

    Only samples that the model classifies correctly before the attack are
    kept. When ``image_id`` is None, a sample is kept only if the perturbed
    prediction is wrong; when ``image_id`` is given, only that sample is
    processed and it is kept whatever the perturbed prediction is.

    Args:
        model: network to attack; its ``name`` attribute is printed.
        epsilon: perturbation size passed to ``fgsm_attack``.
        grad_sign: gradient signs, indexed like the items of ``dataloader``.
        advex_number: stop once this many examples have been collected.
        image_id: optional dataloader index of the single sample to attack.

    Returns:
        dict mapping the dataloader index to a tuple
        ``(true_label, perturbed_prediction, perturbed_input_as_numpy)``.
    """
    with torch.no_grad():
        print(model.name, "with epsilon of", epsilon)

        advex = dict()
        for i, (_, data, target) in enumerate(tqdm(dataloader)):
            if image_id is not None and i != image_id:
                continue

            data = data.to(device)
            true_label = code_to_label_100[target[0]]
            original_idx_1000 = str(model(data).argmax().item())
            original_prediction = code_to_label_1000[original_idx_1000]

            # predict the perturbed image
            perturbed_data = fgsm_attack(
                data,
                epsilon,
                grad_sign[i].to(device)
            )
            perturbed_idx_1000 = str(model(perturbed_data).argmax().item())
            perturbed_prediction = code_to_label_1000[perturbed_idx_1000]

            if (
                (image_id is not None or perturbed_prediction != true_label)
                and original_prediction == true_label
                and len(advex) < advex_number
            ):
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                advex[i] = (true_label, perturbed_prediction, adv_ex)

            # Stop when enough examples were found, or right after the single
            # requested image: no later sample can match image_id, so
            # scanning the rest of the dataset would change nothing.
            if len(advex) >= advex_number or image_id is not None:
                break

        # finish the progress output and return the collected examples
        print()
        return advex

In [None]:
# Per-channel statistics used to undo the input normalization.
# NOTE(review): these equal the usual ImageNet mean/std; the original comment
# attributed them to "the paper on fgsm" - confirm the source.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)


def denorm(img):
    """Return ``img * std + mean``, applied per channel along the first axis.

    The array is transposed so that the channel axis comes last, letting the
    3-element ``std`` and ``mean`` tuples broadcast, then transposed back.
    """
    channels_last = img.T
    rescaled = std * channels_last
    return (rescaled + mean).T

def plot_stack(stack, epsilon, cmap="hsv"):
    """Show a grid of adversarial examples, three per row.

    Args:
        stack: sequence of ``(original_label, predicted_label, image)``
            entries; each image is de-normalized with ``denorm`` and moved
            from channel-first to channel-last layout before display.
        epsilon: value printed in every subplot title.
        cmap: colormap forwarded to ``imshow``.
    """
    stack_len = len(stack)
    cols = 3
    # Ceiling division: just enough rows for all images. The former
    # `stack_len // cols + 1` added an empty row whenever stack_len was a
    # multiple of cols. At least one row keeps the grid valid when empty.
    rows = max(1, -(-stack_len // cols))
    fig = plt.figure(layout="constrained", figsize=(20, 20))
    for i in range(stack_len):
        fig.add_subplot(rows, cols, i+1)
        picture = np.transpose(denorm(stack[i][2]), (1, 2, 0))
        # imshow clips float RGB data to [0, 1] anyway (with a warning);
        # clipping explicitly gives the same picture without the warning
        plt.imshow(np.clip(picture, 0, 1), cmap=cmap)
        plt.title("Epsilon: {}\nOriginal Label: {}\nPredicted Label: {}".format(
            epsilon, stack[i][0], stack[i][1]
        ))
        plt.axis('off')
    plt.show()

In [None]:
def _advex(epsilon=0.1, advex_number=100, image_id=None):
    """Run ``attack`` on the robust and the standard model.

    The pre-computed gradient signs of each model are loaded on the CPU just
    before its attack and released right after, to limit memory usage.

    Args:
        epsilon: perturbation size.
        advex_number: maximum number of examples collected per model.
        image_id: optional dataloader index of a single sample to attack.

    Returns:
        Tuple ``(rob_advex, std_advex)`` of the dicts returned by ``attack``.
    """
    fv.cleanMemGPU()
    rob_grad_sign = torch.load(robust_gradients_file, map_location="cpu")
    rob_advex = attack(robust_model, epsilon, rob_grad_sign, advex_number=advex_number, image_id=image_id)
    del rob_grad_sign
    fv.cleanMemGPU()
    std_grad_sign = torch.load(standard_gradients_file, map_location="cpu")
    std_advex = attack(standard_model, epsilon, std_grad_sign, advex_number=advex_number, image_id=image_id)
    del std_grad_sign
    fv.cleanMemGPU()
    # report what was actually found, which can be fewer than advex_number
    print(
        "Collected", len(rob_advex), "robust and", len(std_advex),
        "non-robust adversarial examples."
    )
    return rob_advex, std_advex

In [None]:
from random import shuffle
# Show perturbed images with their original and predicted labels.

# To analyze one specific image, set image_id to its index and advex_number
# to 1 (e.g. 115 for the flamingos that are not flamingos).
epsilon = 0.1
advex_number = 100
image_id = None
ra, sa = _advex(epsilon=epsilon, advex_number=advex_number, image_id=image_id)

number_of_examples_to_display = 3 # per model

# Pick, in random order, images for which both models yielded an example,
# and interleave them as robust / standard pairs.
ks = list(ra.keys())
shuffle(ks)
shared_ids = [key for key in ks if key in sa][:number_of_examples_to_display]
ex = []
for key in shared_ids:
    ex.extend((ra[key], sa[key]))
plot_stack(ex, epsilon)

In [None]:
# uncomment this code if you don't have the last data.zip version with the flamingos

# image 115
# flamingos = []
# ex = []
# es = [0, 0.01, 0.35]
# image_id = 115
# advex_number = 1
# for epsilon in es:
#     flamingos.append(_advex(epsilon=epsilon, advex_number=advex_number, image_id=image_id))

# for f in flamingos:
#     ex.append(f[0][image_id])
# for f in flamingos:
#     ex.append(f[1][image_id])

# plot_stack(ex, epsilon)

# Pre-rendered flamingo pictures stored in the output folder, one per epsilon.
flamingo_std = [
    os.path.join(output_dir, file_name)
    for file_name in (
        "flamingo-e0.png",
        "flamingo-e0.01.png",
        "flamingo-e0.35.png",
    )
]
flamingo_rob = [
    os.path.join(output_dir, file_name)
    for file_name in (
        "flamingo_rob_e0.png",
        "flamingo_rob_e0.01.png",
        "flamingo_rob_e0.35.png",
    )
]

# only the robust-model pictures are displayed here
s = [Image.open(picture_path).convert("RGB") for picture_path in flamingo_rob]
images(s)

#### Visualization gradient 

In [None]:
# Attack image 115 with three epsilons; each entry of `flamingos` is the
# (robust, standard) pair of example dicts returned by _advex.
es = [0, 0.01, 0.35]
image_id = 115
advex_number = 1
flamingos = []
for epsilon in es:
    pair = _advex(epsilon=epsilon, advex_number=advex_number, image_id=image_id)
    flamingos.append(pair)

# robust examples for all epsilons first, then the standard ones
ex = [pair[0][image_id] for pair in flamingos]
ex += [pair[1][image_id] for pair in flamingos]

# NOTE(review): `epsilon` holds the last value of `es` here, so every subplot
# title shows that same epsilon although the images use different ones.
plot_stack(ex, epsilon)

In [None]:
# flamingos[j] is the (robust, standard) pair for es[j]; element [2] of an
# example tuple is the perturbed image. `per` is therefore the difference
# between the robust-model images obtained with es[1] and es[0].
robust_perturbed_img = flamingos[1][0][115][2]
robust_reference_img = flamingos[0][0][115][2]
per = robust_perturbed_img - robust_reference_img
per[1].shape

In [None]:
# Display the perturbation divided by 0.3: first all channels together
# (channel-last layout for imshow), then each of the three channels alone.
plt.imshow(np.transpose(per/0.3, (1, 2, 0)))
plt.show()
for channel_idx in range(3):
    plt.imshow(per[channel_idx]/0.3)
    plt.show()


In [None]:
# Histogram of the perturbation values of channel index 1, on a log count axis.
pixels = per[1].flatten()

histogram_style = dict(bins=256, range=(-0.4, 0.4), color='gray', alpha=0.9)
plt.hist(pixels, **histogram_style)
plt.title('Histogram of Pixel Values')
plt.xlabel('Pixel Value')
plt.ylabel('Frequency')
#plt.xscale('log')
plt.yscale('log')
plt.show()

In [None]:
# Histogram of the perturbation values of channel index 0, divided by 0.3,
# on a log count axis.
channel_values = per[0].flatten()
pixels = channel_values/0.3

histogram_style = dict(bins=100, color='gray', alpha=0.5)
plt.hist(pixels, **histogram_style)
plt.title('Histogram of Pixel Values')
plt.xlabel('Pixel Value')
plt.ylabel('Frequency')
#plt.xscale('log')
plt.yscale('log')
plt.show()

## Feature extraction

### Load adversarial examples

In [None]:
# # Get adversarial examples
# advex_dir = os.path.join(data_dir, "adversarial_examples")

# advex_std_01 = torch.load(os.path.join(advex_dir, "adv_example_normal_1.pt"))
# advex_std_01_dir = os.path.join(advex_dir, "adv_example_normal_1")
# os.makedirs(advex_std_01_dir, exist_ok=True)

# # Make separate into new files to avoid memory problems
# for i in range(len(advex_std_01)):
#     torch.save(advex_std_01[i], os.path.join(advex_std_01_dir, f'{i}.pt'))

# del advex_std_01
# fv.cleanMemGPU()

### Compute neuron sensitivities

In [None]:
from torchvision.models.feature_extraction import \
    create_feature_extractor, get_graph_node_names

# Loader over the whole validation set used by the sensitivity computation:
# one image per batch, in dataset order.
sensitivity_dataset = fv.ImageNet100ValDataset(dataset_dir)
image_loader = DataLoader(sensitivity_dataset, shuffle=False, batch_size=1)
data_len = len(image_loader)
        
def compute_neuron_sensitivity(model, adversarial_examples):
    """Average absolute activation change between images and adversarial inputs.

    Every graph node of ``model`` is exposed through a feature extractor. For
    each pair, the absolute difference between the activations on the clean
    input and on the adversarial input is accumulated per node.

    NOTE(review): pairs are formed by position - ``adversarial_examples[i]``
    is compared with the i-th item of the module-level ``image_loader``.
    Callers must make sure the i-th example was generated from that image.

    Args:
        model: network to inspect.
        adversarial_examples: indexable collection of adversarial input
            tensors.

    Returns:
        dict mapping each node name to its accumulated difference, divided
        by the number of processed pairs and by the node's element count.

    Raises:
        ValueError: when there is no pair to compare.
    """
    with torch.no_grad():
        adv_len = len(adversarial_examples)

        # Make the model return the output of every node
        _, eval_nodes = get_graph_node_names(model)
        neurons_model = create_feature_extractor(model, return_nodes=eval_nodes)

        # running sum of absolute differences, per node
        sensitivities_sum = {node: 0 for node in eval_nodes}
        processed = 0

        for i, (_, data, _) in enumerate(image_loader):
            if i >= adv_len:
                break
            img_neurons = neurons_model(data.to(device))
            advex_neurons = neurons_model(adversarial_examples[i].to(device))

            sensitivities_sum = {
                key: sensitivities_sum[key]
                + torch.abs(img_neurons[key] - advex_neurons[key])
                for key in sensitivities_sum
            }
            processed += 1

            # clear cache
            # del img_neurons
            # del advex_neurons
            # fv.cleanMemGPU()

        if processed == 0:
            raise ValueError("no image / adversarial example pair to compare")

        # Average over the pairs actually processed (not over the whole
        # dataset length) and normalize by the number of elements per node.
        neuron_sensitivities = {
            key: total / (processed * torch.numel(total))
            for key, total in sensitivities_sum.items()
        }

        return neuron_sensitivities

#### Non-robust model, epsilon = 0.1

In [None]:
# advex_dir = os.path.join(data_dir, "adversarial_examples")
# advex_std_01_dir = os.path.join(advex_dir, "adv_example_normal_1")
# std_sensitivities_file = os.path.join(output_dir, 'sensitivities_std_01.pkl')
# fv.cleanMemGPU()

#### Save to file

In [None]:
# advex_std = []
# for i in range(data_len):
#     advex_std.append(torch.load(os.path.join(advex_std_01_dir, f'{i}.pt')))

# display(advex_std)

In [None]:
# flamingos = []
# ex = []
# epsilon=0
# advex_number = 1
# r = range(100, 150)

# for i in r:
#     print(i)
#     flamingos.append(_advex(epsilon=epsilon, advex_number=advex_number, image_id=i))

In [None]:
def flamingos_advex(epsilons):
    """Compute and cache the adversarial examples of images 100 to 149.

    For each epsilon, the ``_advex`` results of those images (the "flamingo
    range" per the original comment) are saved in ``advex_dir``. Nothing is
    recomputed when the file for that epsilon already exists.

    Args:
        epsilons: iterable of perturbation sizes.
    """
    flamingo_ids = range(100, 150)
    for e in epsilons:
        print("Advexes for epsilon =", e)
        flamingo_file = os.path.join(advex_dir, f"flamingos_advex_e{e}.pt")
        if os.path.exists(flamingo_file):
            continue
        results = [
            _advex(epsilon=e, advex_number=1, image_id=sample_id)
            for sample_id in flamingo_ids
        ]
        torch.save(results, flamingo_file)

In [None]:
epsilons = [0, 0.01, 0.1]
flamingos_advex(epsilons)


def _first_advex_tensors(results, slot):
    """Return the first adversarial image of each result as a batched tensor.

    ``results`` holds (robust, standard) dict pairs and ``slot`` selects one
    of the two. Results without any example are reported by printing the
    number of tensors collected so far and the empty list.
    """
    collected = []
    for pair in results:
        found = list(pair[slot].values())
        if found:
            # found[0][2] is the perturbed image; wrap it to add a batch axis
            collected.append(torch.as_tensor([found[0][2]]))
        else:
            print(len(collected), found)
    return collected


# adversarial inputs per epsilon, for the standard and the robust model
std_advex = dict()
rob_advex = dict()
for e in epsilons:
    flamingo_file = os.path.join(advex_dir, "flamingos_advex_e"+str(e)+".pt")
    flamingos = torch.load(flamingo_file)
    std_advex[e] = _first_advex_tensors(flamingos, 1)
    rob_advex[e] = _first_advex_tensors(flamingos, 0)

In [None]:
# std_advex is a dict keyed by epsilon, so this prints the number of epsilons,
# not the number of adversarial examples.
print(len(std_advex))
# std_sensitivities = compute_neuron_sensitivity(standard_model, std_advex)
#display(sensitivities)

In [None]:
import pickle
# Compute the sensitivities of the non-robust model once and cache them.
std_sensitivities_file = os.path.join(output_dir, 'sensitivities_std_01.pkl')
fv.cleanMemGPU()
if not os.path.exists(std_sensitivities_file):
    # The former `advex_std` name is never defined in the active code. The
    # epsilon = 0.1 examples of the standard model are used instead, matching
    # this section's title and the "_std_01" file name.
    sensitivities = compute_neuron_sensitivity(standard_model, std_advex[0.1])
    # write to the same path that was tested above (`sensitivities_file`
    # was undefined)
    with open(std_sensitivities_file, 'wb') as f:
        pickle.dump(sensitivities, f)
    del sensitivities
    fv.cleanMemGPU()

#### Load file

In [None]:
# Read the cached sensitivities back and print how many entries were loaded.
with open(std_sensitivities_file, 'rb') as sensitivities_handle:
    loaded_sensitivities = pickle.load(sensitivities_handle)

entry_count = len(loaded_sensitivities)
print(entry_count)
# print(loaded_sensitivities)

### Find k most sensitive neurons

In [None]:
def top_k_sensitivities_per_layer(sensitivities, k):
    """Find the k largest sensitivity values across all layers.

    Despite the name, a single top-k is kept over every layer together.
    The entry named "flatten" is skipped.

    Args:
        sensitivities: dict mapping a layer name to a tensor of sensitivities.
        k: number of values to keep.

    Returns:
        Tuple ``(topk, topk_idx)``. ``topk`` is a tensor of k values, in no
        particular order; slots that were never filled stay at 0.
        ``topk_idx[i]`` is ``(layer_name, index)`` for ``topk[i]``, where
        ``index`` is the flattened position (all positions when the value
        occurs several times) in that layer's tensor. Unfilled slots hold 0.
    """
    topk = torch.zeros(k)  # best values found so far
    topk_idx = [0] * k  # layer name and position of each kept value

    for layer_name, tensor in sensitivities.items():
        if layer_name == "flatten":
            continue
        flat = torch.flatten(tensor)
        # A layer can hold fewer than k values; torch.topk raises when asked
        # for more elements than there are, so cap the request.
        candidates, _ = torch.topk(flat, min(k, flat.numel()))

        for val in candidates:
            # a candidate enters the top k when it beats the current minimum,
            # which it then replaces
            min_idx = torch.argmin(topk)
            if val > topk[min_idx]:
                topk[min_idx] = val
                val_idx = (tensor == val).nonzero(as_tuple=False)
                topk_idx[min_idx] = (layer_name, val_idx.flatten())

    return topk, topk_idx

In [None]:
# Re-import fv_utils so that edits made to it since the first import take
# effect; the returned module is assigned to `_` to keep the cell output empty.
_ = reload(fv)

In [None]:
k = 10

# For every epsilon: compute the sensitivities of both models, keep the k
# most sensitive entries, then render the matching channels.
for e in epsilons:
    rob_sens = compute_neuron_sensitivity(robust_model, rob_advex[e])
    std_sens = compute_neuron_sensitivity(standard_model, std_advex[e])
    k_val_rob, k_id_rob = top_k_sensitivities_per_layer(rob_sens, k)
    k_val_std, k_id_std = top_k_sensitivities_per_layer(std_sens, k)

    # "layer:channel" identifiers; the channel is the second index component
    channels = [
        layer_name + ":" + str(position[1].item())
        for layer_name, position in k_id_rob
    ]
    print("Robust visualization with epsilon:", e)
    fv.visualize_channels(channels, robust_model, output_dir, epsilon=e)

    channels = [
        layer_name + ":" + str(position[1].item())
        for layer_name, position in k_id_std
    ]
    print("Non Robust visualization with epsilon:", e)
    fv.visualize_channels(channels, standard_model, output_dir, epsilon=e)

In [None]:
# Random 512-dimensional direction objectives, one for "fc" and one for
# "avgpool", rendered without saving.
# NOTE(review): the same vector length is used for both layers - confirm it
# matches the output size of "fc".
channels = [
    objectives.direction(layer_name, torch.rand(512, device=device))
    for layer_name in ("fc", "avgpool")
]
# for s in topk_idx:
#     channels.append(s[0] + ":" + str(s[1][1].item()))

fv.visualize_channels(channels, standard_model, output_dir, save=False)