## Import

In [None]:
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
from utils import *
from torch.autograd import Variable
import os
import numpy as np

## Check: load and display one saved adversarial sample

In [None]:
# Load the sample stored in fgsm_0.pt for a visual sanity check.
# map_location='cpu' lets the file open on machines without CUDA even when the
# tensors were saved from a GPU; the plotting cell moves the data to the CPU
# anyway, so nothing downstream in this section needs it on the GPU.
check = torch.load('./data/adversarial/fgsm_0.pt', map_location='cpu')

In [None]:
# Display the first entry of `check` as an image.
first_entry = check[0].detach().cpu()
# squeeze(0) drops a leading size-1 axis (if present); permute(1, 2, 0) moves
# the first remaining axis to the end -- presumably channels-first ->
# channels-last, which is what imshow expects (confirm against the saved data).
image = first_entry.squeeze(0).permute(1, 2, 0)
plt.imshow(image)
plt.show()

## Model, data loading and intermediate-feature analysis

In [None]:
# Pick the GPU only when CUDA is actually usable, otherwise fall back to CPU.
# `torch.cuda.is_available` has to be *called*: the bare function object is
# always truthy, so without the parentheses 'cuda' was selected unconditionally
# and `model.to(device)` would fail on CPU-only machines.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# `load_model` is not defined in this file; it is expected to come from the
# `from utils import *` star import.
model = load_model('./checkpoints/model.th')
model = model.to(device)

In [None]:
# The two folders must contain the same number of files so that normal and
# adversarial samples can be compared pairwise.
adv_file_count = len(os.listdir('./data/adversarial'))
normal_file_count = len(os.listdir('./data/normal'))
assert adv_file_count == normal_file_count

# `load_samples` is expected to come from the `utils` star import; it returns
# a (data, targets) pair for each folder.
normal_data, targets = load_samples('./data/normal', 'fgsm')
print('Finish loading all normal examples')
adv_data, targets_ = load_samples('./data/adversarial', 'fgsm')
print('Finish loading all adversarial examples')

# Every target from the normal set must equal its adversarial counterpart.
matching_targets = torch.eq(targets, targets_).sum().item()
assert matching_targets == len(targets)

In [None]:
# Number of direct child modules of the model; this is the range of layer
# indices iterated over in the per-layer loop further down.
len(list(model.children()))

In [None]:
def intermediate_feat(model, inputs, layer_ct):
    """Return the output of a partial execution of `model` on `inputs`.

    The partial model is built by `partial_model_execute(model, layer_ct)`
    (not defined in this file; expected from the `utils` star import), and
    the forward pass runs with gradient tracking disabled.

    Args:
        model: The network to execute partially.
        inputs: Tensor of samples; it is moved to the module-level `device`
            before the forward pass.
        layer_ct: Passed straight to `partial_model_execute` -- presumably
            the layer index/count at which execution stops (confirm in utils).

    Returns:
        The partial model's output, detached and moved to the CPU.
    """
    truncated_model = partial_model_execute(model, layer_ct)
    device_inputs = inputs.to(device)
    with torch.no_grad():
        activations = truncated_model(device_inputs)
    return activations.detach().cpu()

In [None]:
# Walk over the model's direct children by index and extract the features of
# both data sets at that layer.
# NOTE: the unconditional `break` stops after the first layer (index 0), so
# this currently acts as a single-layer smoke test rather than a full sweep.
for lay, _ in enumerate(model.children()):
    output_normal_feat = intermediate_feat(model, inputs=normal_data, layer_ct=lay)
    output_adv_feat = intermediate_feat(model, inputs=adv_data, layer_ct=lay)
    break
    

In [None]:
# Layer whose activations are analysed in the cells below.
FEATURE_LAYER = 9
# Extract the features of the normal and adversarial sets at that layer.
output_normal_feat = intermediate_feat(model, inputs=normal_data, layer_ct=FEATURE_LAYER)
output_adv_feat = intermediate_feat(model, inputs=adv_data, layer_ct=FEATURE_LAYER)

In [None]:
# Per-sample summary statistics of the extracted activations, plus the raw
# element-wise difference between each normal sample and the adversarial
# sample at the same index.
# NOTE: despite their names, the *_var lists hold standard deviations
# (np.std), not variances.
normal_mean, normal_var = [], []
adv_mean, adv_var = [], []
diff_list = []

for i in range(len(output_adv_feat)):  # one iteration per sample
    # Flatten each sample once and reuse the resulting 1-D arrays.
    normal_flat = output_normal_feat[i].flatten().numpy()
    adv_flat = output_adv_feat[i].flatten().numpy()

    normal_var.append(np.std(normal_flat))
    normal_mean.append(np.mean(normal_flat))

    adv_var.append(np.std(adv_flat))
    adv_mean.append(np.mean(adv_flat))

    feature_map_diff = normal_flat - adv_flat
    diff_list.append(feature_map_diff)

# Stack the per-sample differences: one row per sample, one column per
# flattened activation (when at least one sample was processed).
diff_list = np.asarray(diff_list)




In [None]:
# For each of the first (up to 100) activations, plot a histogram over all
# samples of the normal-minus-adversarial difference, one figure each.
# The count is capped at the number of columns actually present in
# `diff_list`, so a layer with fewer than 100 activations (or an empty
# difference array) no longer ends in an IndexError.
num_activations = diff_list.shape[1] if diff_list.ndim == 2 else 0
for i in range(min(100, num_activations)):
    plt.hist(diff_list[:, i], bins=30)
    plt.title('The distribution of differences in the {}th activation'.format(i))
    plt.show()

