## Set up

In [None]:
import sys
sys.path.append('./scripts/')
import os

import matplotlib.pyplot as plt
import seaborn as sns
import math
import copy
import numpy as np
sns.set_style("darkgrid")
from PIL import Image
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# `imp` has been deprecated since Python 3.4 and was removed in Python 3.12;
# `importlib.reload` is the supported way to re-import a module.
import importlib

import my_datasets
import utilities

# Reload the project helper modules so that edits made to them on disk are
# picked up without restarting the kernel.
importlib.reload(my_datasets)
importlib.reload(utilities)

In [None]:
# Name of the dataset to analyse; it is also used to build the output folder.
dataset = 'ilsvrc12'

# The project helper returns four values. From how they are used further down:
# `paths` is indexed by image index and opened as image files, `y` holds
# per-image labels and `idx_to_labels` maps a label index (as a string) to a
# label record -- TODO confirm against my_datasets.get_dataset.
dataset_bundle = my_datasets.get_dataset(dataset)
paths, count, y, idx_to_labels = dataset_bundle

# Quick sanity check: reported count next to the number of paths.
print(count, len(paths))

In [None]:
# Layer whose saved outputs are analysed in this notebook.
layer = 'Mixed_7b.cat_2'

# Per-dataset output root.
SAVEFOLD0 = f'../outputs/{dataset}'

# Per-layer folder inside the root. The trailing slash is kept on purpose:
# some cells build paths as SAVEFOLD + "name", others as f"{SAVEFOLD}/name".
SAVEFOLD = f"{SAVEFOLD0}/{layer}/"

In [None]:
# Arrays previously saved for this layer. The four loads are independent of
# each other; `conv_maps` and `pvh` are the ones the following cells build on.
conv_maps = np.load(f"{SAVEFOLD}/conv_maps.npy")

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, so eigenvectors.npy must come from a trusted source.
pvh = np.load(f"{SAVEFOLD}/eigenvectors.npy", allow_pickle=True)

predictions = np.load(f"{SAVEFOLD}/predictions.npy")
gradients_wrt_conv_layer = np.load(f"{SAVEFOLD}/gradients_wrt_conv_layer.npy")

In [None]:
# conv_maps.shape was (10000, 2048, 8, 8) when this was written.
# Global average pooling (GAP): the feature maps are 8*8 for layer
# Mixed_7b.cat_2, so average over the last axis and then over the remaining
# trailing axis to get one value per image and channel.
conv_maps_avg = np.mean(np.mean(conv_maps, axis=3), axis=2)
# conv_maps_avg.shape is then (10000, 2048)

In [None]:
# Project the pooled activations onto every row of `pvh`: column k of
# `projections` receives the dot product of each image's activation vector
# with pvh[k]. Columns with no matching row in `pvh` stay at zero.
projections = np.zeros(conv_maps_avg.shape)  # .shape (10000, 2048)
for column_index, direction in enumerate(pvh):
    projections[:, column_index] = np.dot(conv_maps_avg, direction.T)

In [None]:
# Selects which representation of the pooled activations is analysed:
#   "SVD"         -> projections onto the loaded eigenvectors
#   "standardise" -> StandardScaler output
#   "normalise"   -> MinMaxScaler output
#   anything else (e.g. None) -> the raw pooled activations
transforms = "SVD" # "SVD" / None / "standardise" / "normalise"

In [None]:
# Standardised copy of the pooled activations (StandardScaler, fitted on the
# same data it transforms).
scale = StandardScaler().fit(conv_maps_avg)
standardised_data = scale.transform(conv_maps_avg)

# Min-max rescaled copy (MinMaxScaler with its default settings).
normalise = MinMaxScaler().fit(conv_maps_avg)
normalised_data = normalise.transform(conv_maps_avg)  # .shape (10000, 2048)

In [None]:
# Dispatch table: each supported `transforms` value maps to a lazy getter for
# the matching array and the message to print. The getters are lambdas so that
# only the selected array has to exist when this cell runs.
_activation_choices = {
    "standardise": (lambda: standardised_data, "Standardise"),
    "normalise": (lambda: normalised_data, "Normalised"),
    "SVD": (lambda: projections, "SVD"),
}
# Any other value falls back to the raw pooled activations.
_raw_choice = (lambda: conv_maps_avg, "Raw activations")

_get_activations, _choice_message = _activation_choices.get(transforms, _raw_choice)
activations = _get_activations()
print(_choice_message)

## Boxplots for neuron activations on Imagenet

In [None]:
# Folder that receives the boxplot figures. This cell used to create
# "boxplots", but the figures are saved under "activation_boxplots", so the
# save failed whenever the output folder was fresh.
# makedirs(exist_ok=True) keeps the cell safe to re-run.
os.makedirs(SAVEFOLD + "activation_boxplots", exist_ok=True)

In [None]:
num_neurons = conv_maps.shape[1]

# One figure per block of 100 columns of `activations`, each figure holding one
# boxplot panel per group of 10 columns. Figures that already exist on disk
# are skipped.
boxplot_dir = f"{SAVEFOLD}/activation_boxplots"
# Create the target folder here so savefig cannot fail on a missing directory.
os.makedirs(boxplot_dir, exist_ok=True)

for hundreds in range(math.ceil(num_neurons/100)):
    boxplot_f = f"{boxplot_dir}/hundreds_{hundreds}.png"

    if os.path.exists(boxplot_f):
        print("File already exists!")
        continue

    block_start = hundreds * 100
    block_stop = min(block_start + 100, num_neurons)  # exclusive
    group_starts = range(block_start, block_stop, 10)

    # Only allocate as many panels as there are groups, so the last
    # (partial) block does not end with empty axes.
    fig, ax = plt.subplots(len(group_starts), 1,
                           figsize=(8, 8 * len(group_starts)), squeeze=False)
    ax = ax.flatten()

    for panel, start in zip(ax, group_starts):
        stop = min(start + 10, block_stop)  # exclusive; clamps the final group
        panel.set_title(f"Distribution of neuron {start}-{stop - 1} activations")
        panel.set_ylim([0, 3])
        # Pass the array as `data=` explicitly: the meaning of the first
        # positional argument of sns.boxplot differs between seaborn versions.
        sns.boxplot(data=activations[:, start:stop], ax=panel)

    fig.savefig(boxplot_f, bbox_inches="tight")
    # Release the figure; otherwise every block keeps a large figure alive.
    plt.close(fig)
    print("saved file!")

## Random analysis

In [None]:
# Output folder for the image grids of maximally activating examples.
# makedirs(exist_ok=True) replaces the check-then-mkdir pair and is safe to
# re-run.
os.makedirs(SAVEFOLD + "max_activating_ims", exist_ok=True)

In [None]:
# For every column of `activations`, show the 10 dataset images with the
# highest value in that column. One PNG is written per block of 100 columns;
# each row of the grid is a text label followed by the 10 images.
# NOTE(review): with transforms == "SVD" the columns of `activations` are
# projections, yet the rows are still labelled "Neuron" -- confirm intended.
num_neurons = conv_maps.shape[1]
sns.set()

for hundreds in range(math.ceil(num_neurons/100)):
    max_activating_ims_f = f"{SAVEFOLD}/max_activating_ims/hundreds_{hundreds}.png"
    # Already clamped to num_neurons, so no extra bounds check is needed below.
    neuron_range = range(hundreds*100, min(hundreds*100+100, num_neurons))

    if os.path.exists(max_activating_ims_f):
        print("File already exists!")
        continue

    # One grid row per neuron actually plotted, so the last (partial) block
    # does not carry empty rows.
    n_rows = len(neuron_range)
    fig, ax = plt.subplots(n_rows, 11, figsize=(22, 2 * n_rows), squeeze=False)
    for cell in ax.ravel():
        cell.axis('off')

    for row_axes, neuron in zip(ax, neuron_range):
        # Indices of the 10 largest values in this column, largest first.
        top_10 = activations[:, neuron].argsort()[-10:][::-1]
        row_axes[0].text(1.0, 0.5, "Neuron "+str(neuron), ha='right', va='center', family='sans-serif', size=16)

        for image_ax, act in zip(row_axes[1:], top_10):
            # Close each image file once it has been drawn; a block opens up
            # to 1000 files.
            with Image.open(paths[act]) as im:
                image_ax.imshow(im)
            image_ax.set_title(str(act)+" : "+str(round(activations[act,neuron],3)), size=12)

    # Save before showing, so the file never depends on what show() does to
    # the figure, then release it.
    fig.savefig(max_activating_ims_f, bbox_inches="tight")
    plt.show()
    plt.close(fig)
    print("saved file!")

In [None]:
# Scratch cell: inspect the loop variables left over from the grid-plotting
# loop. In a notebook only the last bare expression is displayed. The trailing
# comments record values seen in an earlier run.
neuron # 240
neuron_range # range(140, 1500)

### SVD directions

In [None]:
# Output folder for the per-direction image grids; only needed when the SVD
# projections are being analysed. makedirs(exist_ok=True) is safe to re-run.
if transforms == "SVD":
    os.makedirs(SAVEFOLD + "evec_max_activating_ims", exist_ok=True)

In [None]:
# Same image grid as for the neurons, but labelled per SVD direction and
# written to evec_max_activating_ims/. Only runs when `activations` holds the
# projections.
if transforms == "SVD":
    num_neurons = conv_maps.shape[1]
    sns.set()

    for hundreds in range(math.ceil(num_neurons/100)):
        max_activating_ims_f = f"{SAVEFOLD}/evec_max_activating_ims/hundreds_{hundreds}.png"
        # Already clamped to num_neurons, so no extra bounds check is needed.
        neuron_range = range(hundreds*100, min(hundreds*100+100, num_neurons))

        if os.path.exists(max_activating_ims_f):
            print("File already exists!")
            continue

        # One grid row per direction actually plotted, so the last (partial)
        # block does not carry empty rows.
        n_rows = len(neuron_range)
        fig, ax = plt.subplots(n_rows, 11, figsize=(22, 2 * n_rows), squeeze=False)
        for cell in ax.ravel():
            cell.axis('off')

        for row_axes, neuron in zip(ax, neuron_range):
            # Indices of the 10 largest projections on this direction,
            # largest first.
            top_10 = activations[:, neuron].argsort()[-10:][::-1]
            row_axes[0].text(1.0, 0.5, "Direction "+str(neuron), ha='right', va='center', family='sans-serif', size=16)

            for image_ax, act in zip(row_axes[1:], top_10):
                # Close each image file once it has been drawn.
                with Image.open(paths[act]) as im:
                    image_ax.imshow(im)
                image_ax.set_title(str(act)+" : "+str(round(activations[act,neuron],3)), size=12)

        # Save before showing, then release the figure.
        fig.savefig(max_activating_ims_f, bbox_inches="tight")
        plt.show()
        plt.close(fig)
        print("saved file!")

## Maximally activating dataset examples

### activations for one image

In [None]:
# Print the second field of the label record for the first example, then
# open its image (displayed as the cell output).
first_label_key = str(int(y[0]))
first_label_record = idx_to_labels[first_label_key]
print(first_label_record[1])
Image.open(paths[0])

In [None]:
# np.argmax(activations[0]) # 1215
# activations[0].argsort()[-5:][::-1] # array([1215, 1527, 1620, 1876, 1624])

In [None]:
# example 0 activations
# Row 0 of `activations` (the first example): print the values, then draw
# them as a line plot.
example_0_activations = activations[0]
print(example_0_activations)
plt.plot(example_0_activations)

## RW

### Boxplots

In [None]:
# Distribution of column 0 of the pooled activations across all images, drawn
# as a box plot with a violin plot on top in the same figure.
first_column = conv_maps_avg[:, 0]
plt.figure(figsize=(8, 8))
plt.boxplot(first_column)
plt.violinplot(first_column)
# sns.boxplot(conv_maps_avg[:,0])
# sns.violinplot(conv_maps_avg[:,0])
# plt.boxplot(conv_maps_avg[:,:10])
# plt.violinplot(conv_maps_avg[:,:10])

# for start in range(0,2048,10):
#     fig, ax = plt.subplots()
#     ax.set_title(f"Distribution of neuron {start}-{start + 10} activations")
#     sns.boxplot(conv_maps_avg[:,start:start+10])

In [None]:
# # tried 70*10 700 at a time, couldn't open plots
# boxplot_f0 = f"{SAVEFOLD}/boxplot0.png"
# boxplot_f1 = f"{SAVEFOLD}/boxplot1.png"
# boxplot_f2 = f"{SAVEFOLD}/boxplot2.png"

# num_neurons=conv_maps.shape[1]
# # need to break into 3 plots
# num_neurons0=700
# num_neurons1=1400
# # num_neurons2=num_neurons

# if not os.path.exists(boxplot_f0):
#     fig, ax = plt.subplots(math.ceil(num_neurons0/10), 1, figsize=(8, 8 * math.ceil(num_neurons0/10)))
#     for start in range(0,num_neurons0,10):
#         ax[start//10].set_title(f"Distribution of neuron {start}-{start + 9} activations")
#         sns.boxplot(conv_maps_avg[:,start:start+10], ax = ax[start//10])
#     plt.savefig(boxplot_f0) 
#     print("saved file!")
# else: 
#     print("File already exists!")

In [None]:
# if not os.path.exists(boxplot_f1):
#     fig, ax = plt.subplots(math.ceil((num_neurons1-num_neurons0)/10), 1, figsize=(8, 8 * math.ceil((num_neurons1-num_neurons0)/10)))
#     for start in range(num_neurons0,num_neurons1,10):
#         ax[(start//10)-(num_neurons0//10)].set_title(f"Distribution of neuron {start}-{start + 9} activations")
#         sns.boxplot(conv_maps_avg[:,start:start+10], ax = ax[(start//10)-(num_neurons0//10)])
#     plt.savefig(boxplot_f1) 
#     print("saved file!")
# else: 
#     print("File already exists!")
#     Image.open(boxplot_f1)

In [None]:
# if not os.path.exists(boxplot_f2):
#     fig, ax = plt.subplots(math.ceil(num_neurons-num_neurons1/10), 1, figsize=(8, 8 * math.ceil((num_neurons-num_neurons1)/10)))
#     for start in range(num_neurons1,num_neurons,10):
#         ax[(start//10)-(num_neurons1//10)].set_title(f"Distribution of neuron {start}-{start + 9} activations")
#         sns.boxplot(conv_maps_avg[:,start:start+10], ax = ax[(start//10)-(num_neurons1//10)])
#     plt.savefig(boxplot_f2) 
#     print("saved file!")
# else: 
#     print("File already exists!")

### Maximally activating dataset examples


In [None]:
# Report the strongest dataset examples for one column of the raw pooled
# activations. The messages used to say "10" although 25 indices are
# computed; they now follow `top_k`.
neuron = 0
top_k = 25
neuron_column = conv_maps_avg[:, neuron]

print("\nmaximally activating dataset example: ")
print(np.argmax(neuron_column)) # 5205 in an earlier run

print(f"\n{top_k} maximally activating dataset examples: ")
# argsort is ascending, so take the last top_k indices and reverse them to
# get the largest first.
top_25 = neuron_column.argsort()[-top_k:][::-1]
print(top_25)

print(f"\n{top_k} corresponding highest activations: ")
print(conv_maps_avg[top_25, neuron])

In [None]:
# Open the 10 strongest images for the current `neuron` in the system viewer.
# `top_10` is recomputed here from the raw pooled activations; previously the
# loop used whatever `top_10` an earlier cell had left behind, which need not
# belong to this neuron.
top_10 = conv_maps_avg[:, neuron].argsort()[-10:][::-1]
for i in top_10:
    print("Image: " + str(i))
    # Close the file handle once the viewer has been handed the image.
    with Image.open(paths[i]) as im:
        im.show()

In [None]:
# working example for one neuron
neuron=0
top_10 = conv_maps_avg[:,neuron].argsort()[-10:][::-1]
fig, ax = plt.subplots(1, 11, figsize=(22, 2))
ax = ax.flatten()
sns.set()
i=0
ax[i].text(1.0, 0.5, "Neuron "+str(neuron), ha='right', va='center', family='sans-serif', size=16)
ax[i].axis('off')
i+=1
for act in top_10:
    # print("Image: " + str(act))
    im = Image.open(paths[act])
    # im.show() 
    ax[i].imshow(im)
    ax[i].axis('off')
    ax[i].set_title(str(act)+" : "+str(round(conv_maps_avg[act,neuron],3)), size=12)
    i+=1
plt.show()

In [None]:
# Works for fifty neurons: draw the label + top-10 image strip for neurons
# 0-49 in one figure. The figure is only shown, not saved; drawing is skipped
# when the target file already exists.
num_neurons = conv_maps.shape[1]
plt.clf()
sns.set()
fifty = 0
max_activating_ims_f = f"{SAVEFOLD}/max_activating_ims_{fifty}.png"
neuron_range = range(50)

already_saved = os.path.exists(max_activating_ims_f)
if not already_saved:
    fig, ax = plt.subplots(50, 11, figsize=(22, 100)) # (10, 11, figsize=(22, 20))
    ax = ax.flatten()
    # Hand out the flattened axes in order: per neuron one label cell, then
    # one cell per image.
    axes_in_order = iter(ax)
    for neuron in neuron_range:
        top_10 = conv_maps_avg[:,neuron].argsort()[-10:][::-1]

        label_ax = next(axes_in_order)
        label_ax.text(1.0, 0.5, "Neuron "+str(neuron), ha='right', va='center', family='sans-serif', size=16)
        label_ax.axis('off')

        for act in top_10:
            image_ax = next(axes_in_order)
            im = Image.open(paths[act])
            image_ax.imshow(im)
            image_ax.axis('off')
            shown_value = round(conv_maps_avg[act,neuron],3)
            image_ax.set_title(str(act)+" : "+str(shown_value), size=12)
    plt.show()

In [None]:
# Just looking: the same grid in chunks of 50 neurons, with a deliberately
# small neuron count (101) to exercise a partial last chunk.
num_neurons = 101# conv_maps.shape[1]
sns.set()

for fifties in range(math.ceil(num_neurons/50)):
    max_activating_ims_f = f"{SAVEFOLD}/max_activating_ims_{fifties}.png"
    # Clamp the chunk to num_neurons so the last chunk stops at the limit.
    neuron_range = range(fifties*50, min(fifties*50+50, num_neurons))

    if os.path.exists(max_activating_ims_f):
        print("File already exists!")
        continue

    # One grid row per neuron in this chunk: label cell + 10 image cells.
    n_rows = len(neuron_range)
    fig, ax = plt.subplots(n_rows, 11, figsize=(22, 2 * n_rows), squeeze=False)
    for cell in ax.ravel():
        cell.axis('off')

    for row_axes, neuron in zip(ax, neuron_range):
        # Indices of the 10 largest raw pooled activations, largest first.
        top_10 = conv_maps_avg[:,neuron].argsort()[-10:][::-1]
        row_axes[0].text(1.0, 0.5, "Neuron "+str(neuron), ha='right', va='center', family='sans-serif', size=16)

        for image_ax, act in zip(row_axes[1:], top_10):
            # Close each image file once it has been drawn.
            with Image.open(paths[act]) as im:
                image_ax.imshow(im)
            image_ax.set_title(str(act)+" : "+str(round(conv_maps_avg[act,neuron],3)), size=12)

    # Save through the figure object and before show(): plt.savefig after
    # plt.show() writes whatever figure is current at that point, which can
    # be a new empty one.
    fig.savefig(max_activating_ims_f, bbox_inches="tight")
    plt.show()
    plt.close(fig)
    print("saved file!")

In [None]:
# Open one previously saved grid image for a quick look.
# NOTE(review): the grid cell that writes into max_activating_ims/ names its
# files "hundreds_<n>.png", so this "tens_" file presumably comes from an
# earlier version of the notebook -- confirm it still exists.
tens = 0
preview_path = f"{SAVEFOLD}/max_activating_ims/tens_{tens}.png"
Image.open(preview_path) # .size (640, 480)