In [None]:
import sys
sys.path.append('./scripts/')
import os
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import matplotlib.axes as axes
import seaborn as sns
import math
import copy
import numpy as np
sns.set_style("darkgrid")
from PIL import Image
import random # random seed to reproduce MDS and t-SNE plots

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn import cluster # k-Means clustering
from sklearn.cluster import KMeans
from sklearn import manifold # MDS and t-SNE
from sklearn.metrics import silhouette_score # silhouette width for clustering
from sklearn import preprocessing # scaling attributes
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.metrics.pairwise import pairwise_distances
import hdbscan
import umap

import torch
import torchvision

from lucent.optvis import render, param, transform, objectives

import importlib

import my_datasets
import utilities

# Re-import the local helper modules so that edits made to them on disk are
# picked up without restarting the kernel. `importlib.reload` replaces the
# original `imp.reload`: the `imp` module is deprecated and was removed in
# Python 3.12, where `import imp` raises ModuleNotFoundError.
importlib.reload(my_datasets)
importlib.reload(utilities)

# Default size (in inches) for figures that do not pass their own `figsize`.
plt.rcParams["figure.figsize"] = (3,3)

# Seed both global RNGs. `random.seed` only affects Python's `random` module;
# the scikit-learn manifold estimators (and UMAP) fall back to NumPy's global
# RandomState when no `random_state` is given, so NumPy has to be seeded too
# for the embeddings to be repeatable.
random.seed(2023)
np.random.seed(2023)

In [None]:
# Select the dataset to analyse; the commented-out line is the alternative
# value that the following cell checks for.
# dataset='ilsvrc12fine'
dataset='ilsvrc12'
# `get_dataset` is a project helper. Of its return values, `paths` is indexed
# by image id and opened with PIL further down, and `y` is indexed per image
# (presumably class labels - TODO confirm). `count` and `idx_to_labels` are
# not used again in the cells shown here.
paths, count, y, idx_to_labels = my_datasets.get_dataset(dataset)

# Quick sanity check: show `count` next to the number of paths returned.
print(count, len(paths))

In [None]:
# The 'ilsvrc12fine' variant works on every 10th entry only, so `paths` is
# re-indexed to that subsample. 1281167 is hard-coded (presumably the size of
# the full image list - TODO confirm).
if dataset == 'ilsvrc12fine':
    idxs = np.arange(0, 1281167, 10)
    # Distinct values of `y` that occur in the subsample.
    classes = np.unique(y[idxs])
    # Both names are bound to the same new list, as before.
    paths = ppaths = [paths[i] for i in idxs]

In [None]:
# Name of the layer whose saved outputs are analysed; here it is only used to
# build the folder path below.
layer='Mixed_7b.cat_2'
# Per-dataset output root, relative to the notebook's working directory.
SAVEFOLD0=f'../outputs/{dataset}'
# Per-layer folder that every array below is loaded from and every figure is
# saved under. NOTE: it already ends in "/", so the f"{SAVEFOLD}/..." paths
# built in later cells contain a double slash.
SAVEFOLD=f"{SAVEFOLD0}/{layer}/"

In [None]:
# Open the saved arrays as read-only memory maps (mmap_mode='r') so they are
# not read into RAM in full at load time.
#gradients_wrt_conv_layer=np.load(f"{SAVEFOLD}/gradients_wrt_conv_layer.npy")
predictions=np.load(f"{SAVEFOLD}/predictions.npy", mmap_mode = 'r')
# `conv_maps` is reduced over axes 3 and 2 in the next cell, so it has at
# least four axes (presumably images x channels x height x width - TODO confirm).
conv_maps=np.load(f"{SAVEFOLD}/conv_maps.npy", mmap_mode = 'r')

# pvh=np.load(f"{SAVEFOLD}/eigenvectors.npy",allow_pickle=True, mmap_mode = 'r')

In [None]:
# Average out axis 3 and then axis 2 of `conv_maps`, leaving one vector per
# entry along axis 0 (presumably one channel vector per image - TODO confirm).
conv_maps_avg = conv_maps.mean(axis=3).mean(axis=2)

In [None]:
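# One-off computation: SVD of the averaged activations, saved to the files that
# the next cell loads. Uncomment to regenerate them.
# pu, ps, pvh = np.linalg.svd(conv_maps_avg)

# np.save(f"{SAVEFOLD}/pu.npy", pu)
# np.save(f"{SAVEFOLD}/ps.npy", ps)
# np.save(f"{SAVEFOLD}/eigenvectors.npy", pvh)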

In [None]:
# Load the saved decomposition. Going by the commented-out cell that wrote
# these files, they hold the three outputs of np.linalg.svd(conv_maps_avg):
# `pvh` (stored as eigenvectors.npy) is the third output, `pu` the first and
# `ps` the second. The rows of `pvh` are used below as projection directions.
pvh = np.load(f'{SAVEFOLD}/eigenvectors.npy')
pu = np.load(f'{SAVEFOLD}/pu.npy')
ps = np.load(f'{SAVEFOLD}/ps.npy')

In [None]:
# Chooses which version of the averaged activations becomes `activations`
# further down: any value other than the two strings falls back to the raw data.
transforms = None # None / "standardise" / "normalise"

In [None]:
# Build both rescaled copies of the averaged activations up front; which one
# is used is decided by `transforms` in the next cell.
scale = StandardScaler()
normalise = MinMaxScaler()

# Both scalers are fitted on, and applied to, the same full array
# (scikit-learn scalers operate per column).
standardised_data = scale.fit_transform(conv_maps_avg) 
normalised_data = normalise.fit_transform(conv_maps_avg) # .shape (10000, 2048)

In [None]:
# Map each recognised `transforms` value to (data, message); any other value
# (including None) selects the untransformed averaged activations.
_activation_choices = {
    "standardise": (standardised_data, "Standardise"),
    "normalise": (normalised_data, "Normalise"),
}
activations, _choice_label = _activation_choices.get(
    transforms, (conv_maps_avg, "Raw activations")
)
print(_choice_label)

In [None]:
## Random analysis

Images with the largest projection onto each eigenvector (evec)

In [None]:
# For each of the first `num_dirs` rows of `pvh`, find the `top` images whose
# averaged activation vector has the largest dot product with that row, and
# save them as an image grid under {SAVEFOLD}/analysis/ (skipped when the file
# already exists).
num_dirs = 20
top = 50
n_cols = 5
# True ceiling division: the original `math.ceil(top//5)` floored first, so a
# `top` that is not a multiple of 5 produced too few axes.
n_rows = math.ceil(top / n_cols)

# Dot product of every activation vector with every direction in one matrix
# product (replaces the per-image / per-direction Python loop).
evecs_dot = np.asarray(conv_maps_avg @ pvh[:num_dirs].T, dtype=np.float64)

# Cosine similarity: normalise by the norm of the activation vector AND the
# norm of the direction vector. The original divided by the norm of
# `conv_maps_avg[direction]`, i.e. of an unrelated image's activations.
activation_norms = np.linalg.norm(conv_maps_avg, axis=1)
direction_norms = np.linalg.norm(pvh[:num_dirs], axis=1)
evecs_sim = evecs_dot / (activation_norms[:, None] * direction_norms[None, :])

# Image ids of the `top` largest projections per direction, largest first.
top_evec_projs = [
    evecs_dot[:, direction].argsort()[-top:][::-1]
    for direction in range(evecs_dot.shape[1])
]

# savefig does not create missing folders.
os.makedirs(f"{SAVEFOLD}/analysis", exist_ok=True)

for direction in range(evecs_dot.shape[1]):
    evec_projs_f = f"{SAVEFOLD}/analysis/evec_max_projs_{direction}.png"
    if os.path.exists(evec_projs_f):
        continue

    fig, ax = plt.subplots(n_rows, n_cols, figsize=(10, 2 * n_rows), squeeze=False)
    ax = ax.flatten()
    # Hide every axis up front so unused grid cells stay blank.
    for panel in ax:
        panel.axis('off')
    for idx, im_id in enumerate(top_evec_projs[direction]):
        # Close the file handle as soon as the pixels have been handed to imshow.
        with Image.open(paths[im_id]) as im:
            ax[idx].imshow(im)
        ax[idx].set_title(f"{im_id}", size=8)
    fig.savefig(evec_projs_f, bbox_inches="tight")
    # The figure is on disk; close it so up to `num_dirs` figures do not
    # accumulate in memory (this also means it is not rendered inline).
    plt.close(fig)

UMAP of the top-projecting images for a single eigenvector (evec)

In [None]:
# image collection params
direction = 0
top = 50

# clustering params
linkage='ward'
metric='euclidean'
distance_threshold = 12

kmeans_outlier_threshold = 15
min_ims_cluster = 5

In [None]:
# Here `direction` selects a single row of `pvh` (one evec direction).
# Dot product of every averaged activation vector with that direction.
evec_dot = np.asarray(conv_maps_avg @ pvh[direction], dtype=np.float64)

# Cosine similarity: normalise by the norm of each activation vector and the
# norm of the direction vector itself. The original divided by the norm of
# `conv_maps_avg[direction]`, i.e. of an unrelated image's activations.
evec_sim = evec_dot / (
    np.linalg.norm(conv_maps_avg, axis=1) * np.linalg.norm(pvh[direction])
)

# Ids of the `top` images with the largest projection, largest first, and
# their rows from the selected `activations` array (via the project helper).
top_ims = evec_dot.argsort()[-top:][::-1]
top_activations = utilities.get_activations(activations_avg = activations, ims=top_ims)

n_cols = 5
# True ceiling division: the original `math.ceil(top//5)` floored first, so a
# `top` that is not a multiple of 5 produced too few axes.
n_rows = math.ceil(top / n_cols)
fig, ax = plt.subplots(n_rows, n_cols, figsize=(10, 2 * n_rows), squeeze=False)
ax = ax.flatten()
# Hide every axis up front so unused grid cells stay blank.
for panel in ax:
    panel.axis('off')
for idx, im_id in enumerate(top_ims):
    # Close the file handle as soon as the pixels have been handed to imshow.
    with Image.open(paths[im_id]) as im:
        ax[idx].imshow(im)
    ax[idx].set_title(f"{im_id}", size = 8)

In [None]:
# 2-D UMAP embedding of the activations of the top-projecting images, drawn
# as a scatter plot with each image's thumbnail placed near its point.
evec_UMAP_f = f"{SAVEFOLD}/analysis/evec_UMAP_{direction}.png"
# savefig does not create missing folders.
os.makedirs(os.path.dirname(evec_UMAP_f), exist_ok=True)

# Fixed random_state (same value as the notebook seed) so that re-running the
# cell gives the same layout; without it every run produces a different embedding.
XY_UMAP = umap.UMAP(n_components=2, random_state=2023).fit_transform(top_activations)
amount = .1  # jitter amount passed to utilities.rand_jitter
fig, ax = plt.subplots(figsize = (10,10))
#ax.set_title("UMAP")
ax.scatter(XY_UMAP[:,0], XY_UMAP[:,1])

# Thumbnails are anchored at jittered copies of the embedding coordinates.
jittered_x = utilities.rand_jitter(XY_UMAP[:,0], amount)
jittered_y = utilities.rand_jitter(XY_UMAP[:,1], amount)
for x0, y0, path in zip(jittered_x, jittered_y, [paths[i] for i in top_ims]):
    ab = AnnotationBbox(utilities.getImage(path, zoom = 0.1), (x0, y0), frameon=False)
    ax.add_artist(ab)

# Remove the tick labels from both axes.
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])
#ax.axis('off')
fig.savefig(evec_UMAP_f, bbox_inches="tight")

In [None]:
# Show the array loaded from ps.npy (going by the commented-out SVD cell, the
# second output of np.linalg.svd, i.e. the singular values).
print(ps)