In [1]:
import os
import pandas as pd
import json
import matplotlib.pyplot as plt
from collections import Counter, OrderedDict

### 1. Load data on neurons and their similar concepts 

In [3]:
# Root directory that holds one sub-folder per network-dissect run.
# Set the NETWORK_DISSECT_RESULTS_DIR environment variable to run the notebook
# on another machine; the fallback is the original local path.
RESULTS_DIR = os.environ.get(
    "NETWORK_DISSECT_RESULTS_DIR",
    "/Users/nursulusagimbayeva/Downloads/TrustworthyML-24/neural_networks_explainability/network_dissect/results",
)


def load_descriptions(run_name):
    """Read `<RESULTS_DIR>/<run_name>/descriptions.csv` into a DataFrame.

    Parameters
    ----------
    run_name : str
        Name of the run folder inside RESULTS_DIR.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV. Later cells read its 'layer', 'unit', 'description'
        and 'similarity' columns.
    """
    return pd.read_csv(os.path.join(RESULTS_DIR, run_name, "descriptions.csv"))


outputs_18_img = load_descriptions("resnet18_imagenet_24_07_25_19_50")
outputs_18_places = load_descriptions("resnet18_places_24_07_25_19_42")
outputs_50 = load_descriptions("resnet50_imagenet_24_07_25_22_10")

In [4]:
# "outputs" is the model currently under inspection; bind it to another
# loaded frame to look at a different model.
outputs = outputs_18_img

# One (layer, unit, description, similarity) tuple per row of the frame
record_columns = ('layer', 'unit', 'description', 'similarity')
outputs_tuples = list(zip(*(list(outputs[column]) for column in record_columns)))

In [5]:
# Peek at the first record; each tuple is (layer, unit, description, similarity)
outputs_tuples[0]

('layer3', 0, 'juvenile', 0.038391113)

### 2. Inspect most similar concepts

In [6]:
# Layer under inspection and how many of its best-matching units to keep.
# NOTE: the result name `top_50_layer_tuples` is kept for the cells below even
# if TOP_K is changed.
TARGET_LAYER = 'fc'
TOP_K = 50

# 1: Keep only the tuples whose layer (first element) is TARGET_LAYER
layer_tuples = [t for t in outputs_tuples if t[0] == TARGET_LAYER]

# 2: Sort the filtered tuples by similarity (last element), highest first
sorted_layer_tuples = sorted(layer_tuples, key=lambda x: x[-1], reverse=True)

# 3: Select the TOP_K most similar tuples
top_50_layer_tuples = sorted_layer_tuples[:TOP_K]

In [7]:
# Tally how often each description occurs among the selected top tuples
concepts_learned_50 = Counter(
    description for _, _, description, _ in top_50_layer_tuples
)

In [9]:
# Largest similarity value (last tuple element) among the selected top tuples
top_probs = max(similarity for *_, similarity in top_50_layer_tuples)

In [10]:
# Show the highest similarity found among the selected top tuples
top_probs

0.22399902

In [11]:
# Show the selected tuples, ordered by descending similarity
top_50_layer_tuples

[('fc', 318, 'insect', 0.22399902),
 ('fc', 144, 'pelican', 0.22184753),
 ('fc', 300, 'insect', 0.21965027),
 ('fc', 312, 'insect', 0.21437073),
 ('fc', 308, 'insect', 0.2116394),
 ('fc', 310, 'insect', 0.20948792),
 ('fc', 303, 'insect', 0.207901),
 ('fc', 316, 'insect', 0.20521545),
 ('fc', 317, 'insect', 0.20162964),
 ('fc', 70, 'insect', 0.20101929),
 ('fc', 131, 'bird', 0.1995697),
 ('fc', 189, 'terrier', 0.1993103),
 ('fc', 302, 'insect', 0.19837952),
 ('fc', 184, 'terrier', 0.198349),
 ('fc', 304, 'insect', 0.19819641),
 ('fc', 196, 'terrier', 0.1958313),
 ('fc', 311, 'insect', 0.19548035),
 ('fc', 127, 'pelican', 0.19422913),
 ('fc', 307, 'insect', 0.19335938),
 ('fc', 188, 'terrier', 0.19238281),
 ('fc', 134, 'ibis', 0.19204712),
 ('fc', 320, 'insect', 0.19102478),
 ('fc', 315, 'insect', 0.1884613),
 ('fc', 253, 'basename', 0.18743896),
 ('fc', 175, 'dog', 0.18603516),
 ('fc', 141, 'birding', 0.18589783),
 ('fc', 185, 'terrier', 0.18504333),
 ('fc', 73, 'spider', 0.18486023),


### 3. Plotting the distribution of concepts for all the models

In [None]:
def concept_distribution(outputs_df, layer='fc'):
    """Count how many units of `layer` were assigned each description.

    Parameters
    ----------
    outputs_df : pandas.DataFrame
        Frame with 'layer', 'description' and 'similarity' columns.
    layer : str
        Value of the 'layer' column to keep (default 'fc', the layer
        inspected in section 2).

    Returns
    -------
    OrderedDict
        description -> number of units, most frequent first.
    """
    rows = zip(outputs_df['layer'], outputs_df['description'], outputs_df['similarity'])
    # Rank by descending similarity before counting (as `sorted_layer_tuples`
    # is built) so that descriptions with equal counts keep the same order.
    ranked = sorted(
        (row for row in rows if row[0] == layer),
        key=lambda row: row[-1],
        reverse=True,
    )
    return OrderedDict(Counter(description for _, description, _ in ranked).most_common())


# Distribution for the model currently bound to `outputs`
concepts_learned = Counter([el[2] for el in sorted_layer_tuples])

# One distribution per loaded model, so the plotting cell below runs on a fresh
# kernel without re-binding `outputs` and re-running earlier cells by hand.
# NOTE(review): assumes every model's CSV labels its final layer 'fc' — confirm.
resnet18_img_concepts = concept_distribution(outputs_18_img)
resnet18_places_concepts = concept_distribution(outputs_18_places)
resnet50_concepts = concept_distribution(outputs_50)

In [None]:
# Concept-count dictionaries, one per model, with the matching panel titles
titles = ['ResNet18_Places', 'ResNet18_ImageNet', 'Resnet50_ImageNet']
dicts = [resnet18_places_concepts, resnet18_img_concepts, resnet50_concepts]

# A single row with one panel per model
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

# Bars are placed by position in the dictionary; heights are the stored counts
for ax, counts, heading in zip(axes, dicts, titles):
    positions = range(len(counts))
    heights = list(counts.values())

    ax.bar(positions, heights, color='skyblue')
    ax.set(title=heading, xlabel='Keys', ylabel='Values')

# Tighten the spacing between panels, then render the figure
plt.tight_layout()
plt.show()