# Initialization

In [None]:
! pip install ftfy regex tqdm
! pip install git+https://github.com/openai/CLIP.git

import os
import clip
import torch
import pickle
import torchvision
import numpy as np
from glob import glob
from tqdm import tqdm
from PIL import Image
import torch.nn as nn
import matplotlib.pyplot as plt
from pkg_resources import packaging

In [None]:
# Download the dataset used
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip tiny-imagenet-200.zip

# Model loading

In [None]:
# Load the CLIP checkpoint together with the preprocessing callable that is
# applied to PIL images before they are fed to the model.
CLIP_MODEL_NAME = "RN50x4"
model, preprocess = clip.load(CLIP_MODEL_NAME)

In [None]:
class ModifiedResNetTraverser:
    """Looks up a nested sub-module of ``model`` by its dotted name.

    The name is split on ``.`` and each part is matched against the names
    yielded by ``named_children()`` of the module reached so far.
    """

    def __init__(self, model):
        self.model = model
        # Name and result of the most recent lookup.
        self.target_layer_name = None
        self.target_layer = None

    def find_layer_by_name(self, target_layer_name):
        """Return the sub-module at ``target_layer_name``, or ``None`` if no such path exists."""
        self.target_layer_name = target_layer_name
        self.target_layer = None
        path = target_layer_name.split('.')
        self._traverse(self.model, path)
        return self.target_layer

    def _traverse(self, module, layer_names):
        """Follow ``layer_names`` downwards from ``module``.

        On success the module that was reached is stored in
        ``self.target_layer``; if any part of the path has no matching child,
        ``self.target_layer`` is left untouched.
        """
        node = module
        for wanted in layer_names:
            children = dict(node.named_children())
            if wanted not in children:
                # Dead end: this part of the path does not exist.
                return
            node = children[wanted]
        # Every part was consumed, so `node` is the requested layer.
        self.target_layer = node

# Example usage: locate the layer whose activations are inspected below.
traverser = ModifiedResNetTraverser(model)
target_layer_name = "visual.layer4.5.conv3"
last_conv_layer = traverser.find_layer_by_name(target_layer_name)
# find_layer_by_name() returns None for an unknown path. Fail right here with a
# clear message instead of later, when a hook is registered on None.
if last_conv_layer is None:
    raise ValueError(f"Layer '{target_layer_name}' was not found in the model")

In [None]:
class SaveActivations:
    """Forward-hook callable that remembers the latest output of the hooked module."""

    def __init__(self):
        # Copy of the most recent output; stays None until the hook has fired.
        self.activations = None

    def __call__(self, module, input, output):
        """Hook entry point: keep a clone of ``output`` (``module`` and ``input`` are not used)."""
        snapshot = output.clone()
        self.activations = snapshot

# Create an instance of the hook
hook = SaveActivations()

# Check once that the target layer accepts a forward hook, then detach it
# again. get_most_responding_neurons() attaches its own hook on every call, so
# leaving this one registered would only clone (and keep alive) a second copy
# of the activations on every forward pass.
hook_handle = last_conv_layer.register_forward_hook(hook)
hook_handle.remove()



# Which neurons activate?

In [None]:
def get_most_responding_neurons(files, k=25):
    """Rank the images in ``files`` by how strongly they activate ``last_conv_layer``.

    Every image is passed through ``model.encode_image`` on its own. A forward
    hook captures the output of ``last_conv_layer`` and, for each entry along
    the leading dimension of that output (the channels, for a conv layer),
    the maximum over all remaining positions is recorded.

    Args:
        files: Paths of the images to process.
        k: Number of images to keep per channel. Clamped to ``len(files)`` so
            that a short (e.g. final) batch does not make ``topk`` raise.

    Returns:
        A tuple ``(tops_filenames, values)``. ``values`` is the ``topk`` result
        taken along the image axis (one row per rank, one column per channel);
        ``tops_filenames`` is a list of rows with the matching filenames.

    Raises:
        ValueError: If ``files`` is empty.
    """
    filenames = list(files)
    if not filenames:
        raise ValueError("get_most_responding_neurons() needs at least one file")
    k = min(k, len(filenames))

    # Send the inputs to wherever the model lives instead of assuming CUDA.
    device = next(model.parameters()).device

    my_activations = []
    model.eval()
    hook = SaveActivations()
    hook_handle = last_conv_layer.register_forward_hook(hook)
    try:
        with torch.no_grad():
            for filename in filenames:
                # Release the file handle as soon as the pixels are decoded.
                with Image.open(filename) as raw:
                    photo = raw.convert("RGB")
                batch = torch.stack([preprocess(photo)]).to(device)
                # The returned embedding is not needed; the hook records the layer output.
                model.encode_image(batch)

                feature_map = hook.activations[0]
                # Maximum over all positions of each channel. For the mean
                # variant use torch.mean(feature_map, dim=(1, 2)) instead.
                max_values, _ = torch.max(feature_map.view(feature_map.size(0), -1), dim=1)
                my_activations.append(max_values)
    finally:
        # Always detach the hook, even when an image fails to load.
        hook_handle.remove()

    hook_neural_activation = torch.stack(my_activations)
    values, tops = hook_neural_activation.topk(k, dim=0)
    # Convert the indices to Python ints in one go rather than indexing the
    # list with individual tensor elements.
    tops_filenames = [[filenames[i] for i in row] for row in tops.tolist()]
    return tops_filenames, values

In [None]:
## One subfolder
image_dir = '/content/img_align_celeba'
batch_size = 50
image_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir)]
# Slice with batch_size (the width used to be a separate hard-coded 50).
image_batches = [image_files[i:i+batch_size] for i in range(0, len(image_files), batch_size)]
all_tops = []
all_values = []

for batch in image_batches:
    # A final, shorter batch cannot supply 25 images per neuron.
    tops, values = get_most_responding_neurons(batch, min(25, len(batch)))
    all_tops.append(tops)
    all_values.append(values)

# Using filenames directly: row r of stacked_tops holds the filenames that
# belong to row r of stacked_values (one column per neuron).
stacked_tops = [item for sublist in all_tops for item in sublist]
stacked_values = torch.cat(all_values, dim=0)

# topk cannot return more rows than there are.
top_values, top_indices = torch.topk(stacked_values, k=min(25, stacked_values.size(0)), dim=0)

# top_indices[rank][neuron] is a ROW of stacked_tops, and the neuron is the
# COLUMN. The previous expression used the row index as the column and
# range(5) as the row, which picked unrelated files (or raised IndexError).
# selected_tops[neuron] now lists that neuron's best images, strongest first.
n_shown = 5
selected_tops = [
    [stacked_tops[row][neuron] for row in rows[:n_shown]]
    for neuron, rows in enumerate(zip(*top_indices.tolist()))
]

In [None]:
# Show the selected images for one neuron, each preceded by its activation value
neuron_idx = 0
for rank, path in enumerate(selected_tops[neuron_idx]):
    activation = top_values[rank, neuron_idx]
    print(activation.cpu().detach().numpy())
    thumbnail = Image.open(path).resize((100, 100))
    display(thumbnail)

In [None]:
## All subfolders
parent_dir = '/content/tiny-imagenet-200/train/'
batch_size = 50
all_tops, all_values = [], []

for subfolder in tqdm(os.listdir(parent_dir)):
    subfolder_path = os.path.join(parent_dir, subfolder)
    if not os.path.isdir(subfolder_path):
        continue  # skip anything that is not a directory

    # The pictures are read from the 'images' folder inside each subfolder.
    image_dir = os.path.join(subfolder_path, 'images')
    image_files = [os.path.join(image_dir, name) for name in os.listdir(image_dir)]

    # Hand the files to the model in chunks of batch_size.
    for start in range(0, len(image_files), batch_size):
        batch = image_files[start:start + batch_size]
        tops, values = get_most_responding_neurons(batch)
        all_tops.append(tops)
        all_values.append(values)

In [None]:
# Merge the per-batch results: row r of stacked_tops holds the filenames that
# belong to row r of stacked_values.
stacked_tops = []
for batch_rows in all_tops:
    stacked_tops.extend(batch_rows)
stacked_values = torch.cat(all_values, dim=0)

# Best 25 rows per neuron across all batches.
top_values, top_indices = torch.topk(stacked_values, k=25, dim=0)
top_indices_list = top_indices.tolist()
# Transpose so that there is one list of row indices per neuron.
top_indices_transposed = [list(column) for column in zip(*top_indices_list)]

# selected_filenames[neuron] lists the files of that neuron's top rows.
selected_filenames = [
    [stacked_tops[row][neuron] for row in rows]
    for neuron, rows in enumerate(top_indices_transposed)
]

In [None]:
# Show the top images of one neuron on a 5x5 grid
neuron_idx = 129
fig, axs = plt.subplots(5, 5, figsize=(8, 8))
for i, file in enumerate(selected_filenames[neuron_idx]):
    # Fill the grid row by row.
    row_index, col_index = divmod(i, 5)
    ax = axs[row_index, col_index]

    image = Image.open(file).resize((200, 200))
    ax.imshow(image)
    #ax.set_title(f'Activation: {top_values[i, neuron_idx]:.3f}')
    ax.axis('off')

#fig.suptitle(f'Image samples for neuron: {neuron_idx}')
plt.tight_layout()
plt.show()

Testing on our own datasets regarding Donald Trump and Mental Illness

In [None]:
subfolder_names = ["profile", "art", "text", "partial", "politics", "non political", "rights", "music-games"]
batch_size = 30
all_tops = []
all_values = []
image_files = []

for subfolder in subfolder_names:
    folder_path = os.path.join("/content/drive/MyDrive/patrec/trump", subfolder)

    for filename in os.listdir(folder_path):
        # Only .png / .jpg files are treated as images.
        if filename.endswith((".png", ".jpg")):
            image_files.append(os.path.join(folder_path, filename))

image_batches = [image_files[i:i+batch_size] for i in range(0, len(image_files), batch_size)]

# Process each batch of images
for batch in image_batches:
    # The final batch may hold fewer than 25 files, and topk raises when asked
    # for more rows than exist, so cap k at the batch size.
    k = min(25, len(batch))
    tops, values = get_most_responding_neurons(batch, k)
    all_tops.append(tops)
    all_values.append(values)

In [None]:
# Merge the per-batch results: row r of stacked_tops holds the filenames that
# belong to row r of stacked_values.
stacked_tops = []
for batch_rows in all_tops:
    stacked_tops.extend(batch_rows)
stacked_values = torch.cat(all_values, dim=0)

# Best 15 rows per neuron across all batches.
top_values, top_indices = torch.topk(stacked_values, k=15, dim=0)
top_indices_list = top_indices.tolist()
# Transpose so that there is one list of row indices per neuron.
top_indices_transposed = [list(column) for column in zip(*top_indices_list)]

# selected_filenames[neuron] lists the files of that neuron's top rows.
selected_filenames = [
    [stacked_tops[row][neuron] for row in rows]
    for neuron, rows in enumerate(top_indices_transposed)
]

In [None]:
# Show the top images of one neuron on a 3x5 grid
neuron_idx = 89
fig, axs = plt.subplots(3, 5, figsize=(8, 8))
for i, file in enumerate(selected_filenames[neuron_idx]):
    # Fill the grid row by row.
    row_index, col_index = divmod(i, 5)
    ax = axs[row_index, col_index]

    image = Image.open(file).resize((200, 200))
    ax.imshow(image)
    #ax.set_title(f'Activation: {top_values[i, neuron_idx]:.3f}')
    ax.axis('off')

fig.suptitle(f'Image samples for neuron: {neuron_idx}')
plt.tight_layout()
plt.show()

In [None]:
subfolder_names = ["depression", "anxiety", "bad feeling", "psychology", "drugs", "unrelated", "travel-food-pet", "music-sports"]
batch_size = 30
all_tops, all_values = [], []
image_files = []

for subfolder in subfolder_names:
    folder_path = os.path.join("/content/drive/MyDrive/patrec/mental illness", subfolder)
    # Collect every .png / .jpg file of this category.
    image_files.extend(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.endswith((".png", ".jpg"))
    )

image_batches = [image_files[start:start + batch_size] for start in range(0, len(image_files), batch_size)]

# Process each batch of images
for batch in image_batches:
    # Never ask for more images per neuron than the batch contains.
    k = min(25, len(batch))
    tops, values = get_most_responding_neurons(batch, k)
    all_tops.append(tops)
    all_values.append(values)

In [None]:
# Merge the per-batch results: row r of stacked_tops holds the filenames that
# belong to row r of stacked_values.
stacked_tops = []
for batch_rows in all_tops:
    stacked_tops.extend(batch_rows)
stacked_values = torch.cat(all_values, dim=0)

# Best 15 rows per neuron across all batches.
top_values, top_indices = torch.topk(stacked_values, k=15, dim=0)
top_indices_list = top_indices.tolist()
# Transpose so that there is one list of row indices per neuron.
top_indices_transposed = [list(column) for column in zip(*top_indices_list)]

# selected_filenames[neuron] lists the files of that neuron's top rows.
selected_filenames = [
    [stacked_tops[row][neuron] for row in rows]
    for neuron, rows in enumerate(top_indices_transposed)
]

In [None]:
# Show the top images of one neuron on a 3x5 grid
neuron_idx = 2191
fig, axs = plt.subplots(3, 5, figsize=(8, 8))
for i, file in enumerate(selected_filenames[neuron_idx]):
    # Fill the grid row by row.
    row_index, col_index = divmod(i, 5)
    ax = axs[row_index, col_index]

    image = Image.open(file).resize((200, 200))
    ax.imshow(image)
    #ax.set_title(f'Activation: {top_values[i, neuron_idx]:.3f}')
    ax.axis('off')

fig.suptitle(f'Image samples for neuron: {neuron_idx}')
plt.tight_layout()
plt.show()

# Text Images

We created images with the names of the ImageNet classes written on them to test whether CLIP neurons also respond to text.

In [None]:
import requests

# Fetch the ImageNet class names (iterated further down as label strings).
labels_url = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() turns an HTTP error into an immediate, readable failure
# instead of a confusing JSON decoding error.
labels_response = requests.get(labels_url, timeout=30)
labels_response.raise_for_status()
imagenet_labels = labels_response.json()

In [None]:
import cv2

output_directory = "/content/drive/MyDrive/patrec/ClassNames"
# Create the output directory if it doesn't exist. exist_ok=True avoids the
# check-then-create race and keeps the cell safe to re-run.
os.makedirs(output_directory, exist_ok=True)

def create_pattern_image(text, text_color, width=400, height=200, font=None,
                         font_scale=2, thickness=2, margin=10):
    """Render ``text`` centred on a white canvas and return the image array.

    Args:
        text: String to draw.
        text_color: Colour tuple handed to ``cv2.putText``.
        width: Canvas width in pixels.
        height: Canvas height in pixels.
        font: OpenCV font id; ``None`` selects ``cv2.FONT_HERSHEY_COMPLEX``.
        font_scale: Largest font scale to use. It is reduced automatically
            when the text would not fit between the horizontal margins, so
            long strings are no longer cut off at the image border.
        thickness: Stroke thickness used for measuring and drawing.
        margin: Horizontal padding (pixels per side) kept when shrinking.

    Returns:
        ``numpy.ndarray`` of shape ``(height, width, 3)`` and dtype ``uint8``.
    """
    if font is None:
        font = cv2.FONT_HERSHEY_COMPLEX

    # White background.
    image = np.full((height, width, 3), 255, dtype=np.uint8)

    # Measure the text at the requested scale.
    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]

    # Too wide for the canvas: scale down proportionally and measure again.
    # Text that already fits within the margins is drawn exactly as before.
    available = width - 2 * margin
    if text_size[0] > available > 0:
        font_scale = font_scale * available / text_size[0]
        text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]

    # Origin (bottom-left corner of the text) that centres the text.
    text_x = (width - text_size[0]) // 2
    text_y = (height + text_size[1]) // 2

    cv2.putText(image, text, (text_x, text_y), font, font_scale, text_color, thickness, cv2.LINE_AA)

    return image

def save_image(image, text, color_name):
    """Write ``image`` to ``<output_directory>/<color_name>.png``.

    Args:
        image: Image array to encode.
        text: Not used; kept so existing calls keep working.
        color_name: Base name of the output file (also shown in the log line).

    Returns:
        ``True`` when OpenCV reports a successful write, ``False`` otherwise.
    """
    # `output_directory` is the module-level setting defined earlier in this cell.
    target_dir = output_directory
    os.makedirs(target_dir, exist_ok=True)
    filename = f"{color_name}.png"
    filepath = os.path.join(target_dir, filename)
    # The boolean returned by cv2.imwrite is checked so that a failed write
    # is not reported as saved.
    if not cv2.imwrite(filepath, image):
        print(f"FAILED to save image with {color_name} text as {filename}")
        return False
    print(f"Image with {color_name} text saved as {filename}")
    return True

# Generate and save one black-on-white text image per ImageNet label
BLACK = (0, 0, 0)
for class_name in imagenet_labels:
    rendered = create_pattern_image(class_name, BLACK)
    save_image(rendered, class_name, class_name)


In [None]:
## All text-image folders
folders = ["/content/drive/MyDrive/patrec/ClassNames", "/content/drive/MyDrive/patrec/ClassNames2", "/content/drive/MyDrive/patrec/ClassNames3", "/content/drive/MyDrive/patrec/ClassNames4"]
parent_dir = "/content/drive/MyDrive/patrec/ClassNames"
batch_size = 50
all_tops = []
all_values = []

for subfolder in tqdm(folders):
    if os.path.isdir(subfolder):
        image_dir = subfolder

        # Process the images stored directly in this folder
        image_files = os.listdir(image_dir)
        image_files = [os.path.join(image_dir, f) for f in image_files]
        image_batches = [image_files[i:i+batch_size] for i in range(0, len(image_files), batch_size)]

        # Process each batch of images
        for batch in image_batches:
            # A final, shorter batch cannot supply 25 images per neuron.
            tops, values = get_most_responding_neurons(batch, min(25, len(batch)))
            all_tops.append(tops)
            all_values.append(values)

# Row r of stacked_tops holds the filenames that belong to row r of
# stacked_values (one column per neuron).
stacked_tops = [item for sublist in all_tops for item in sublist]
stacked_values = torch.cat(all_values, dim=0)
top_values, top_indices = torch.topk(stacked_values, k=25, dim=0)
top_indices_list = top_indices.tolist()
# One list of row indices per neuron.
top_indices_transposed = list(map(list, zip(*top_indices_list)))

selected_files = []
for neuron, indices in enumerate(top_indices_transposed):
    # The column must be the neuron itself. Using the rank position as the
    # column (as before) returned files that belong to other neurons.
    elements = [stacked_tops[index][neuron] for index in indices]
    selected_files.append(elements)

In [None]:
# Show the top text images of one neuron on a 5x5 grid, titled with their activation
neuron_idx = 48
fig, axs = plt.subplots(5, 5, figsize=(8, 8))
for i, file in enumerate(selected_files[neuron_idx]):
    # Fill the grid row by row.
    row_index, col_index = divmod(i, 5)
    ax = axs[row_index, col_index]

    image = Image.open(file).resize((400, 200))
    ax.imshow(image)
    ax.set_title(f'Activation: {top_values[i, neuron_idx]:.3f}')
    ax.axis('off')

fig.suptitle(f'Text Image samples for neuron: {neuron_idx}')
plt.tight_layout()
plt.show()