In [1]:
# Für jede Brain region den besten layer für jedes model und davon das RDM

In [2]:
# For every brain region (start with EVC):
# For every model pair (CLIP-ViT, ALBEF-ViT, ViLT-ViT) - start CLIP - ViT
# Get BEST RDM only (for BEST layer)
#Swap rdm values of CLIP and VIT at random indices
#Compute Spearman R of these random matrices with EVC
#Compute the difference in the Spearman R
#Repeat this 300 times
#Measure how much of those 300 times the difference in Spearman R is higher than the true Spearman R difference between the models

In [3]:
# => für EVC hab ich am Ende nur 4 Zahlen. Die Tabelle mache ich dann aus allen brain regions

In [69]:
import pickle
import pandas as pd
file_path = 'BestImageLayers.pkl'

# Load the dictionary from the file using Pickle
with open(file_path, 'rb') as file:
    best_image_layers = pickle.load(file)

In [7]:
evc = best_image_layers['EVC']
evc

{'Albef Image': [('ALBEF Image Layer 7', 0.1925618419983465)],
 'Albef Multi': [('ALBEF Multi Layer 5', 0.06968936032508571)],
 'Vilt': [('VILT Layer 2', 0.10703017123425258)],
 'Clip': [('CLIP Image Layer 6', 0.24472337806853633)],
 'Vit': [('Vit Layer 10', 0.1913886562695591)]}

In [28]:
import os
import torch
from scipy.spatial.distance import pdist, squareform
from scipy import stats
import sys
sys.path.append("/Users/sebastian/University/Bachelor/Thesis/ba-thesis/")
from utils import load_files, get_rdms, get_rdm, get_spearmanr, inter_intra_similarity, get_upper_triu
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
def load_tensors(directory):
    d = {}
    for key in os.listdir(directory):
        if key.endswith(".pt"):
            filename = os.path.join(directory, key)
            tensor = torch.load(filename)
            d[key[:-3]] = tensor
    return d

In [22]:
def get_spearmanr_between_tensors(tensor1, tensor2, size):
    v1 = get_upper_triu(tensor1, size)
    v2 = get_upper_triu(tensor2, size)
    res = stats.spearmanr(v1, v2)
    return res.correlation

In [10]:
avg = load_tensors("avg/")
clip_img = load_tensors("sclip_img_rdms/")
albef_img = load_tensors("salbef_img_rdms/")
albef_multi = load_tensors("salbef_multi_rdms/")
vilt = load_tensors("svilt_rdms/")
vit = load_tensors("svit_rdms/")

In [76]:
avg_evc = avg['EVC']

In [75]:
best_vit_layer = vit[evc['Vit'][0][0]]
best_clip_layer = clip_img[evc['Clip'][0][0]]

In [37]:
def get_random_indices(size, amount_of_changed_idx):
    rand_arr = np.random.choice(np.arange(0, size-1), replace=False, size=amount_of_changed_idx)
    return rand_arr

In [105]:
def permutation_test(model1_layer, model2_layer, roi):
    np.random.seed(0)
    l = []
    roi_flat = get_upper_triu(roi, 81)
    model1_layer_flat = get_upper_triu(model1_layer, 81)
    model2_layer_flat = get_upper_triu(model2_layer, 81)
    corr_model1 = stats.spearmanr(model1_layer_flat, roi_flat).correlation
    corr_model2 = stats.spearmanr(model2_layer_flat, roi_flat).correlation
    p_true = corr_model1 - corr_model2
    for i in range(1000):
        rand_int = np.random.randint(3240)
        rand_arr = get_random_indices(3240, rand_int)
        model1_layer_flat = get_upper_triu(model1_layer, 81)
        model2_layer_flat = get_upper_triu(model2_layer, 81)
        model1_layer_flat[rand_arr], model2_layer_flat[rand_arr] = model2_layer_flat[rand_arr], model1_layer_flat[rand_arr]
        corr_model1 = stats.spearmanr(model1_layer_flat, roi_flat).correlation
        corr_model2 = stats.spearmanr(model2_layer_flat, roi_flat).correlation
        l.append(corr_model1-corr_model2)
    return p_true, l

In [125]:
def permutation_df(list_of_rows, rdm_mod1, rdm_mod2, name_mod1, name_mod2, best_img_layers_all_regions):
    list_of_columns = avg.keys()

    df = pd.DataFrame(index=list_of_rows, columns=list_of_columns)

    for k, v in sorted(avg.items()):
        avg_roi = avg[k]
        best_layers = best_img_layers_all_regions[k]
        mod2_layer_name = best_layers[name_mod2][0][0]
        mod1_layer_name = best_layers[name_mod1][0][0]
        best_mod2_layer = rdm_mod2[mod2_layer_name]
        best_mod1_layer = rdm_mod1[mod1_layer_name]
        p_true, l = permutation_test(best_mod1_layer, best_mod2_layer, avg_roi)
        # filter with broadcasting the amount of values that are GREATER than the true SPEARMAN R
        count_of_greater_values = np.sum(l > p_true) / 1000
        df[k] = np.array([mod1_layer_name, mod2_layer_name, p_true, count_of_greater_values])
    return df

In [126]:
list_of_rows = ['Layer CLIP', 'Layer ViT', 'Difference', 'Permutation Test Val']
df = permutation_df(list_of_rows, clip_img, vit, 'Clip', 'Vit', best_image_layers)
df

Unnamed: 0,PPA,RSC,PFS,OPA,LOC,EVC
Layer CLIP,CLIP Image Layer 12,CLIP Image Layer 11,CLIP Image Layer 4,CLIP Image Layer 12,CLIP Image Layer 8,CLIP Image Layer 6
Layer ViT,Vit Layer 11,Vit Layer 11,Vit Layer 10,Vit Layer 11,Vit Layer 10,Vit Layer 10
Difference,0.08756202674958763,0.09881575989159258,-0.005148496160707616,0.053258540357657036,-0.008294526311312833,0.05333472179897725
Permutation Test Val,0.027,0.021,0.642,0.059,0.655,0.083


In [127]:
list_of_rows = ['Layer ALBEF', 'Layer ViT', 'Difference', 'Permutation Test Val']
df = permutation_df(list_of_rows, albef_img, vit, 'Albef Image', 'Vit', best_image_layers)
df

Unnamed: 0,PPA,RSC,PFS,OPA,LOC,EVC
Layer ALBEF,ALBEF Image Layer 11,ALBEF Image Layer 11,ALBEF Image Layer 8,ALBEF Image Layer 11,ALBEF Image Layer 8,ALBEF Image Layer 7
Layer ViT,Vit Layer 11,Vit Layer 11,Vit Layer 10,Vit Layer 11,Vit Layer 10,Vit Layer 10
Difference,0.05779719900888941,0.04693386342703487,0.0037250839012841763,0.03818002291479897,-0.004380771401296779,0.0011731857287874126
Permutation Test Val,0.051,0.074,0.453,0.104,0.592,0.456


In [128]:
list_of_rows = ['Layer ALBEF', 'Layer ViT', 'Difference', 'Permutation Test Val']
df = permutation_df(list_of_rows, albef_multi, vit, 'Albef Multi', 'Vit', best_image_layers)
df

Unnamed: 0,PPA,RSC,PFS,OPA,LOC,EVC
Layer ALBEF,ALBEF Multi Layer 5,ALBEF Multi Layer 5,ALBEF Multi Layer 5,ALBEF Multi Layer 5,ALBEF Multi Layer 5,ALBEF Multi Layer 5
Layer ViT,Vit Layer 11,Vit Layer 11,Vit Layer 10,Vit Layer 11,Vit Layer 10,Vit Layer 10
Difference,-0.0288994481993948,-0.010203293999788782,-0.0817386044744701,-0.03083576398614435,-0.11214504771431458,-0.12169929594447337
Permutation Test Val,0.876,0.725,0.964,0.876,0.982,0.983


In [129]:
list_of_rows = ['Layer Vilt', 'Layer ViT', 'Difference', 'Permutation Test Val']
df = permutation_df(list_of_rows, vilt, vit, 'Vilt', 'Vit', best_image_layers)
df

Unnamed: 0,PPA,RSC,PFS,OPA,LOC,EVC
Layer Vilt,VILT Layer 2,VILT Layer 2,VILT Layer 10,VILT Layer 2,VILT Layer 12,VILT Layer 2
Layer ViT,Vit Layer 11,Vit Layer 11,Vit Layer 10,Vit Layer 11,Vit Layer 10,Vit Layer 10
Difference,-0.01858392677326609,-0.007939106414291075,-0.08246925550062201,-0.003723643007425892,-0.1176200389921261,-0.0843584850353065
Permutation Test Val,0.784,0.651,0.967,0.566,0.982,0.968
