In [10]:
import pickle
import pandas as pd
file_path = 'BestImageLayersAlgonauts.pkl'

# Load the dictionary from the file using Pickle
with open(file_path, 'rb') as file:
    best_image_layers = pickle.load(file)

In [2]:
import os
import numpy as np
import torch
from scipy.spatial.distance import pdist, squareform
from scipy import stats
import sys
sys.path.append("/Users/sebastian/University/Bachelor/Thesis/ba-thesis/")
from utils import load_files, get_rdms, get_rdm, get_spearmanr, inter_intra_similarity, get_upper_triu
%load_ext autoreload
%autoreload 2

In [3]:
def load_tensors(directory):
    d = {}
    for key in os.listdir(directory):
        if key.endswith(".pt"):
            filename = os.path.join(directory, key)
            tensor = torch.load(filename)
            d[key[:-3]] = tensor
    return d

In [4]:
def get_spearmanr_between_tensors(tensor1, tensor2, size):
    v1 = get_upper_triu(tensor1, size)
    v2 = get_upper_triu(tensor2, size)
    res = stats.spearmanr(v1, v2)
    return res.correlation

In [5]:
avg_visual = load_tensors("avg_visual_lhrh/")
avg_words = load_tensors("avg_words_lhrh/")
avg_faces = load_tensors("avg_faces_lhrh/")
avg_streams = load_tensors("avg_streams_lhrh/")
avg_places = load_tensors("avg_places_lhrh/")
avg_body = load_tensors("avg_body_lhrh/")
clip_img = load_tensors("clip_rdms/")
albef_img = load_tensors("albef_rdms/")
vilt = load_tensors("vilt_rdms/")
vit = load_tensors("vit_rdms/")

In [6]:
avg = avg_visual | avg_words | avg_faces | avg_streams | avg_places | avg_body

In [7]:
def get_random_indices(size, amount_of_changed_idx):
    rand_arr = np.random.choice(np.arange(0, size-1), replace=False, size=amount_of_changed_idx)
    return rand_arr

In [8]:
def permutation_test(model1_layer, model2_layer, roi):
    np.random.seed(0)
    l = []
    roi_flat = get_upper_triu(roi, 872)
    model1_layer_flat = get_upper_triu(model1_layer, 872)
    model2_layer_flat = get_upper_triu(model2_layer, 872)
    corr_model1 = stats.spearmanr(model1_layer_flat, roi_flat).correlation
    corr_model2 = stats.spearmanr(model2_layer_flat, roi_flat).correlation
    p_true = corr_model1 - corr_model2
    for i in range(300):
        rand_int = np.random.randint(379756)
        rand_arr = get_random_indices(379756, rand_int)
        model1_layer_flat = get_upper_triu(model1_layer, 872)
        model2_layer_flat = get_upper_triu(model2_layer, 872)
        model1_layer_flat[rand_arr], model2_layer_flat[rand_arr] = model2_layer_flat[rand_arr], model1_layer_flat[rand_arr]
        corr_model1 = stats.spearmanr(model1_layer_flat, roi_flat).correlation
        corr_model2 = stats.spearmanr(model2_layer_flat, roi_flat).correlation
        l.append(corr_model1-corr_model2)
    return p_true, l

In [28]:
from tqdm import tqdm
def permutation_df(rdm_mods1, rdm_mods2, name_mods1, name_mods2, best_layers_all_regions, avg):
    list_of_rows = sorted(avg.keys())

    df = pd.DataFrame(index=list_of_rows)
    for i in range(len(rdm_mods1)):
        l_diff = []
        l_p = []
        rdm_mod1 = rdm_mods1[i]
        rdm_mod2 = rdm_mods2[i]
        name_mod1 = name_mods1[i]
        name_mod2 = name_mods2[i]
        for k, v in tqdm(sorted(avg.items())):
            avg_roi = avg[k]
            best_layers = best_layers_all_regions[k]
            mod2_layer_name = best_layers[name_mod2][0]
            mod1_layer_name = best_layers[name_mod1][0]
            best_mod2_layer = rdm_mod2[mod2_layer_name]
            best_mod1_layer = rdm_mod1[mod1_layer_name]
            p_true, l1 = permutation_test(best_mod1_layer, best_mod2_layer, avg_roi)
            # filter with broadcasting the amount of values that are GREATER than the true SPEARMAN R
            count_of_greater_values = np.sum(l1 > p_true) / 300
            l_diff.append(count_of_greater_values)
            l_p.append(p_true)
        df[f"{name_mod1}-{name_mod2} difference"] = np.array(l_p)
        df[f"{name_mod1}-{name_mod2} p-value"] = np.array(l_diff)
    return df

In [29]:
df_perm_test_img = permutation_df([clip_img, albef_img, vilt], [vit, vit, vit], 
                                  ['Clip', 'Albef', 'Vilt'], ['Vit','Vit','Vit'], best_image_layers, avg)

100%|███████████████████████████████████████████| 27/27 [27:27<00:00, 61.01s/it]
100%|███████████████████████████████████████████| 27/27 [22:36<00:00, 50.25s/it]
100%|███████████████████████████████████████████| 27/27 [22:11<00:00, 49.30s/it]


In [30]:
df_perm_test_img

Unnamed: 0,Clip-Vit difference,Clip-Vit p-value,Albef-Vit difference,Albef-Vit p-value,Vilt-Vit difference,Vilt-Vit p-value
EBA,0.077118,0.003333,0.025333,0.003333,0.081555,0.0
FBA-1,0.057519,0.003333,0.038047,0.003333,0.049609,0.003333
FBA-2,0.067216,0.0,0.012429,0.01,0.044415,0.003333
FFA-1,0.069194,0.0,0.036364,0.003333,0.046411,0.0
FFA-2,0.065035,0.003333,0.007269,0.016667,0.039302,0.003333
OFA,0.011648,0.02,0.01033,0.016667,-0.003059,0.846667
OPA,0.033127,0.003333,0.036649,0.0,0.003553,0.12
OWFA,0.008189,0.03,0.001501,0.163333,0.001979,0.183333
PPA,0.038123,0.0,0.051255,0.0,0.001324,0.253333
RSC,0.011251,0.006667,0.023817,0.003333,-0.036435,0.996667


In [31]:
df_perm_test_img.to_csv('permutation-test-img.csv', index=True)