In [13]:
import pickle
import pandas as pd
file_path = 'BestTextLayersAlgonauts2.pkl'

# Load the dictionary from the file using Pickle
with open(file_path, 'rb') as file:
    best_text_layers = pickle.load(file)

In [3]:
import os
import numpy as np
import torch
from scipy.spatial.distance import pdist, squareform
from scipy import stats
import sys
sys.path.append("/Users/sebastian/University/Bachelor/Thesis/ba-thesis/")
from utils import load_files, get_rdms, get_rdm, get_spearmanr, inter_intra_similarity, get_upper_triu
%load_ext autoreload
%autoreload 2

In [4]:
def load_tensors(directory):
    d = {}
    for key in os.listdir(directory):
        if key.endswith(".pt"):
            filename = os.path.join(directory, key)
            tensor = torch.load(filename)
            d[key[:-3]] = tensor
    return d

In [5]:
def get_spearmanr_between_tensors(tensor1, tensor2, size):
    v1 = get_upper_triu(tensor1, size)
    v2 = get_upper_triu(tensor2, size)
    res = stats.spearmanr(v1, v2)
    return res.correlation

In [11]:
avg_visual = load_tensors("avg_visual_lhrh/")
avg_words = load_tensors("avg_words_lhrh/")
avg_faces = load_tensors("avg_faces_lhrh/")
avg_streams = load_tensors("avg_streams_lhrh/")
avg_places = load_tensors("avg_places_lhrh/")
avg_body = load_tensors("avg_body_lhrh/")
clip_txt = load_tensors("clip_txt_rdms/")
albef_txt = load_tensors("albef3_txt_rdms/")
albef_multi = load_tensors("albef3_multi_rdms/")
vilt = load_tensors("vilt_rdms/")
gpt = load_tensors("gpt_rdms/")
bert = load_tensors("bert_txt_rdms/")

In [7]:
avg = avg_visual | avg_words | avg_faces | avg_streams | avg_places | avg_body

In [8]:
def get_random_indices(size, amount_of_changed_idx):
    rand_arr = np.random.choice(np.arange(0, size-1), replace=False, size=amount_of_changed_idx)
    return rand_arr

In [9]:
def permutation_test(model1_layer, model2_layer, roi):
    np.random.seed(0)
    l = []
    roi_flat = get_upper_triu(roi, 872)
    model1_layer_flat = get_upper_triu(model1_layer, 872)
    model2_layer_flat = get_upper_triu(model2_layer, 872)
    corr_model1 = stats.spearmanr(model1_layer_flat, roi_flat).correlation
    corr_model2 = stats.spearmanr(model2_layer_flat, roi_flat).correlation
    p_true = corr_model1 - corr_model2
    for i in range(300):
        rand_int = np.random.randint(379756)
        rand_arr = get_random_indices(379756, rand_int)
        model1_layer_flat = get_upper_triu(model1_layer, 872)
        model2_layer_flat = get_upper_triu(model2_layer, 872)
        model1_layer_flat[rand_arr], model2_layer_flat[rand_arr] = model2_layer_flat[rand_arr], model1_layer_flat[rand_arr]
        corr_model1 = stats.spearmanr(model1_layer_flat, roi_flat).correlation
        corr_model2 = stats.spearmanr(model2_layer_flat, roi_flat).correlation
        l.append(corr_model1-corr_model2)
    return p_true, l

In [10]:
from tqdm import tqdm
def permutation_df(rdm_mods1, rdm_mods2, name_mods1, name_mods2, best_layers_all_regions, avg):
    list_of_rows = sorted(avg.keys())

    df = pd.DataFrame(index=list_of_rows)
    for i in range(len(rdm_mods1)):
        l_diff = []
        l_p = []
        rdm_mod1 = rdm_mods1[i]
        rdm_mod2 = rdm_mods2[i]
        name_mod1 = name_mods1[i]
        name_mod2 = name_mods2[i]
        for k, v in tqdm(sorted(avg.items())):
            avg_roi = avg[k]
            best_layers = best_layers_all_regions[k]
            mod2_layer_name = best_layers[name_mod2][0]
            mod1_layer_name = best_layers[name_mod1][0]
            best_mod2_layer = rdm_mod2[mod2_layer_name]
            best_mod1_layer = rdm_mod1[mod1_layer_name]
            p_true, l1 = permutation_test(best_mod1_layer, best_mod2_layer, avg_roi)
            # filter with broadcasting the amount of values that are GREATER than the true SPEARMAN R
            count_of_greater_values = np.sum(l1 > p_true) / 300
            l_diff.append(count_of_greater_values)
            l_p.append(p_true)
        df[f"{name_mod1}-{name_mod2} difference"] = np.array(l_p)
        df[f"{name_mod1}-{name_mod2} p-value"] = np.array(l_diff)
    return df

In [20]:
df_res_perm_txt = permutation_df([clip_txt, albef_txt, albef_multi], [gpt, bert, bert], ['Clip', 'Albef Text', 'Albef Multi'], ['Gpt', 'Bert', 'Bert'], best_text_layers, avg)
df_res_perm_txt

100%|█████████████████████████████████████████████| 27/27 [30:36<00:00, 68.03s/it]
100%|█████████████████████████████████████████████| 27/27 [33:28<00:00, 74.38s/it]
100%|█████████████████████████████████████████████| 27/27 [29:08<00:00, 64.76s/it]


Unnamed: 0,Clip-Gpt difference,Clip-Gpt p-value,Albef Text-Bert difference,Albef Text-Bert p-value,Albef Multi-Bert difference,Albef Multi-Bert p-value
EBA,0.114782,0.003333,-0.061703,0.996667,-0.010832,0.976667
FBA-1,0.054937,0.006667,-0.050926,1.0,-0.009668,0.973333
FBA-2,0.101544,0.003333,-0.054929,1.0,-0.014421,0.983333
FFA-1,0.083714,0.006667,-0.049577,0.996667,-0.016287,0.99
FFA-2,0.099872,0.003333,-0.054179,0.996667,-0.014053,0.983333
OFA,0.020085,0.006667,-0.015654,0.986667,-0.003338,0.883333
OPA,0.024839,0.013333,-0.027765,0.996667,0.003148,0.11
OWFA,0.01418,0.036667,-0.006343,0.963333,-0.000237,0.583333
PPA,0.047742,0.006667,-0.026824,0.99,0.001587,0.16
RSC,0.008723,0.043333,-0.016896,0.993333,0.001348,0.216667


In [19]:
df_res_perm_txt

NameError: name 'df_res_perm_txt' is not defined

In [17]:
best_text_layers

{'OFA': {'Albef Text': ('ALBEF Text Layer 6', 0.011354626952903332),
  'Albef Multi': ('ALBEF Multi Layer 5', 0.023671037303942738),
  'Gpt': ('GPT Text Layer 12', 0.03437040433239407),
  'Bert': ('Bert Text Layer 4', 0.027009034975266143),
  'Clip': ('CLIP Text Layer 4', 0.05445578155766811)},
 'FFA-1': {'Albef Text': ('ALBEF Text Layer 6', 0.03540818060565114),
  'Albef Multi': ('ALBEF Multi Layer 5', 0.0686979322869777),
  'Gpt': ('GPT Text Layer 12', 0.09901205337515598),
  'Bert': ('Bert Text Layer 4', 0.08498483134088793),
  'Clip': ('CLIP Text Layer 11', 0.1827256289947488)},
 'FFA-2': {'Albef Text': ('ALBEF Text Layer 6', 0.03396416473710426),
  'Albef Multi': ('ALBEF Multi Layer 5', 0.07409001698156488),
  'Gpt': ('GPT Text Layer 12', 0.10734048736967997),
  'Bert': ('Bert Text Layer 3', 0.08814305462100133),
  'Clip': ('CLIP Text Layer 3', 0.20721290099858858)},
 'PPA': {'Albef Text': ('ALBEF Text Layer 3', 0.1122910168950697),
  'Albef Multi': ('ALBEF Multi Layer 5', 0.10774