In [2]:
import pickle
import pandas as pd
file_path = 'BestTextLayers.pkl'

# Load the dictionary from the file using Pickle
with open(file_path, 'rb') as file:
    best_text_layers = pickle.load(file)

In [24]:
best_text_layers

{'RSC': {'Albef Text': [('ALBEF Text Layer 3', 0.11704741563602626)],
  'Albef Multi': [('ALBEF Multi Layer 5', 0.12750902408290732)],
  'Gpt': [('GPT Text Layer 12', 0.026601473699243393)],
  'Vilt': [('VILT Layer 2', 0.12977321166840503)],
  'Clip': [('CLIP Text Layer 11', 0.14439979296429806)],
  'Bert': [('Bert Text Layer 6', 0.09495388492239105)]},
 'PPA': {'Albef Text': [('ALBEF Text Layer 3', 0.1122910168950697)],
  'Albef Multi': [('ALBEF Multi Layer 5', 0.10774053367046299)],
  'Gpt': [('GPT Text Layer 6', 0.036853637427702035)],
  'Vilt': [('VILT Layer 2', 0.1180560550965917)],
  'Clip': [('CLIP Text Layer 11', 0.14693224789006692)],
  'Bert': [('Bert Text Layer 6', 0.07421077905528684)]},
 'OPA': {'Albef Text': [('ALBEF Text Layer 3', 0.07724056604647302)],
  'Albef Multi': [('ALBEF Multi Layer 5', 0.08588432389088141)],
  'Gpt': [('GPT Text Layer 6', 0.031172272680497565)],
  'Vilt': [('VILT Layer 2', 0.11299644486959987)],
  'Clip': [('CLIP Text Layer 7', 0.112015371500840

In [16]:
best_text_layers['EVC'] = best_text_layers.pop('ESC')

In [18]:
import os
import numpy as np
import torch
from scipy.spatial.distance import pdist, squareform
from scipy import stats
import sys
sys.path.append("/Users/sebastian/University/Bachelor/Thesis/ba-thesis/")
from utils import load_files, get_rdms, get_rdm, get_spearmanr, inter_intra_similarity, get_upper_triu
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
def load_tensors(directory):
    d = {}
    for key in os.listdir(directory):
        if key.endswith(".pt"):
            filename = os.path.join(directory, key)
            tensor = torch.load(filename)
            d[key[:-3]] = tensor
    return d

In [5]:
def get_spearmanr_between_tensors(tensor1, tensor2, size):
    v1 = get_upper_triu(tensor1, size)
    v2 = get_upper_triu(tensor2, size)
    res = stats.spearmanr(v1, v2)
    return res.correlation

In [12]:
avg = load_tensors("avg/")
clip_txt = load_tensors("sclip_txt_rdms/")
albef_txt = load_tensors("salbef_txt_rdms/")
albef_multi = load_tensors("salbef_multi_rdms/")
vilt = load_tensors("svilt_rdms/")
gpt = load_tensors("sgpt_rdms/")
bert = load_tensors("sbert_txt_rdms/")

In [8]:
def get_random_indices(size, amount_of_changed_idx):
    rand_arr = np.random.choice(np.arange(0, size-1), replace=False, size=amount_of_changed_idx)
    return rand_arr

In [9]:
def permutation_test(model1_layer, model2_layer, roi):
    np.random.seed(0)
    l = []
    roi_flat = get_upper_triu(roi, 81)
    model1_layer_flat = get_upper_triu(model1_layer, 81)
    model2_layer_flat = get_upper_triu(model2_layer, 81)
    corr_model1 = stats.spearmanr(model1_layer_flat, roi_flat).correlation
    corr_model2 = stats.spearmanr(model2_layer_flat, roi_flat).correlation
    p_true = corr_model1 - corr_model2
    for i in range(1000):
        rand_int = np.random.randint(3240)
        rand_arr = get_random_indices(3240, rand_int)
        model1_layer_flat = get_upper_triu(model1_layer, 81)
        model2_layer_flat = get_upper_triu(model2_layer, 81)
        model1_layer_flat[rand_arr], model2_layer_flat[rand_arr] = model2_layer_flat[rand_arr], model1_layer_flat[rand_arr]
        corr_model1 = stats.spearmanr(model1_layer_flat, roi_flat).correlation
        corr_model2 = stats.spearmanr(model2_layer_flat, roi_flat).correlation
        l.append(corr_model1-corr_model2)
    return p_true, l

In [10]:
def permutation_df(list_of_rows, rdm_mod1, rdm_mod2, name_mod1, name_mod2, best_layers_all_regions):
    list_of_columns = avg.keys()

    df = pd.DataFrame(index=list_of_rows, columns=list_of_columns)

    for k, v in sorted(avg.items()):
        avg_roi = avg[k]
        best_layers = best_layers_all_regions[k]
        mod2_layer_name = best_layers[name_mod2][0][0]
        mod1_layer_name = best_layers[name_mod1][0][0]
        best_mod2_layer = rdm_mod2[mod2_layer_name]
        best_mod1_layer = rdm_mod1[mod1_layer_name]
        p_true, l = permutation_test(best_mod1_layer, best_mod2_layer, avg_roi)
        # filter with broadcasting the amount of values that are GREATER than the true SPEARMAN R
        count_of_greater_values = np.sum(l > p_true) / 1000
        df[k] = np.array([mod1_layer_name, mod2_layer_name, p_true, count_of_greater_values])
    return df

In [19]:
list_of_rows = ['Layer CLIP', 'Layer GPT', 'Difference', 'Permutation Test Val']
df = permutation_df(list_of_rows, clip_txt, gpt, 'Clip', 'Gpt', best_text_layers)
df

Unnamed: 0,PPA,RSC,PFS,OPA,LOC,EVC
Layer CLIP,CLIP Text Layer 11,CLIP Text Layer 11,CLIP Text Layer 11,CLIP Text Layer 7,CLIP Text Layer 4,CLIP Text Layer 12
Layer GPT,GPT Text Layer 6,GPT Text Layer 12,GPT Text Layer 12,GPT Text Layer 6,GPT Text Layer 12,GPT Text Layer 1
Difference,0.11007861046236489,0.11779831926505466,0.05732098358872349,0.08084309882034296,0.04093752647982047,0.05354160367386726
Permutation Test Val,0.027,0.033,0.06,0.039,0.142,0.088


In [21]:
list_of_rows = ['Layer Albef', 'Layer Bert', 'Difference', 'Permutation Test Val']
df = permutation_df(list_of_rows, albef_txt, bert, 'Albef Text', 'Bert', best_text_layers)
df

Unnamed: 0,PPA,RSC,PFS,OPA,LOC,EVC
Layer Albef,ALBEF Text Layer 3,ALBEF Text Layer 3,ALBEF Text Layer 6,ALBEF Text Layer 3,ALBEF Text Layer 6,ALBEF Text Layer 6
Layer Bert,Bert Text Layer 6,Bert Text Layer 6,Bert Text Layer 3,Bert Text Layer 4,Bert Text Layer 11,Bert Text Layer 7
Difference,0.03808023783978287,0.022093530713635207,0.015766539320792042,0.01913440220773166,0.0428304009396886,0.020385693980189273
Permutation Test Val,0.107,0.173,0.276,0.216,0.12,0.213


In [22]:
list_of_rows = ['Layer Albef', 'Layer Bert', 'Difference', 'Permutation Test Val']
df = permutation_df(list_of_rows, albef_multi, bert, 'Albef Multi', 'Bert', best_text_layers)
df

Unnamed: 0,PPA,RSC,PFS,OPA,LOC,EVC
Layer Albef,ALBEF Multi Layer 5,ALBEF Multi Layer 5,ALBEF Multi Layer 5,ALBEF Multi Layer 5,ALBEF Multi Layer 5,ALBEF Multi Layer 5
Layer Bert,Bert Text Layer 6,Bert Text Layer 6,Bert Text Layer 3,Bert Text Layer 4,Bert Text Layer 11,Bert Text Layer 7
Difference,0.033529754615176155,0.032555139160516264,0.03216944673271019,0.027778160052140052,0.04943057649376494,0.012281650026547714
Permutation Test Val,0.107,0.124,0.133,0.185,0.085,0.27


In [23]:
list_of_rows = ['Layer Vilt', 'Layer Bert', 'Difference', 'Permutation Test Val']
df = permutation_df(list_of_rows, vilt, bert, 'Vilt', 'Bert', best_text_layers)
df

Unnamed: 0,PPA,RSC,PFS,OPA,LOC,EVC
Layer Vilt,VILT Layer 2,VILT Layer 2,VILT Layer 10,VILT Layer 2,VILT Layer 12,VILT Layer 2
Layer Bert,Bert Text Layer 6,Bert Text Layer 6,Bert Text Layer 3,Bert Text Layer 4,Bert Text Layer 11,Bert Text Layer 7
Difference,0.043845276041304865,0.03481932674601397,0.031438795706558276,0.05489028103085851,0.04395558521595342,0.04962246093571458
Permutation Test Val,0.091,0.149,0.16,0.084,0.099,0.087
