In [None]:
import numpy as np
import pandas as pd
import os
import glob
import joblib
import torch
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.legend_handler import HandlerBase
import seaborn as sns

from nilearn import plotting 
from nilearn.image import smooth_img, swap_img_hemispheres

from nilearn.input_data import NiftiMasker
import nibabel as nib

import scipy.stats
from scipy.stats import pearsonr, spearmanr, ttest_ind
import statsmodels.api as sm

import llms_brain_lateralization as lbl
from llms_brain_lateralization import make_dir

In [None]:
# Global plotting theme: font sizes passed to seaborn, then the 'darkgrid' style.
rc = {
    'font.size': 12,
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
}
sns.set(rc=rc)
sns.set_style('darkgrid')

In [None]:
# Toggle: when True, the figure-producing cells also write their figures to disk.
save_fig = True

In [None]:
# Output folder for saved figures; only resolved when saving is enabled.
# make_dir presumably creates the folder if it is missing — confirm in llms_brain_lateralization.
if save_fig:
    fig_folder = lbl.figures_folder
    make_dir(fig_folder)

In [None]:
# Masker built on the mask image; used throughout to convert between NIfTI
# volumes and flat per-voxel vectors (transform / inverse_transform).
nifti_masker = NiftiMasker(mask_img='mask_lpp_en.nii.gz')
nifti_masker.fit()

# Number of elements (voxels) reported by the fitted masker.
n_voxels = nifti_masker.n_elements_

In [None]:
# Folder from which the '<model>_layer-<k>_corr.gz' correlation files are loaded.
glm_folder = lbl.llms_brain_correlations

In [None]:
# Training steps of the analysed checkpoints: a hand-picked early schedule,
# then every 10,000 steps from 13,000 up to and including 143,000.
steps = [0, 1, 8, 16, 64, 128, 256, 512, 1000, 3000, *range(13000, 143001, 10000)]

In [None]:
# One model identifier per training step, in the same order as `steps`.
model_names = [f"EleutherAI/pythia-12b-deduped-step:{step}" for step in steps]
# Number of layers whose correlation files are loaded for every model.
n_layers = 32

In [None]:
# Number of checkpoints under study (one per entry of model_names).
n_models = len(model_names)
print(n_models)

In [None]:
# Load the brain-correlation vector of every layer of every checkpoint.
# Result: a list with one array per model (layers stacked along axis 0),
# in the same order as `model_names`.
corr_layers_voxels_models = []
for model_name in model_names:
    corr_layers_voxels = []
    # Iterate over n_layers (defined with model_names) instead of a literal 32.
    for idx_layer in range(n_layers):
        filename = os.path.join(glm_folder, '{}_layer-{}_corr.gz'.format(model_name, idx_layer))
        # NOTE(review): joblib.load unpickles the file content; only load trusted files.
        with open(filename, 'rb') as f:
            corr_voxels = joblib.load(f)
        corr_layers_voxels.append(corr_voxels)
    corr_layers_voxels_models.append(np.array(corr_layers_voxels))

# Short summary instead of dumping every array on every iteration.
print('{} models loaded; array shapes: {}'.format(
    len(corr_layers_voxels_models),
    sorted({corr.shape for corr in corr_layers_voxels_models})))

In [None]:
# One colour per checkpoint, taken from the 'plasma' palette.
# (The full dump of every correlation array was removed; only the count is shown.)
color_models = list(sns.color_palette('plasma', n_colors=len(corr_layers_voxels_models)))
print(len(corr_layers_voxels_models))

In [None]:
# Across training steps: mean of the per-voxel best layer (arr) and its
# variance divided by 10 so both curves share one axis (arr2).
arr = []
arr2 = []
for corr_layers_voxels in corr_layers_voxels_models:
    # Best layer of every voxel, computed once and reused for both statistics.
    best_layer = np.argmax(corr_layers_voxels, axis=0)
    arr.append(np.mean(best_layer))
    arr2.append(np.var(best_layer) / 10)
# Explicit colours so the curves match the title (mean in blue, variance in red);
# the default colour cycle would draw the second curve in orange.
plt.plot(steps, arr, color='blue')
plt.plot(steps, arr2, color='red')
plt.title("argmax moyen en bleu, variance normalisée en rouge")
plt.show()

In [None]:
################################ ROIs analysis ################################

# ROI names; each mask is read from '<folder_mask>/<name>.nii.gz'.
roi_names = ['TP', 'aSTS', 'pSTS', 'AG_TPJ', 'BA44', 'BA45', 'BA47']
n_rois = len(roi_names)
folder_mask = 'roi_masks'

roi_list = [os.path.join(folder_mask, f'{roi_name}.nii.gz') for roi_name in roi_names]
# Hemisphere-swapped counterpart of every ROI mask.
roi_list_r = list(map(swap_img_hemispheres, roi_list))
# Masked ROI vectors: the original ROIs first, then the swapped ones.
rois_t = nifti_masker.transform(roi_list + roi_list_r)
# For each ROI, indices of the masked voxels whose value is exactly 1.0.
idx_rois = [np.flatnonzero(roi_t == 1.0) for roi_t in rois_t]

In [None]:
import numpy as np
import plotly.graph_objects as go

np.random.seed(0)  # nothing below draws random numbers; kept so the global RNG state is unchanged
# Best layer (argmax of the correlation over layers) of every voxel, one array per checkpoint.
data = [np.argmax(corr_layers_voxels, axis=0) for corr_layers_voxels in corr_layers_voxels_models]


def _show_labels_on_surf(img, vmax, title):
    """Display a label volume on the fsaverage5 surface (lateral view, both hemispheres)."""
    plotting.plot_img_on_surf(img,
                              surf_mesh='fsaverage5',
                              views=['lateral'],
                              hemispheres=['left', 'right'],
                              cmap='Spectral_r',
                              vmax=vmax,
                              symmetric_cbar=False,
                              cbar_tick_format='%.2f',
                              colorbar=True,
                              title=title)
    plt.show()


# Voxels of the last checkpoint whose best layer is 7, 8 or 9.
in_cluster = np.abs(data[-1] - 8) <= 1

# Map 1: cluster membership only (1 inside the cluster, 0 elsewhere).
arr = np.zeros(n_voxels)
arr[in_cluster] = 1
imgtmp = nifti_masker.inverse_transform(arr)
_show_labels_on_surf(imgtmp, vmax=1, title="Cluster layer 7, 8, 9, 12b")

# Map 2: same cluster, with the voxels of idx_rois[1] (second ROI of roi_names) marked as 2.
arr = np.zeros(n_voxels)
arr[in_cluster] = 1
arr[idx_rois[1]] = 2
imgtmp = nifti_masker.inverse_transform(arr)
_show_labels_on_surf(imgtmp, vmax=2, title="aires a grand offset")

# One histogram trace per checkpoint; only the first one is visible initially.
fig = go.Figure()
for i, dist in enumerate(data):
    fig.add_trace(
        go.Histogram(
            x=dist,
            name=f'Time {steps[i]}',  # training step, consistent with the slider title
            visible=(i == 0),
        )
    )

# Slider steps: each one shows exactly one trace and updates the title.
gsteps = []
for i in range(len(data)):
    visibility = [False] * len(data)
    visibility[i] = True
    gsteps.append(dict(
        method="update",
        label=str(steps[i]),  # show the training step next to the slider
        args=[{"visible": visibility},
              {"title": f"Distribution at Time {steps[i]}"}],
    ))

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Time: "},
    pad={"t": 50},
    steps=gsteps
)]

fig.update_layout(
    sliders=sliders,
    title="Evolution of Distribution Over Time"
)

fig.show()


In [None]:
# Layer-wise correlation profile of the last checkpoint, averaged over the
# voxels whose best layer (data[-1]) is 7, 8 or 9.
wh = np.where(np.abs(data[-1] - 8) <= 1)
arr = np.array(corr_layers_voxels_models[-1])
print(arr.shape)
# Keep only the columns (voxels) that belong to the cluster.
arr = arr[:, np.where(np.abs(data[-1] - 8) <= 1)[0]]
print(arr.shape)
# Average over the selected voxels: one value per layer.
arr = arr.mean(axis=1)
print(arr.shape)
plt.plot(arr)
plt.title("corrélation par layer des zones des cortex auditifs et autres voxels du cluster 8")

In [None]:
# Glass-brain view of the voxels whose best layer (data[-1]) is 7, 8 or 9.
last_model_corr = np.array(corr_layers_voxels_models[-1])
print(last_model_corr.shape)
# Blank copy with the same shape, then flag the cluster voxels in every row.
arr = last_model_corr * 0
wh = np.where(np.abs(data[-1] - 8) <= 1)
arr[:, wh] = 1
# All rows are identical, so the first one is enough for the volume.
imgtmp = nifti_masker.inverse_transform(arr[0])
display = plotting.plot_glass_brain(
    imgtmp,
    threshold=0.,
    display_mode='lyrz',
    vmin=0.,
    cmap='Spectral_r',
    plot_abs=True,
    colorbar=True,
)


In [None]:
# Same computation as the earlier profile cell: layer-wise correlation of the
# last checkpoint, averaged over voxels whose best layer (data[-1]) is 7, 8 or 9.
wh = np.where(np.abs(data[-1] - 8) <= 1)
arr = np.array(corr_layers_voxels_models[-1])
print(arr.shape)
# Keep only the columns (voxels) that belong to the cluster.
arr = arr[:, np.where(np.abs(data[-1] - 8) <= 1)[0]]
print(arr.shape)
# Average over the selected voxels: one value per layer.
arr = arr.mean(axis=1)
print(arr.shape)
plt.plot(arr)
plt.title("corrélation par layer des zones des cortex auditifs et autres voxels du cluster 8")

In [None]:
import numpy as np
import plotly.graph_objects as go

np.random.seed(0)  # nothing below draws random numbers; kept so the global RNG state is unchanged
# Best layer (argmax of the correlation over layers) of every voxel, one array per checkpoint.
data = [np.argmax(corr_layers_voxels, axis=0) for corr_layers_voxels in corr_layers_voxels_models]

# Flag the voxels of the last checkpoint whose best layer lies in 13..17.
arr = np.zeros(n_voxels)
arr[np.abs(data[-1] - 15) <= 2] = 1

imgtmp = nifti_masker.inverse_transform(arr)

# Title now names the layers actually selected (it previously read "layer 7, 8, 9").
plotting.plot_img_on_surf(imgtmp,
                        surf_mesh='fsaverage5',
                        views=['lateral'],
                        hemispheres=['left', 'right'],
                        cmap='Spectral_r',
                        vmax=1,
                        symmetric_cbar=False,
                        cbar_tick_format='%.2f',
                        colorbar=True,
                        title="Cluster layers 13-17, 12b")
plt.show()


In [None]:
import numpy as np
import plotly.graph_objects as go

np.random.seed(0)  # nothing below draws random numbers; kept so the global RNG state is unchanged
# Best layer (argmax of the correlation over layers) of every voxel, one array per checkpoint.
data = [np.argmax(corr_layers_voxels, axis=0) for corr_layers_voxels in corr_layers_voxels_models]

# Flag the voxels of the last checkpoint whose best layer lies in 17..21.
arr = np.zeros(n_voxels)
arr[np.abs(data[-1] - 19) <= 2] = 1

imgtmp = nifti_masker.inverse_transform(arr)

# Title now names the layers actually selected (it previously read "layer 7, 8, 9").
plotting.plot_img_on_surf(imgtmp,
                        surf_mesh='fsaverage5',
                        views=['lateral'],
                        hemispheres=['left', 'right'],
                        cmap='Spectral_r',
                        vmax=1,
                        symmetric_cbar=False,
                        cbar_tick_format='%.2f',
                        colorbar=True,
                        title="Cluster layers 17-21, 12b")
plt.show()

In [None]:
import numpy as np
import plotly.graph_objects as go

np.random.seed(0)  # nothing below draws random numbers; kept so the global RNG state is unchanged
# Per-voxel correlation averaged over layers, one vector per checkpoint.
data = [np.mean(corr_layers_voxels, axis=0) for corr_layers_voxels in corr_layers_voxels_models]

# One histogram trace per checkpoint; only the first one is visible initially.
fig = go.Figure()
for i, dist in enumerate(data):
    fig.add_trace(
        go.Histogram(
            x=dist,
            name=f'Time {steps[i]}',  # training step rather than checkpoint index
            visible=(i == 0),
        )
    )

# Slider steps: each one shows exactly one trace and updates the title.
gsteps = []
for i in range(len(data)):
    visibility = [False] * len(data)
    visibility[i] = True
    gsteps.append(dict(
        method="update",
        label=str(steps[i]),  # show the training step next to the slider
        args=[{"visible": visibility},
              {"title": f"Distribution at Time {steps[i]}"}],
    ))

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Time: "},
    pad={"t": 50},
    steps=gsteps
)]

fig.update_layout(
    sliders=sliders,
    title="Evolution of Distribution Over Time"
)

fig.show()


In [None]:
import numpy as np
import plotly.graph_objects as go

np.random.seed(0)  # nothing below draws random numbers; kept so the global RNG state is unchanged
# Per-layer correlation averaged over voxels, one vector of layer means per checkpoint.
data = [np.mean(corr_layers_voxels, axis=1) for corr_layers_voxels in corr_layers_voxels_models]
# Mean of those layer means, one value per checkpoint, plotted against the checkpoint index.
data2 = [np.mean(d) for d in data]
plt.plot(range(len(data2)), data2)
plt.show()

# One histogram trace per checkpoint; only the first one is visible initially.
fig = go.Figure()
for i, dist in enumerate(data):
    fig.add_trace(
        go.Histogram(
            x=dist,
            name=f'Time {steps[i]}',  # training step rather than checkpoint index
            visible=(i == 0),
        )
    )

# Slider steps: each one shows exactly one trace and updates the title.
gsteps = []
for i in range(len(data)):
    visibility = [False] * len(data)
    visibility[i] = True
    gsteps.append(dict(
        method="update",
        label=str(steps[i]),  # show the training step next to the slider
        args=[{"visible": visibility},
              {"title": f"Distribution at Time {steps[i]}"}],
    ))

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Time: "},
    pad={"t": 50},
    steps=gsteps
)]

fig.update_layout(
    sliders=sliders,
    title="Evolution of Distribution Over Time"
)

fig.show()


In [None]:
# Best-layer correlation of every voxel, for every model.
# n_models x n_voxels
corr_voxels_models = []
for corr_layers_voxels in corr_layers_voxels_models:
    if corr_layers_voxels.size == 0:
        # No data for this model: use a full row of NaN (not a scalar) so the
        # stacked array stays rectangular and the 2-D slicing below keeps working.
        corr_voxels_models.append(np.full(n_voxels, np.nan))
    else:
        corr_voxels_models.append(np.max(corr_layers_voxels, axis=0))
corr_voxels_models = np.array(corr_voxels_models)

# Mean over voxels of the best-layer correlation, one value per model.
# n_models
corr_models = np.array([np.mean(corr_voxels) for corr_voxels in corr_voxels_models])

# Mean correlation of each layer of each model, averaged over all voxels.
# n_models x n_layers
corr_layers_models = [np.mean(corr_layers, axis=1) for corr_layers in corr_layers_voxels_models]

# Split the voxel axis into its first and second half, used as left / right hemisphere.
# NOTE(review): assumes the masked voxels are ordered left half then right half — confirm.
corr_l_models = corr_voxels_models[:,:n_voxels//2]
corr_r_models = corr_voxels_models[:,n_voxels//2:]

# Make sure the output folder exists before writing the metrics file.
os.makedirs('metrics', exist_ok=True)
torch.save([model_names, corr_voxels_models, corr_models, corr_layers_models, corr_l_models, corr_r_models], 'metrics/' + "12bmetrics.pt")


In [None]:
# Interactive histograms of the best-layer correlations, left vs right half,
# with a slider over training steps.
data0 = corr_l_models
data1 = corr_r_models
n_pairs = len(data0)

# Traces are interleaved: index 2*i is the left trace of checkpoint i, 2*i + 1 the right one.
fig = go.Figure()
for step, left_corr, right_corr in zip(steps, data0, data1):
    for side, values in (('Left', left_corr), ('Right', right_corr)):
        fig.add_trace(go.Histogram(x=values, name=f'{side} Step {step}', visible=False))

# Start with the pair of the first checkpoint visible.
fig.data[0].visible = True
fig.data[1].visible = True

# One slider step per checkpoint, revealing only that checkpoint's pair of traces.
gsteps = [
    dict(
        method="update",
        args=[{"visible": [trace_idx // 2 == i for trace_idx in range(2 * n_pairs)]},
              {"title": f"Distribution at Time {steps[i]}"}],
    )
    for i in range(n_pairs)
]

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Time: "},
    pad={"t": 50},
    steps=gsteps
)]

fig.update_layout(sliders=sliders, title="Evolution of Distributions Over Time")

fig.show()

In [None]:
# left: red, right: green (as for port and starboard; should be colorblind compatible though)
# Takes entries 2 and 3 of the 4-colour 'colorblind' palette and reverses their order.
l_r_colors = sns.color_palette('colorblind', n_colors=4)[2:][::-1]

In [None]:
# Scratch check: 9th of 16 log-spaced values between 1e2 and 1e7 (about 4.64e4).
# NOTE(review): the value is only displayed, never stored — likely a leftover.
np.logspace(2,7,16)[8]

In [None]:
################################ baselines ################################

def _mean_baseline_corr(name_template, seeds):
    """Average the layer-0 correlation vectors of a seeded baseline.

    name_template: model name containing one '{}' placeholder for the seed.
    seeds: iterable of seeds; one '<model>_layer-0_corr.gz' file is read per seed.
    Returns the element-wise mean (axis 0) of the loaded vectors.
    """
    corrs = []
    for seed in seeds:
        model_name = name_template.format(seed)
        filename = os.path.join(glm_folder, '{}_layer-{}_corr.gz'.format(model_name, 0))
        # NOTE(review): joblib.load unpickles the file content; only load trusted files.
        with open(filename, 'rb') as f:
            corrs.append(joblib.load(f))
    return np.mean(corrs, axis=0)


# look at random embeddings
# NOTE(review): the seed ranges below are copied as-is; they cover 8, 9 or 10
# seeds depending on the baseline — confirm this is intended.
corr_random_emb_300 = _mean_baseline_corr('random_embedding_300d_seed{}', range(1, 11))
corr_random_emb_1024 = _mean_baseline_corr('random_embedding_1024d_seed{}', range(1, 11))
corr2_random_emb_1024 = _mean_baseline_corr('random_embedding_1024d_seed{}', range(11, 21))

# random vectors (the variable names suggest different sampling distributions per seed range)
corr_random_vec_1024 = _mean_baseline_corr('random_vector_1024d_seed{}', range(1, 9))
corr_lograndom_vec_1024 = _mean_baseline_corr('random_vector_1024d_seed{}', range(10, 18))
corr_exprandom_vec_1024 = _mean_baseline_corr('random_vector_1024d_seed{}', range(30, 40))
corr_cauchyrandom_vec_1024 = _mean_baseline_corr('random_vector_1024d_seed{}', range(40, 50))
corr_georandom_vec_1024 = _mean_baseline_corr('random_vector_1024d_seed{}', range(50, 60))

# random embeddings, other seed ranges
corr_lograndom_emb_1024 = _mean_baseline_corr('random_embedding_1024d_seed{}', range(20, 28))
corr_exprandom_emb_1024 = _mean_baseline_corr('random_embedding_1024d_seed{}', range(30, 39))
corr_cauchyrandom_emb_1024 = _mean_baseline_corr('random_embedding_1024d_seed{}', range(40, 49))
corr_georandom_emb_1024 = _mean_baseline_corr('random_embedding_1024d_seed{}', range(50, 59))

# GloVe (single file, no seed averaging)
model_name ='glove'
filename = os.path.join(glm_folder, '{}_layer-{}_corr.gz'.format(model_name, 0))
with open(filename, 'rb') as f:
    corr_glove = joblib.load(f)

In [None]:
def plot_xy(xvalue, yvalue, xlabel=None, ylabel=None, logx=False, invert_xaxis=False, figsize=(6.4, 4.8), title="", show_stats=False):
    """Scatter-plot yvalue against xvalue and return the figure.

    Parameters
    ----------
    xvalue, yvalue : sequences of numbers of equal length.
        x values below 1 are raised to 1 before plotting (this happens
        whether or not logx is set).
    xlabel, ylabel : axis labels.
    logx : use a logarithmic x axis.
    invert_xaxis : reverse the direction of the x axis.
    figsize : figure size passed to matplotlib.
    title : figure suptitle.
    show_stats : when True, annotate the figure with the Pearson r and p-value
        (computed on log(x) when logx is set). Off by default, so the
        correlation is no longer computed when it is not displayed.

    Returns
    -------
    The matplotlib figure.
    """
    xvalue = np.array(xvalue)
    # Clamp to 1 so zero or sub-unit values remain drawable on a log axis.
    xvalue[xvalue<1] = 1
    fh = plt.figure(figsize=figsize)
    ax = plt.subplot(111)
    fh.suptitle(title)
    sns.scatterplot(x=xvalue, y=yvalue, ax=ax)
    if logx:
        ax.set_xscale('log')
    if invert_xaxis:
        ax.invert_xaxis()
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if show_stats:
        r, p = pearsonr(np.log(xvalue) if logx else xvalue, yvalue)
        fh.text(0.15, 0.85, '$r={:.2f}$\n$p={:.1e}$'.format(r, p),
                ha='left', va='top', fontsize=11)
    return fh

def pvalue2str(pvalue):
    """Return the significance marker for a p-value.

    '***' for p <= 0.001, '**' for p <= 0.01, '*' for p <= 0.05, otherwise 'ns'.
    """
    # Thresholds are checked from the strictest to the loosest.
    for threshold, marker in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if pvalue <= threshold:
            return marker
    return 'ns'

In [None]:
########################################## Using reliable voxels: best 25% voxels ##########################################

n_trials_split = 10
filename = 'corr_group_split_{}trials.gz'.format(n_trials_split)
# NOTE(review): joblib.load unpickles the file content; only load trusted files.
corr_split = joblib.load(filename)
# Mean over the first axis of the loaded array (presumably the split trials — TODO confirm).
corr_split_mean = np.mean(corr_split, axis=0)

# Reliable voxels: strictly above the 75th percentile of the mean split correlation.
is_voxel_reliable = corr_split_mean > np.percentile(corr_split_mean, 75)
# NOTE(review): np.percentile takes q in percent, so 1/10_000 is the 0.0001th
# percentile and this mask only flags the very lowest value(s) — confirm the cut-off.
nois_voxel_reliable = corr_split_mean < np.percentile(corr_split_mean, 1/10_000)
# Print counts rather than the full boolean mask.
print('{} reliable voxels, {} least-reliable voxels'.format(
    int(np.sum(is_voxel_reliable)), int(np.sum(nois_voxel_reliable))))
print(np.argmax(nois_voxel_reliable))
#### rv for reliable voxels

# Best-layer correlation of every reliable voxel, for every model.
# n_models x n_reliable_voxels
corr_voxels_models_rv = np.array([np.max(corr_layers_voxels[:,is_voxel_reliable], axis=0)
                                  for corr_layers_voxels in corr_layers_voxels_models])

# Mean over reliable voxels of the best-layer correlation, one value per model.
# n_models
corr_models_rv = np.array([np.mean(corr_voxels[is_voxel_reliable]) for corr_voxels in corr_voxels_models])
# NOTE(review): an earlier assignment (per-model correlation at the hard-coded voxel 67)
# was immediately overwritten by this one and has been dropped; confirm which was intended.
nocorr_models_rv = np.array(corr_layers_voxels_models[-1])
print("aaaa", np.mean(corr_models_rv))

# Worst 5% of voxels of the last model, ranked by their best-layer correlation.
max_corr_per_voxel = np.max(corr_layers_voxels_models[-1], axis=0)
num_voxels = corr_layers_voxels_models[-1].shape[1]
n_pire_voxels = int(0.05 * num_voxels)
# argsort is ascending, so the worst voxels are at the START of the ordering
# (the previous [-n:] slice selected the best voxels instead).
pire_voxel_indices = np.argsort(max_corr_per_voxel)[:n_pire_voxels]
argmax_layers_pire_voxels = np.argmax(corr_layers_voxels_models[-1][:, pire_voxel_indices], axis=0)
print("Indices des pires voxels :", pire_voxel_indices)
print("Argmax (layer) des pires voxels :", argmax_layers_pire_voxels)
print(np.mean(argmax_layers_pire_voxels))

# Mean correlation of each layer of each model, averaged over the reliable voxels.
# n_models x n_layers
corr_layers_models_rv = [np.mean(corr_layers[:, is_voxel_reliable], axis=1) for corr_layers in corr_layers_voxels_models]

# Best layer of every voxel for the last model.
argmax_layers_all_voxels = np.argmax(corr_layers_voxels_models[-1], axis=0)
# Indices of the voxels whose best layer is 6, 7 or 8
# (the variable name is historical; no "> 30" test is applied here).
voxel_indices_argmax_gt_30 = np.flatnonzero(np.abs(argmax_layers_all_voxels - 7) <= 1)
# Binary map over voxels: 1 for the selected voxels, 0 elsewhere.
ppp = np.zeros(len(corr_voxels_models[-1]))
ppp[voxel_indices_argmax_gt_30] = 1
imgtmp = nifti_masker.inverse_transform(ppp)
display = plotting.plot_glass_brain(
    imgtmp,
    threshold=0.,
    display_mode='lyrz',
    vmin=0.,
    cmap='Spectral_r',
    plot_abs=True,
)

# Best-layer correlation of the selected voxels in the last model.
selected_corr = np.array(corr_layers_voxels_models[-1])[:, voxel_indices_argmax_gt_30]
max_corr_for_selected_voxels = selected_corr.max(axis=0)
print(max_corr_for_selected_voxels)

# restrict to left and right hemisphere
# n_models x (number of reliable voxels in that half)
# The reliability mask is split at n_voxels//2, the same position used to build
# corr_l_models / corr_r_models, so each half of the mask indexes its own half.
# NOTE(review): assumes the first half of the masked voxels is the left hemisphere — confirm.
corr_l_models_rv = corr_l_models[:,is_voxel_reliable[:n_voxels//2]]
corr_r_models_rv = corr_r_models[:,is_voxel_reliable[n_voxels//2:]]

# Density of the mean split correlation, with the 75th-percentile cut-off as a dashed line.
fh, ax = plt.subplots(figsize=(5,3))
sns.kdeplot(corr_split_mean, cut=0., ax=ax)
ax.axvline(np.percentile(corr_split_mean, 75), ls='--', c='0.4')
ax.set_xlabel('correlation')
ax.set_ylabel('density')
plt.show()

if save_fig:
    out_path = os.path.join(fig_folder, 'reliable_voxels_distribution.pdf')
    fh.savefig(out_path, bbox_inches='tight', facecolor=(1,1,1,0))

# Glass-brain map of the mean split correlation, with a dashed contour at the
# 75th-percentile cut-off and a second colorbar axis labelled in percent.
imgtmp = nifti_masker.inverse_transform(corr_split_mean)
fh, ax = plt.subplots(figsize=(12,3))
reliability_cutoff = np.percentile(corr_split_mean, 75)
cbar_ticks = [0., reliability_cutoff, np.max(corr_split_mean)]

display = plotting.plot_glass_brain(imgtmp, threshold=0., display_mode='lyrz',
                                    vmin=0., cmap='Spectral_r',
                                    plot_abs=True, colorbar=True, axes=ax)
display.add_contours(imgtmp, levels=[reliability_cutoff], colors='0.2',
                     linewidths=1.2, alpha=0.9, linestyles='dashed')

# NOTE(review): _colorbar_ax is a private attribute of the nilearn display object.
display._colorbar_ax.set_yticks(cbar_ticks)
ylim = display._colorbar_ax.get_ylim()
# Twin axis sharing the colorbar limits, carrying the percent labels.
ax2 = display._colorbar_ax.twinx()
ax2.set_ylim(ylim)
ax2.set_yticks(cbar_ticks, ['0%', '75%', '100%'])
ax2.yaxis.set_tick_params(width=0)
ax2.axhline(reliability_cutoff, ls='--', lw=1.2, color='0.2')

plotting.show()

if save_fig:
    out_path = os.path.join(fig_folder, 'reliable_voxels_75.pdf')
    fh.savefig(out_path, bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
import numpy as np
import plotly.graph_objects as go

np.random.seed(0)  # nothing below draws random numbers; kept so the global RNG state is unchanged
# Show how many voxels are kept rather than printing the whole boolean mask.
print(int(np.sum(is_voxel_reliable)), 'reliable voxels')
# Best layer of every reliable voxel, one array per checkpoint.
data = [np.argmax(corr_layers_voxels[:, is_voxel_reliable], axis=0) for corr_layers_voxels in corr_layers_voxels_models]

# One histogram trace per checkpoint; only the first one is visible initially.
fig = go.Figure()
for i, dist in enumerate(data):
    fig.add_trace(
        go.Histogram(
            x=dist,
            name=f'Time {steps[i]}',  # training step rather than checkpoint index
            visible=(i == 0),
        )
    )

# Slider steps: each one shows exactly one trace and updates the title.
gsteps = []
for i in range(len(data)):
    visibility = [False] * len(data)
    visibility[i] = True
    gsteps.append(dict(
        method="update",
        label=str(steps[i]),  # show the training step next to the slider
        args=[{"visible": visibility},
              {"title": f"Distribution at Time {steps[i]}"}],
    ))

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Time: "},
    pad={"t": 50},
    steps=gsteps
)]

fig.update_layout(
    sliders=sliders,
    title="Evolution of Distribution Over Time"
)

fig.show()


In [None]:
# Largest layer index used as the upper bound of the colour scale.
vmax = 31


def _plot_best_layer_on_surf(img, title):
    """Render a best-layer volume on fsaverage5 (lateral views) and return (figure, axes)."""
    fig_, axes_ = plotting.plot_img_on_surf(img,
                                            surf_mesh='fsaverage5',
                                            views=['lateral'],
                                            hemispheres=['left', 'right'],
                                            vmin=0., vmax=vmax,
                                            cmap='Spectral_r',
                                            symmetric_cbar=False,
                                            cbar_tick_format='%.2f',
                                            colorbar=True,
                                            title=title)
    # Enlarge the two brain panels.
    for ax_ in axes_[0:2]:
        ax_.set_box_aspect(None, zoom=1.45)
    fig_.set_size_inches(7, 4.3)
    plotting.show()
    return fig_, axes_


######## last checkpoint ########

# Best layer of every voxel, computed for the last checkpoint only
# (the argmax of the other checkpoints is not needed here).
data = np.argmax(corr_layers_voxels_models[-1], axis=0)
print(data.shape)
imgtmp = nifti_masker.inverse_transform(data)
fh, axes = _plot_best_layer_on_surf(imgtmp, 'trained best layer')

if save_fig:
    # NOTE(review): the file name mentions "best_over_random_emb" although the map
    # shows best-layer indices; kept unchanged in case other material references it.
    fh.savefig(os.path.join(fig_folder, 'brain_best_over_random_emb.pdf'), bbox_inches='tight',
               transparent=True, facecolor=(1,1,1,0))

######## first checkpoint (step 0) ########

data = np.argmax(corr_layers_voxels_models[0], axis=0)
imgtmp = nifti_masker.inverse_transform(data)
fh, axes = _plot_best_layer_on_surf(imgtmp, "untrained")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import imageio
from nilearn import plotting

# Render one best-layer surface image per checkpoint, then assemble them into a GIF.
# Requires steps, corr_layers_voxels_models and nifti_masker from the cells above.

# Layer indices run from 0 to n_layers - 1, so that is the top of the colour
# scale (same bound as the static best-layer figures).
vmax = n_layers - 1
data = [np.argmax(corr_layers_voxels[:, :], axis=0) for corr_layers_voxels in corr_layers_voxels_models]
for step_idx in range(len(steps)):
    print(f'Generating image for step {steps[step_idx]}')
    imgtmp = nifti_masker.inverse_transform(data[step_idx])

    temp_filename = f'temppp_{step_idx}.png'
    # plot_img_on_surf returns (figure, axes); save and close that figure
    # explicitly instead of relying on matplotlib's current figure.
    frame_fig, _ = plotting.plot_img_on_surf(imgtmp,
                                             surf_mesh='fsaverage5',
                                             views=['lateral'],
                                             hemispheres=['left', 'right'],
                                             vmin=0., vmax=vmax,
                                             cmap='Spectral_r',
                                             symmetric_cbar=False,
                                             cbar_tick_format='%.2f',
                                             colorbar=True,
                                             title=str(steps[step_idx]))

    frame_fig.savefig(temp_filename)
    plt.close(frame_fig)

# Optional: bundle all frames into a GIF for a quick visual check.
images = [imageio.imread(f'temppp_{i}.png') for i in range(len(steps))]
imageio.mimsave('brain_correlations.gif', images, duration=0.5)


In [None]:
import ipywidgets as widgets
from IPython.display import display, Image

# Slider selecting the checkpoint index (0 .. len(steps)-1).
step_slider = widgets.IntSlider(min=0, max=len(steps)-1, step=1, description='Step Index')

# Show the image file that corresponds to the slider index.
def show_image(step_idx):
    """Display the PNG file 'temppp_<step_idx>.png' from the working directory."""
    display(Image(filename=f'temppp_{step_idx}.png'))

# Connect the slider to the display function and show the widget.
# NOTE(review): `display` here is the IPython function imported in this cell; it
# replaces the `display` variable assigned by the earlier glass-brain cells.
interactive_plot = widgets.interactive(show_image, step_idx=step_slider)
display(interactive_plot)


In [None]:
import numpy as np
import plotly.graph_objects as go

np.random.seed(0)  # nothing below draws random numbers; kept so the global RNG state is unchanged
# Correlation of every reliable voxel averaged over layers, one vector per checkpoint.
data = [np.mean(corr_layers_voxels[:, is_voxel_reliable], axis=0) for corr_layers_voxels in corr_layers_voxels_models]
print(np.shape(data))

# One histogram trace per checkpoint; only the first one is visible initially.
fig = go.Figure()
for i, dist in enumerate(data):
    fig.add_trace(
        go.Histogram(
            x=dist,
            name=f'Time {steps[i]}',  # training step rather than checkpoint index
            visible=(i == 0),
        )
    )

# Slider steps: each one shows exactly one trace and updates the title.
gsteps = []
for i in range(len(data)):
    visibility = [False] * len(data)
    visibility[i] = True
    gsteps.append(dict(
        method="update",
        label=str(steps[i]),  # show the training step next to the slider
        args=[{"visible": visibility},
              {"title": f"Distribution at Time {steps[i]}"}],
    ))

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Time: "},
    pad={"t": 50},
    steps=gsteps
)]

fig.update_layout(
    sliders=sliders,
    title="Evolution of Distribution Over Time"
)

fig.show()


In [None]:
# Mean best-layer brain correlation of each checkpoint against its training step (log x axis).
fh = plot_xy(
    steps,
    list(corr_models),
    xlabel='epoch',
    ylabel='brain correlation',
    logx=True,
    title='brain correlation per log of training epoch (pythia 12b)',
)
if save_fig:
    out_path = os.path.join(fig_folder, 'llms_params_corr.pdf')
    fh.savefig(out_path, bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
import numpy as np
from scipy.optimize import curve_fit
from scipy.special import expit  # C'est la fonction sigmoid
from tqdm import tqdm


# Sigmoid model fitted to each voxel
def sigmoid_model(x, a, b, lambda_):
    """Return a + |lambda_| * sigmoid(x + b).

    a is the baseline, b shifts the sigmoid along x, and the absolute value of
    lambda_ is the (non-negative) amplitude. The slope is fixed to 1.
    """
    amplitude = abs(lambda_)
    shifted = x + b
    return a + amplitude * expit(shifted)

# Per-voxel parameter estimation
def estimate_parameters(layers, arr, tresh=0):
    """Fit sigmoid_model to every column of arr and return the absolute parameters.

    layers: x coordinates shared by all fits (one per row of arr).
    arr: 2-D array; each column is the series of one voxel.
    tresh: a voxel is fitted only when its maximum is above tresh (tresh >= 0)
        or below -tresh (tresh < 0); other voxels get [0, 0, 0].

    Returns an array with one row [|a|, |b|, |lambda_|] per voxel. Note that the
    absolute value is applied to all three fitted parameters, so signs are lost.
    Exceptions raised by curve_fit are not caught.
    """
    fitted = []
    for voxel_data in tqdm(arr.T):  # transpose to iterate over voxels
        peak = np.max(voxel_data)
        if tresh >= 0:
            do_fit = peak > tresh
        else:
            do_fit = peak < -tresh

        if do_fit:
            popt, _ = curve_fit(sigmoid_model, layers, voxel_data, p0=[1, 1, 1], maxfev=50000)
        else:
            popt = [0, 0, 0]
        fitted.append(np.abs(popt))
    return np.array(fitted)
# Best-layer correlation of every voxel at every checkpoint, minus the
# 1024-d random-embedding baseline (one learning curve per voxel).
arr = np.array(corr_voxels_models) - corr_random_emb_1024
# x axis of the fit: log of the training step. Step values below 1 are raised to 1
# before the log so that log(0) is never evaluated (no divide-by-zero warning);
# the first entry (step 0) is then placed at -10, as before.
layers = np.log(np.maximum(steps, 1))
layers[0] = - 10
params_voxels = estimate_parameters(layers, arr, 0.)
print(np.array(params_voxels).shape)

In [None]:
# Surface map of the second fitted parameter (column 1 of params_voxels) per voxel.
offset = np.array(params_voxels)[:, 1]
data = offset
imgtmp = nifti_masker.inverse_transform(data)
surf_options = dict(
    surf_mesh='fsaverage5',
    views=['lateral'],
    hemispheres=['left', 'right'],
    cmap='Spectral_r',
    vmin=5, vmax=7.5,
    symmetric_cbar=False,
    cbar_tick_format='%.2f',
    colorbar=True,
    title="offset de la sigmoid modélisant l'apprentissage par layer",
)
plotting.plot_img_on_surf(imgtmp, **surf_options)




In [None]:
from nilearn import datasets, surface, plotting
from nilearn.input_data import NiftiMasker
import numpy as np

# Project the second fitted parameter (column 1 of params_voxels) onto the
# fsaverage pial surfaces and draw it on the inflated surfaces, one hemisphere at a time.
fsaverage = datasets.fetch_surf_fsaverage()
data_voxels = np.array(params_voxels)[:, 1]
img_data = nifti_masker.inverse_transform(data_voxels)

texture_left = surface.vol_to_surf(img_data, fsaverage['pial_left'])
texture_right = surface.vol_to_surf(img_data, fsaverage['pial_right'])

hemisphere_plots = (
    ('left', texture_left, 'Offset apprentissage : Surface gauche'),
    ('right', texture_right, 'Offset apprentissage : Surface droite'),
)
for hemi, texture, title in hemisphere_plots:
    plotting.plot_surf(fsaverage[f'infl_{hemi}'], texture, hemi=hemi,
                       cmap='Spectral_r', colorbar=True, title=title,
                       vmin=5, vmax=8)

plotting.show()


In [None]:
import numpy as np
from scipy.optimize import curve_fit
from scipy.special import expit  # C'est la fonction sigmoid
from tqdm import tqdm


# Sigmoid model fitted to each voxel's curve
def sigmoid_model(x, a, b, lambda_):
    """Evaluate ``a + |lambda_| * expit(x + b)``.

    Parameters
    ----------
    x : scalar or array
        Abscissa(s) at which the curve is evaluated.
    a : float
        Additive bias (value of the lower plateau).
    b : float
        Horizontal offset added to ``x`` inside the logistic function.
    lambda_ : float
        Height of the sigmoid; only its absolute value is used, so the
        curve is always non-decreasing in ``x``.

    The slope inside the logistic is fixed to 1.
    """
    amplitude = abs(lambda_)
    return a + amplitude * expit(x + b)

# Per-voxel estimation of the sigmoid parameters
def estimate_parameters(layers, arr, tresh=0):
    """Fit ``sigmoid_model`` to every column of ``arr``.

    Parameters
    ----------
    layers : array-like
        Abscissa shared by all fits (one value per row of ``arr``).
    arr : ndarray
        2-D array; it is transposed so that each column (one voxel) is
        fitted separately.
    tresh : float, default 0
        Selection rule on the column maximum. With ``tresh >= 0`` a voxel is
        fitted only if its maximum exceeds ``tresh``; with ``tresh < 0`` it
        is fitted only if its maximum is below ``-tresh``.

    Returns
    -------
    ndarray
        One row ``|a|, |b|, |lambda_|`` per voxel (absolute values of the
        fitted parameters); voxels that were not selected get ``0, 0, 0``.

    Errors raised by ``curve_fit`` are not caught.
    """
    start_point = [1, 1, 1]
    fitted_rows = []
    for series in tqdm(arr.T):  # iterate over voxels
        peak = np.max(series)
        selected = peak > tresh if tresh >= 0 else peak < -tresh
        if selected:
            coeffs, _ = curve_fit(sigmoid_model, layers, series,
                                  p0=start_point, maxfev=50000)
        else:
            coeffs = [0, 0, 0]
        fitted_rows.append(np.abs(coeffs))
    return np.array(fitted_rows)
# Last-layer correlations of every checkpoint (index -1 on the layer axis),
# relative to the 1024-d random-embedding baseline.
arr = np.array(corr_layers_voxels_models)[:, -1, :] - corr_random_emb_1024

# The x-axis of the fit is log(training step). `steps` starts at 0 (see its
# definition), so the first log is -inf: silence the expected divide-by-zero
# warning and substitute a finite stand-in.
with np.errstate(divide='ignore'):
    layers = np.log(steps)
layers[0] = -10  # finite placeholder for log(0)
params_voxels = estimate_parameters(layers, arr, 0.)
# Column-wise totals of the three fitted parameters (quick sanity check).
print(np.sum(params_voxels, axis=0))

In [None]:
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt

# Make sure the fitted parameters are a NumPy array (one row per voxel).
params_voxels = np.array(params_voxels)

# Columns follow sigmoid_model's parameter order (a, b, lambda_), i.e.
# bias, offset, y-scale -- assuming params_voxels comes from
# estimate_parameters. Bias and y-scale are rescaled so that the three
# densities are readable on a common x-axis.
x1 = params_voxels[:, 0] * 50
x2 = params_voxels[:, 1]
x3 = params_voxels[:, 2] * 20

# Voxels that were not fitted are stored as all-zero rows: drop them.
mask = (x1 != 0) & (x2 != 0) & (x3 != 0)

x1_filtered = x1[mask]
x2_filtered = x2[mask]
x3_filtered = x3[mask]

# Kernel density estimates on the fitted voxels only.
kde_x1 = gaussian_kde(x1_filtered, bw_method=0.005)
kde_x2 = gaussian_kde(x2_filtered, bw_method=0.001)
kde_x3 = gaussian_kde(x3_filtered, bw_method=0.00007)
# Grid on which the densities are evaluated.
x_vals = np.linspace(-1, 10, 1_000)

# Legend labels state the scale factor actually applied above.
plt.plot(x_vals, kde_x1(x_vals), label='50 * bias', lw=2)
plt.plot(x_vals, kde_x2(x_vals), label='offset', lw=2)
plt.plot(x_vals, kde_x3(x_vals), label='20 * yscale', lw=2)

plt.legend()
# NOTE(review): the formula in the title mentions an xscale and no bias,
# which does not match sigmoid_model as defined in this notebook -- confirm.
plt.title("Densité lissée des paramètres. y = yscale * sig(xscale * x + offset)")
plt.show()


In [None]:
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt

# Make sure the fitted parameters are a NumPy array (one row per voxel).
params_voxels = np.array(params_voxels)

# Columns follow sigmoid_model's parameter order (a, b, lambda_), i.e.
# bias, offset, y-scale -- assuming params_voxels comes from
# estimate_parameters.
x1 = params_voxels[:, 0] * 10
x2 = params_voxels[:, 1]
x3 = params_voxels[:, 2] * 20

# Voxels that were not fitted are stored as all-zero rows: drop them.
mask = (x1 != 0) & (x2 != 0) & (x3 != 0)

x1_filtered = x1[mask]
x2_filtered = x2[mask]
x3_filtered = x3[mask]

# Kernel density estimates on the fitted voxels only.
# kde_x3 is computed but not drawn in this cell.
kde_x1 = gaussian_kde(x1_filtered, bw_method=0.005)
kde_x2 = gaussian_kde(x2_filtered, bw_method=0.001)
kde_x3 = gaussian_kde(x3_filtered, bw_method=0.00007)
# Grid on which the densities are evaluated.
x_vals = np.linspace(-1, 10, 1_000)

# The bias label states the scale factor actually applied above.
plt.plot(x_vals, kde_x1(x_vals), label='10 * bias', lw=2)
plt.plot(x_vals, kde_x2(x_vals), label='offset', lw=2)

plt.legend()
# NOTE(review): the formula in the title mentions an xscale and no bias,
# which does not match sigmoid_model as defined in this notebook -- confirm.
plt.title("Densité lissée des paramètres. y = yscale * sig(xscale * x + offset)")
plt.show()


In [None]:
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt

# Work on an ndarray view of the fitted parameters (one row per voxel).
params_voxels = np.array(params_voxels)

# x1 and x2 both read column 1 (offset); x3 reads column 2 (y-scale).
x1, x2, x3 = (params_voxels[:, column] for column in (1, 1, 2))

# Keep only voxels whose three values are all non-zero.
mask = np.logical_and.reduce([x1 != 0, x2 != 0, x3 != 0])

x1_filtered, x2_filtered = x1[mask], x2[mask]
x3_filtered = x3[mask] * 5

# One Gaussian KDE per quantity; kde_x1 is built but not drawn here.
kde_x1 = gaussian_kde(x1_filtered, bw_method=0.005)
kde_x2 = gaussian_kde(x2_filtered, bw_method=0.005)
kde_x3 = gaussian_kde(x3_filtered, bw_method=0.05)

# Evaluation grid for the densities.
x_vals = np.linspace(0, 10, 10_000)

for density, curve_label in ((kde_x2, 'offset'), (kde_x3, '5 * yscale')):
    plt.plot(x_vals, density(x_vals), label=curve_label, lw=2)

plt.legend()
plt.title("Densité lissée des paramètres. y = yscale * sig(xscale * x + offset)")
plt.show()


In [None]:
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt

# NOTE(review): this cell is an exact copy of the previous KDE cell
# (same columns, same bandwidths, same plot) -- probably safe to delete.

# Convert params_voxels to a NumPy array
params_voxels = np.array(params_voxels)

# Extract the columns; x1 and x2 both read column 1, x3 reads column 2
x1 = params_voxels[:, 1]
x2 = params_voxels[:, 1]
x3 = params_voxels[:, 2]

# Mask selecting the voxels whose three values are all non-zero
mask = (x1 != 0) & (x2 != 0) & (x3 != 0)

# Apply the mask; x3 is additionally multiplied by 5 (hence the legend label)
x1_filtered = x1[mask]
x2_filtered = x2[mask]
x3_filtered = x3[mask] * 5

# Kernel density estimates on the filtered data (kde_x1 is not plotted below)
kde_x1 = gaussian_kde(x1_filtered, bw_method=0.005)
kde_x2 = gaussian_kde(x2_filtered, bw_method=0.005)
kde_x3 = gaussian_kde(x3_filtered, bw_method=0.05)
# Grid of points on which the densities are evaluated
x_vals = np.linspace(0, 10, 10_000)

# Draw the densities
# plt.plot(x_vals, kde_x1(x_vals), label='5 * xscale', lw=2)
plt.plot(x_vals, kde_x2(x_vals), label='offset', lw=2)
plt.plot(x_vals, kde_x3(x_vals), label='5 * yscale', lw=2)

# Legend and title
plt.legend()
plt.title("Densité lissée des paramètres. y = yscale * sig(xscale * x + offset)")
plt.show()


In [None]:
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt

# Make sure the fitted parameters are a NumPy array (one row per voxel).
params_voxels = np.array(params_voxels)

# Columns follow sigmoid_model's parameter order (a, b, lambda_), i.e.
# bias, offset, y-scale -- assuming params_voxels comes from
# estimate_parameters.
x1 = params_voxels[:, 0]
x2 = params_voxels[:, 1]
x3 = params_voxels[:, 2]

# Voxels that were not fitted are stored as all-zero rows: drop them.
mask = (x1 != 0) & (x2 != 0) & (x3 != 0)

# Bias and y-scale are multiplied by 5 so the three densities share one axis.
x1_filtered = x1[mask] * 5
x2_filtered = x2[mask]
x3_filtered = x3[mask] * 5

# Kernel density estimates on the fitted voxels only.
kde_x1 = gaussian_kde(x1_filtered, bw_method=0.005)
kde_x2 = gaussian_kde(x2_filtered, bw_method=0.005)
kde_x3 = gaussian_kde(x3_filtered, bw_method=0.05)
# Grid on which the densities are evaluated.
x_vals = np.linspace(0, 5, 10_000)

# Column 0 is the additive bias of sigmoid_model (which has no x-scale
# parameter), so it is labelled as such.
plt.plot(x_vals, kde_x1(x_vals), label='5 * bias', lw=2)
plt.plot(x_vals, kde_x2(x_vals), label='offset', lw=2)
plt.plot(x_vals, kde_x3(x_vals), label='5 * yscale', lw=2)

plt.legend()
# NOTE(review): the formula in the title mentions an xscale and no bias,
# which does not match sigmoid_model as defined in this notebook -- confirm.
plt.title("Densité lissée des paramètres. y = yscale * sig(xscale * x + offset)")
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Scatter of the fitted offset against the (rescaled) y-scale, using the
# filtered arrays left behind by the preceding KDE cell.
plt.scatter(x2_filtered, x3_filtered)
print(np.mean(x3_filtered))
plt.xlim([0, 15])
plt.ylim([0, 10])

plt.title("Scatter plot entre l'offset et y_scale ; pas de filtrage")
plt.xlabel("offset")
# x3_filtered is x3 * 5 in the preceding cell, so the axis is labelled with
# that factor (the label previously claimed a factor of 50).
plt.ylabel("5 * yscale")

plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Same scatter as the previous cell: offset on x, rescaled y-scale on y.
# NOTE(review): the title mentions xscale and a >50 filter, neither of which
# appears in this cell -- confirm the intended wording.
mean_scaled_yscale = np.mean(x3_filtered)
plt.scatter(x2_filtered, x3_filtered)
print(mean_scaled_yscale)

plt.xlim([0, 15])
plt.ylim([0, 10])
plt.xlabel("offset")
plt.ylabel("5 * yscale")
plt.title("Scatter plot entre xscale et offset avec filtrage des valeurs > 50 en absolu")

plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt


# Index of the voxel with the highest lograndom-embedding baseline correlation.
print(np.argmax(corr_lograndom_emb_1024))

# All voxels first, then the second half of the voxel vector drawn on top.
# NOTE(review): the title treats that second half as the right hemisphere;
# this presumes the masker orders voxels hemisphere by hemisphere -- confirm.
second_half_start = len(x2) // 2
plt.scatter(x2, x3)
plt.scatter(x2[second_half_start:], x3[second_half_start:])
print(np.mean(x3_filtered))

plt.xlim([0, 15])
plt.ylim([0, 10])
plt.title("Scatter plot entre xscale et offset. En orange, l'hémisphère doite")
plt.xlabel("offset")
plt.ylabel("5 * yscale")

plt.show()


In [None]:
# Binary map over the masked voxels: 1 for the second half of the voxel
# vector, 0 elsewhere.
#
# Earlier revisions of this cell also filled `arr` from (a) the 300 voxels
# with the highest lograndom-embedding correlation, (b) x2 - 6.7 > 1.4 and
# (c) |x3| < 2, but each of those masks was reset to zero before plotting, so
# only the half/half split below ever reached the figure. The unused
# `temp_filename` (which depended on a `step_idx` leaked from another cell)
# has been dropped as well.
# NOTE(review): the title ("aires a grand offset") matches mask (b), not the
# mask that is actually drawn -- confirm which one is wanted.
arr = np.zeros(n_voxels)
arr[len(arr)//2:] = 1

imgtmp = nifti_masker.inverse_transform(arr)

plotting.plot_img_on_surf(imgtmp,
                        surf_mesh='fsaverage5',
                        views=['lateral'],
                        hemispheres=['left', 'right'],
                        cmap='Spectral_r',
                        vmax=1,
                        symmetric_cbar=False,
                        cbar_tick_format='%.2f',
                        colorbar=True,
                        title="aires a grand offset")


In [None]:
# Voxel indices of the last checkpoint, sorted by decreasing correlation.
sorted_indices = np.argsort(corr_voxels_models[-1])[::-1]

# Index of the voxel with the 100th largest correlation (0-based position 99).
hundredth_argmax = sorted_indices[99]

w = np.where(x1 > 0)

# Binary map highlighting the 300 best-correlated voxels.
arr = np.zeros(n_voxels)
arr[sorted_indices[:300]] = 1
imgtmp = nifti_masker.inverse_transform(arr)

# The former `temp_filename = f'temppp_{step_idx}.png'` was never used and
# relied on a `step_idx` leaked from another cell, so it has been removed.
plotting.plot_img_on_surf(imgtmp,
                        surf_mesh='fsaverage5',
                        views=['lateral'],
                        hemispheres=['left', 'right'],
                        cmap='Spectral_r',
                        symmetric_cbar=False,
                        cbar_tick_format='%.2f',
                        colorbar=True,
                        title="coucou")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 3 x 3 grid: one row per selection threshold, one column per pair of
# fitted parameters.
fig, axs = plt.subplots(3, 3, figsize=(10, 10))

tresh = [0, 0.1, 0.3]

# Row / column captions.
# NOTE(review): column 0 of the fit is sigmoid_model's additive bias, and the
# third column draws x = column 2, y = column 1; the captions below
# ("xscale", "x=offset * y=yscale") do not obviously match -- confirm.
row_labels = ['all', 'max corr > 0.1', 'max corr > 0.3']
col_labels = ['x=xscale * y=offset', 'x=xscale * y=yscale', 'x=offset * y=yscale']

# Inputs of the fit do not depend on the panel: build them once.
arr = np.array(corr_layers_voxels_models)[:, -1, :]
layers = np.arange(arr.shape[0])

for i in range(3):
    # The per-voxel fit is expensive and only depends on the row threshold,
    # so it runs once per row instead of once per panel.
    data = estimate_parameters(layers, arr, tresh[i])
    x1 = data[:, 0]
    x2 = data[:, 1]
    x3 = data[:, 2]
    # Keep voxels whose parameters are all at most 1 in magnitude and whose
    # first two parameters are not (almost) zero.
    mask = (np.abs(x1) <= 1) & (np.abs(x2) <= 1) & (np.abs(x1) > 0.001) & (np.abs(x2) > 0.001) & (np.abs(x3) <= 1)
    x1_filtered = np.abs(x1[mask])
    x2_filtered = np.abs(x2[mask])
    x3_filtered = np.abs(x3[mask])

    panels = (
        (x1_filtered * 5, x2_filtered),
        (x1_filtered * 5, x3_filtered * 5),
        (x3_filtered * 5, x2_filtered),
    )
    for j, (x, y) in enumerate(panels):
        sns.kdeplot(x=x, y=y, fill=True, cmap="Blues", thresh=0, levels=100, ax=axs[i, j], bw_adjust=0.5)
        axs[i, j].set_xlim(0, 5)
        axs[i, j].set_ylim(0, 5)

# Row captions, written vertically to the left of the first column.
for i, label in enumerate(row_labels):
    axs[i, 0].annotate(label, xy=(-0.3, 0.5), xycoords='axes fraction',
                       va='center', ha='right', fontsize=12, rotation=90)

# Column captions, above the first row.
for j, label in enumerate(col_labels):
    axs[0, j].annotate(label, xy=(0.5, 1.1), xycoords='axes fraction',
                       va='bottom', ha='center', fontsize=12)

plt.tight_layout()
plt.show()


In [None]:
# Brain correlation (corr_models_rv) as a function of the training step,
# on a logarithmic x-axis.
fh = plot_xy(steps, corr_models_rv, logx=True,
             xlabel='epoch', ylabel='brain correlation')
if save_fig:
    corr_rv_pdf = os.path.join(fig_folder, 'llms_params_corr_rv.pdf')
    fh.savefig(corr_rv_pdf, bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Correlation of one hand-picked voxel (index 16808) across checkpoints.
nocorr_models_rv = np.array([np.mean(corr_voxels[16808]) for corr_voxels in corr_voxels_models])

fh = plot_xy(steps, nocorr_models_rv, 
             xlabel='epoch', ylabel='brain correlation', 
             logx=True)
if save_fig:
    # Dedicated file name: this diagnostic figure used to be written to
    # 'llms_params_corr_rv.pdf', overwriting the main correlation figure.
    fh.savefig(os.path.join(fig_folder, 'llms_params_corr_voxel16808.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Correlation of the first voxel (index 0) across checkpoints.
nocorr_models_rv = np.array([np.mean(corr_voxels[0]) for corr_voxels in corr_voxels_models])

fh = plot_xy(steps, nocorr_models_rv, 
             xlabel='epoch', ylabel='brain correlation', 
             logx=True)
if save_fig:
    # Dedicated file name: this diagnostic figure used to be written to
    # 'llms_params_corr_rv.pdf', overwriting the main correlation figure.
    fh.savefig(os.path.join(fig_folder, 'llms_params_corr_voxel0.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Last entry of corr_layers_models, plotted against its index.
# presumably one correlation value per layer of the final checkpoint --
# TODO confirm against the cell that builds corr_layers_models.
nocorr_models_rv = np.array(corr_layers_models[-1])

fh = plot_xy(range(len(nocorr_models_rv)), nocorr_models_rv, 
             xlabel='LLM layer', ylabel='brain correlation')
if save_fig:
    # Dedicated file name: this figure used to be written to
    # 'llms_params_corr_rv.pdf', overwriting the main correlation figure.
    fh.savefig(os.path.join(fig_folder, 'llms_layers_corr_last_model.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
import ipywidgets as widgets
from IPython.display import display, Image

# Slider over image indices 0..9
step_slider = widgets.IntSlider(description='Step Index', min=0, max=9, step=1)


def show_image(step_idx):
    """Display the PNG named ``lop_<step_idx>.png`` from the working directory."""
    image_path = f'lop_{step_idx}.png'
    display(Image(filename=image_path))


# Bind the slider to the viewer and show the resulting widget
interactive_plot = widgets.interactive(show_image, step_idx=step_slider)
display(interactive_plot)

In [None]:
################################ baselines ################################

def _load_layer0_corr(model_name):
    """Return the object stored in ``<glm_folder>/<model_name>_layer-0_corr.gz``.

    The file is read with joblib; in this notebook it holds the per-voxel
    brain correlations of the given baseline model.
    """
    filename = os.path.join(glm_folder, '{}_layer-{}_corr.gz'.format(model_name, 0))
    with open(filename, 'rb') as f:
        return joblib.load(f)


def _mean_corr_over_seeds(name_template, seeds):
    """Average the layer-0 correlations of one baseline family over seeds.

    ``name_template`` contains a single ``{}`` placeholder that receives each
    seed in turn; the mean is taken across seeds (axis 0).
    """
    return np.mean([_load_layer0_corr(name_template.format(seed)) for seed in seeds], axis=0)


# ---- random embeddings, averaged over seeds 1..10 ----
corr_random_emb_300 = _mean_corr_over_seeds('random_embedding_300d_seed{}', range(1, 11))
corr_random_emb_1024 = _mean_corr_over_seeds('random_embedding_1024d_seed{}', range(1, 11))

# ---- extra 1024-d random embedding (seed 11 only) ----
# Each seed's map is displayed individually, then the average over seeds.
corr2_random_emb_1024 = []
for seed in range(11, 12):
    corr = _load_layer0_corr('random_embedding_1024d_seed{}'.format(seed))
    imgtmp = nifti_masker.inverse_transform(corr)
    plotting.plot_img_on_surf(imgtmp,
                              surf_mesh='fsaverage5',
                              views=['lateral'],
                              hemispheres=['left', 'right'],
                              vmin=-0, vmax=0.3,
                              cmap='Spectral_r',
                              symmetric_cbar=False,
                              cbar_tick_format='%.2f',
                              colorbar=True,
                              title="exemple de corrélations entre le cerveau et un layer")
    plt.show()
    corr2_random_emb_1024.append(corr)
corr2_random_emb_1024 = np.mean(corr2_random_emb_1024, axis=0)
imgtmp = nifti_masker.inverse_transform(corr2_random_emb_1024)
plotting.plot_img_on_surf(imgtmp,
                          surf_mesh='fsaverage5',
                          views=['lateral'],
                          hemispheres=['left', 'right'],
                          vmin=-0.3, vmax=0.3,
                          cmap='Spectral_r',
                          symmetric_cbar=False,
                          cbar_tick_format='%.2f',
                          colorbar=True,
                          title="x")
plt.show()

# ---- random vectors: each variant is identified only by its seed range ----
# NOTE(review): the number of seeds differs between families (8, 9 or 10);
# the ranges are kept exactly as they were -- confirm they are intended.
corr_random_vec_1024 = _mean_corr_over_seeds('random_vector_1024d_seed{}', range(1, 9))
corr_lograndom_vec_1024 = _mean_corr_over_seeds('random_vector_1024d_seed{}', range(10, 18))
corr_lograndom_vec_300 = _mean_corr_over_seeds('random_vector_300d_seed{}', range(100, 110))
corr_exprandom_vec_1024 = _mean_corr_over_seeds('random_vector_1024d_seed{}', range(30, 40))
corr_cauchyrandom_vec_1024 = _mean_corr_over_seeds('random_vector_1024d_seed{}', range(40, 50))
corr_georandom_vec_1024 = _mean_corr_over_seeds('random_vector_1024d_seed{}', range(50, 60))

# ---- random embeddings: variants identified by their seed range ----
corr_lograndom_emb_1024 = _mean_corr_over_seeds('random_embedding_1024d_seed{}', range(20, 28))
corr_exprandom_emb_1024 = _mean_corr_over_seeds('random_embedding_1024d_seed{}', range(30, 39))
corr_cauchyrandom_emb_1024 = _mean_corr_over_seeds('random_embedding_1024d_seed{}', range(40, 49))
corr_georandom_emb_1024 = _mean_corr_over_seeds('random_embedding_1024d_seed{}', range(50, 59))

# ---- single-file baselines ----
corr_onehot = _load_layer0_corr('onehot_emb')
corr_onehot2 = _load_layer0_corr('onehot_emb2')
print(corr_onehot2, min(corr_onehot2))

onehotlongphoneme = _load_layer0_corr('onehotlongphoneme')
corr_onehotphoneme = _load_layer0_corr('onehotphoneme_emb')
corr_onehotphoneme2 = _load_layer0_corr('onehotphoneme_emb2')
corr_onehotphonemenoalpha = _load_layer0_corr('onehotphoneme_embnoalpha')
onehot_embnoalpha = _load_layer0_corr('onehot_embnoalpha')

# GloVe
corr_glove = _load_layer0_corr('glove')

randomembjump = _load_layer0_corr('randomembjump')
autre = _load_layer0_corr('on est là')
autre2 = _load_layer0_corr('on est là2')
testintercept = _load_layer0_corr('testintercept10')
testinterceptautre = _load_layer0_corr('autretestintercept10')

# ---- random-embedding baseline inside / outside the reliable voxels ----
print(np.mean(corr_random_emb_1024[is_voxel_reliable]))
print(np.var(corr_random_emb_1024[is_voxel_reliable]))
# Element-wise negation of is_voxel_reliable, kept as a plain list of bools.
is_not_reliable = [not flag for flag in is_voxel_reliable]
print(np.mean(corr_random_emb_1024[is_not_reliable]))
print(np.var(corr_random_emb_1024[is_not_reliable]))

In [None]:
import numpy as np
import plotly.graph_objects as go

np.random.seed(0)
print(is_voxel_reliable)

# One distribution only: the random-embedding baseline on the reliable voxels.
data = [corr_random_emb_1024[is_voxel_reliable]]
print(np.shape(data))

# One (initially hidden) histogram trace per distribution.
fig = go.Figure()
for i, dist in enumerate(data):
    hidden_histogram = go.Histogram(x=dist, name=f'Time {i}', visible=False)
    fig.add_trace(hidden_histogram)

# Show the first trace by default.
fig.data[0].visible = True

# One slider step per distribution; each step reveals only its own trace.
gsteps = []
for i in range(len(data)):
    visibility = [False] * len(data)
    visibility[i] = True
    gstep = dict(
        method="update",
        args=[{"visible": visibility},
              {"title": f"Distribution at Time {i}"}],
    )
    gsteps.append(gstep)

sliders = [dict(active=0,
                currentvalue={"prefix": "Time: "},
                pad={"t": 50},
                steps=gsteps)]

fig.update_layout(sliders=sliders,
                  title="Evolution of Distribution Over Time")

fig.show()


In [None]:
import numpy as np
import plotly.graph_objects as go

# Histogram of the random-embedding baseline on the voxels selected by
# is_not_reliable, shown through a one-step plotly slider.
# NOTE(review): nothing in this cell draws random numbers, so the seed below
# looks unnecessary; the print shows is_voxel_reliable although the data use
# is_not_reliable -- confirm both.
np.random.seed(0)
print(is_voxel_reliable)
data = [corr_random_emb_1024[is_not_reliable]]
print(np.shape(data))
# Create one trace per distribution
fig = go.Figure()

for i, dist in enumerate(data):
    fig.add_trace(
        go.Histogram(
            x=dist,
            name=f'Time {i}',
            visible=False,  # hide every trace; the first one is re-enabled below
        )
    )

# Make the first trace visible
fig.data[0].visible = True

# Build the slider steps (one per distribution)
gsteps = []
for i in range(len(data)):
    gstep = dict(
        method="update",
        args=[{"visible": [False] * len(data)},
              {"title": f"Distribution at Time {i}"}],
    )
    gstep["args"][0]["visible"][i] = True  # show only the trace of this step
    gsteps.append(gstep)

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Time: "},
    pad={"t": 50},
    steps=gsteps
)]

fig.update_layout(
    sliders=sliders,
    title="Evolution of Distribution Over Time"
)

fig.show()


In [None]:
# Three colours taken every 5th entry of a 15-colour 'crest' palette.
color_baseline = sns.color_palette('crest', n_colors=15)[::5]

class MyLegendHandler(HandlerBase):
    """Legend handle made of stacked horizontal lines.

    One line is drawn per colour in ``color_models[::8]`` (every 8th model
    colour), evenly spaced over the height of the legend handle.
    """

    def create_artists(self, legend, orig_handle,
                       x0, y0, width, height, fontsize, trans):
        sampled_colors = color_models[::8]
        n_lines = len(sampled_colors)
        # NOTE(review): the right end of each line is y0 + width (not
        # x0 + width); kept as is -- confirm it is intended.
        return [
            plt.Line2D([x0, y0 + width],
                       [rank / n_lines * height] * 2,
                       color=shade)
            for rank, shade in enumerate(sampled_colors)
        ]

# Distribution of brain correlations over all voxels, for the baselines and
# for every model. Each curve is drawn on its own twin y-axis, so it is
# autoscaled independently of the others; every twin axis is hidden.
fh = plt.figure(figsize=(8, 4))
ax = plt.subplot(111)
sns.kdeplot(corr_random_vec_1024, c=color_baseline[0], lw=2, alpha=0.9, cut=0., ax=ax);
ax.set_yticklabels([])
ax.tick_params(axis='y', length=0)
plt.xlabel('brain correlation')
plt.ylabel('normalized density')
# NOTE(review): the solid and the dashed first-baseline curves both use
# corr_random_vec_1024 (and share the same legend text) -- confirm whether
# one of them should be a different baseline.
ax = plt.twinx()
sns.kdeplot(corr_random_vec_1024, c=color_baseline[0], ls='--', lw=2, alpha=0.9, cut=0., ax=ax);
ax.grid(None)
ax.axis('off')
ax = plt.twinx()
sns.kdeplot(corr_random_emb_300, c=color_baseline[1], lw=2, alpha=0.9, cut=0., ax=ax);
ax.grid(None)
ax.axis('off')
ax = plt.twinx()
sns.kdeplot(corr_random_emb_1024, c=color_baseline[1], ls='--', lw=2, alpha=0.9, cut=0., ax=ax);
ax.grid(None)
ax.axis('off')
ax = plt.twinx()
sns.kdeplot(corr_glove, c=color_baseline[2], lw=2, alpha=0.9, cut=0., ax=ax);
ax.grid(None)
ax.axis('off')
for corr_voxels, color in zip(corr_voxels_models, color_models):
    ax = plt.twinx()
    sns.kdeplot(corr_voxels, color=color, lw=1.5, alpha=0.6, cut=0., ax=ax);
    ax.grid(None)
    ax.axis('off')

# Single-point dummy lines whose only purpose is to populate the legend.
plt.plot([0], c=color_baseline[0], lw=2, label='random vectors 1024d')
plt.plot([0], c=color_baseline[0], lw=2, ls='--', label='random vectors 1024d')
plt.plot([0], c=color_baseline[1], lw=2, label='random embeddings 300d')
plt.plot([0], c=color_baseline[1], lw=2, ls='--', label='random embeddings 1024d')
plt.plot([0], c=color_baseline[2], lw=2, label='GloVe')

# NOTE(review): the label hard-codes "28"; confirm it matches the number of
# entries in corr_voxels_models.
hllm, = plt.plot([0], label='28 large language models')

# A boolean is required here: the string 'off' is truthy and would switch
# the labels on instead of off.
ax.tick_params(axis='y', labelleft=False)

plt.legend(handler_map={hllm: MyLegendHandler()}, bbox_to_anchor=(1.01,1), loc='upper left')

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'distribution_correlation_models_baselines.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Same figure as the all-voxel distribution plot, restricted to the voxels
# selected by is_voxel_reliable. Each curve is drawn on its own hidden twin
# y-axis, so it is autoscaled independently of the others.
fh = plt.figure(figsize=(8, 4))
ax = plt.subplot(111)
sns.kdeplot(corr_random_vec_1024[is_voxel_reliable], c=color_baseline[0], lw=2, alpha=0.9, cut=0., ax=ax);
ax.set_yticklabels([])
ax.tick_params(axis='y', length=0)
plt.xlabel('brain correlation')
plt.ylabel('normalized density')
# NOTE(review): the solid curve above is labelled 'random vectors 300d' in
# the legend but, like the dashed one below, it is computed from
# corr_random_vec_1024 -- confirm which data the solid curve should use.
ax = plt.twinx()
sns.kdeplot(corr_random_vec_1024[is_voxel_reliable], c=color_baseline[0], ls='--', lw=2, alpha=0.9, cut=0., ax=ax);
ax.grid(None)
ax.axis('off')
ax = plt.twinx()
sns.kdeplot(corr_random_emb_300[is_voxel_reliable], c=color_baseline[1], lw=2, alpha=0.9, cut=0., ax=ax);
ax.grid(None)
ax.axis('off')
ax = plt.twinx()
sns.kdeplot(corr_random_emb_1024[is_voxel_reliable], c=color_baseline[1], ls='--', lw=2, alpha=0.9, cut=0., ax=ax);
ax.grid(None)
ax.axis('off')
ax = plt.twinx()
sns.kdeplot(corr_glove[is_voxel_reliable], c=color_baseline[2], lw=2, alpha=0.9, cut=0., ax=ax);
ax.grid(None)
ax.axis('off')
for corr_voxels, color in zip(corr_voxels_models, color_models):
    ax = plt.twinx()
    sns.kdeplot(corr_voxels[is_voxel_reliable], color=color, lw=1.5, alpha=0.6, cut=0., ax=ax);
    ax.grid(None)
    ax.axis('off')

# Single-point dummy lines whose only purpose is to populate the legend.
plt.plot([0], c=color_baseline[0], lw=2, label='random vectors 300d')
plt.plot([0], c=color_baseline[0], lw=2, ls='--', label='random vectors 1024d')
plt.plot([0], c=color_baseline[1], lw=2, label='random embeddings 300d')
plt.plot([0], c=color_baseline[1], lw=2, ls='--', label='random embeddings 1024d')
plt.plot([0], c=color_baseline[2], lw=2, label='GloVe')

# NOTE(review): the label hard-codes "28"; confirm it matches the number of
# entries in corr_voxels_models.
hllm, = plt.plot([0], label='28 large language models')

# A boolean is required here: the string 'off' is truthy and would switch
# the labels on instead of off.
ax.tick_params(axis='y', labelleft=False)

plt.legend(handler_map={hllm: MyLegendHandler()}, bbox_to_anchor=(1.01,1), loc='upper left')

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'distribution_correlation_models_baselines_rv.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
######## best model ########
# Model with the highest value in corr_models_rv.
idx_best_model = np.argmax(corr_models_rv)
best_model_name = model_names[idx_best_model]
print(best_model_name)

corr_voxels = corr_voxels_models[idx_best_model]

# Gain over the 1024-d random-embedding baseline; its maximum fixes the top
# of the colour scale (vmax is reused further down in this cell).
gain_over_random_emb = corr_voxels - corr_random_emb_1024
imgtmp = nifti_masker.inverse_transform(gain_over_random_emb)
vmax = np.max(gain_over_random_emb)

best_surf_kwargs = dict(
    surf_mesh='fsaverage5',
    views=['lateral'],
    hemispheres=['left', 'right'],
    vmin=0., vmax=vmax,
    cmap='Spectral_r',
    symmetric_cbar=False,
    cbar_tick_format='%.2f',
    colorbar=True,
    title=str(best_model_name),
)
fh, axes = plotting.plot_img_on_surf(imgtmp, **best_surf_kwargs)

# Zoom into the two brain views and fix the figure size.
for ax in axes[0:2]:
    ax.set_box_aspect(None, zoom=1.45)
fh.set_size_inches(7, 4.3)
plotting.show()

if save_fig:
    best_pdf = os.path.join(fig_folder, 'brain_best_over_random_emb.pdf')
    fh.savefig(best_pdf, bbox_inches='tight',
               transparent=True, facecolor=(1,1,1,0))
    
######## worst model ########
# Model with the lowest value in corr_models_rv.
idx_worst_model = np.argmin(corr_models_rv)

print(model_names[idx_worst_model])

corr_voxels = corr_voxels_models[idx_worst_model]

# Plot the gain over the 1024-d random-embedding baseline, exactly like the
# best-model map: the figure is saved as 'brain_worst_over_random_emb.pdf'
# and shares the colour scale (vmax) computed from the best model's
# baseline-subtracted map earlier in this cell. The raw correlations were
# plotted here before, which made the two maps non-comparable.
imgtmp = nifti_masker.inverse_transform(corr_voxels - corr_random_emb_1024)

fh, axes = plotting.plot_img_on_surf(imgtmp,
                                     surf_mesh='fsaverage5',
                                     views=['lateral'],
                                     hemispheres=['left', 'right'],
                                     vmin=0., vmax=vmax,
                                     cmap='Spectral_r',
                                     symmetric_cbar=False,
                                     cbar_tick_format='%.2f',
                                     colorbar=True,
                                     title=str(model_names[idx_worst_model]))
# Zoom into the two brain views and fix the figure size.
for ax in axes[0:2]:
    ax.set_box_aspect(None, zoom=1.45)
fh.set_size_inches(7, 4.3)
plotting.show()

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'brain_worst_over_random_emb.pdf'), bbox_inches='tight', 
               transparent=True, facecolor=(1,1,1,0))    

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import imageio
from nilearn import plotting

# Render one surface map per training checkpoint (model correlation minus the
# GloVe baseline), save each as a PNG, then assemble the PNGs into a GIF.
# The data must have been prepared by earlier cells, e.g. (illustrative only):
# steps = [1_000, *range(10_000, 140_000, 10_000), 143000]
# corr_models_rv = [...]

# Shared colour-scale maximum: largest voxelwise gain over GloVe across models.
vmax = np.max(np.max(corr_voxels_models, axis=0)-corr_glove)
for step_idx in range(len(steps)):
    # NOTE(review): this overwrites the notebook-level idx_best_model with the
    # loop index; later cells that read idx_best_model see the last step.
    idx_best_model = step_idx
    print(f'Generating image for step {steps[step_idx]}')
    
    corr_voxels = corr_voxels_models[idx_best_model]
    imgtmp = nifti_masker.inverse_transform(corr_voxels-corr_glove)
    
    # Save the image directly, without keeping the returned axes.
    temp_filename = f'temp12b_{step_idx}.png'
    plotting.plot_img_on_surf(imgtmp,
                              surf_mesh='fsaverage5',
                              views=['lateral'],
                              hemispheres=['left', 'right'],
                              vmin=0., vmax=vmax,
                              cmap='Spectral_r',
                              symmetric_cbar=False,
                              cbar_tick_format='%.2f',
                              colorbar=True,
                              title=str(model_names[idx_best_model]))
    
    # plt.savefig / plt.close act on the current figure, i.e. the one that
    # plot_img_on_surf has just created.
    plt.savefig(temp_filename)
    plt.close()

# Optional: bundle all frames into a GIF for a quick visual check.
images = [imageio.imread(f'temp12b_{i}.png') for i in range(len(steps))]
imageio.mimsave('brain_correlations.gif', images, duration=0.5)


In [None]:
import ipywidgets as widgets
from IPython.display import display, Image

# Slider over checkpoint indices (0 .. len(steps)-1).
step_slider = widgets.IntSlider(min=0, max=len(steps)-1, step=1, description='Step Index')

# Display the pre-rendered PNG that matches the slider index.
def show_image(step_idx):
    """Show the saved 'temp12b_<step_idx>.png' frame in the notebook output."""
    display(Image(filename=f'temp12b_{step_idx}.png'))

# Wire the slider to the display function.
interactive_plot = widgets.interactive(show_image, step_idx=step_slider)
display(interactive_plot)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import imageio
from nilearn import plotting

# Same per-checkpoint rendering as the GloVe-corrected version, but on the raw
# correlation maps (no baseline subtracted). Frames go to tempnocorr_<i>.png.
# The data must have been prepared by earlier cells, e.g. (illustrative only):
# steps = [1_000, *range(10_000, 140_000, 10_000), 143000]
# corr_models_rv = [...]

# Shared colour-scale maximum: largest voxelwise correlation across models.
vmax = np.max(np.max(corr_voxels_models, axis=0))
for step_idx in range(len(steps)):
    # NOTE(review): overwrites the notebook-level idx_best_model with the loop index.
    idx_best_model = step_idx
    print(f'Generating image for step {steps[step_idx]}')
    
    corr_voxels = corr_voxels_models[idx_best_model]
    imgtmp = nifti_masker.inverse_transform(corr_voxels)
    
    # Save the image directly, without keeping the returned axes.
    temp_filename = f'tempnocorr_{step_idx}.png'
    plotting.plot_img_on_surf(imgtmp,
                              surf_mesh='fsaverage5',
                              views=['lateral'],
                              hemispheres=['left', 'right'],
                              vmin=0., vmax=vmax,
                              cmap='Spectral_r',
                              symmetric_cbar=False,
                              cbar_tick_format='%.2f',
                              colorbar=True,
                              title=str(model_names[idx_best_model]))
    
    # Save and close the current figure (the one just created above).
    plt.savefig(temp_filename)
    plt.close()

# Optional: bundle all frames into a GIF for a quick visual check.
images = [imageio.imread(f'tempnocorr_{i}.png') for i in range(len(steps))]
imageio.mimsave('brain_correlations_nocorr.gif', images, duration=0.5)


In [None]:
import ipywidgets as widgets
from IPython.display import display, Image

# Slider over checkpoint indices (0 .. len(steps)-1).
step_slider = widgets.IntSlider(min=0, max=len(steps)-1, step=1, description='Step Index')

# Display the pre-rendered PNG that matches the slider index.
def show_image(step_idx):
    """Show the saved 'tempnocorr_<step_idx>.png' frame in the notebook output."""
    display(Image(filename=f'tempnocorr_{step_idx}.png'))

# Wire the slider to the display function.
interactive_plot = widgets.interactive(show_image, step_idx=step_slider)
display(interactive_plot)


In [None]:
################################ L/R Asymmetries ################################
# whole brain

# Asymmetry of each baseline map: mean of the first half of the voxel vector
# minus mean of the second half, printed relative to the overall mean and raw.
# (The "L/R" heading suggests first half = left, second half = right;
# presumably this follows the mask's voxel ordering - TODO confirm.)
half = n_voxels // 2
baseline_maps = [corr_random_emb_300, corr_random_emb_1024, corr2_random_emb_1024,
                 corr_lograndom_emb_1024, corr_glove, corr_random_vec_1024,
                 corr_lograndom_vec_1024]
for corr_voxels in baseline_maps:
    first_minus_second = corr_voxels[:half].mean() - corr_voxels[half:].mean()
    print(first_minus_second / np.mean(corr_voxels))
    print(first_minus_second)
    print("------")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import imageio
from nilearn import plotting


# Render each baseline correlation map (no correction) on the fsaverage5
# surface, one PNG per baseline, then bundle every frame into a GIF.
#
# Fixes relative to the previous version of this cell:
#   * each figure is titled with the baseline it shows (the title used to be
#     model_names[idx_best_model], a stale index left over from another cell);
#   * the GIF contains all frames (it used to stop after the first 6 of 8);
#   * the GIF gets its own file name so that it is not overwritten by the
#     other cell that also writes 'tempbruit_.gif'.
baseline_names = ['corr_random_emb_300', 'corr_random_emb_1024', 'corr2_random_emb_1024',
                  'corr_lograndom_emb_1024', 'corr_random_vec_1024', 'corr_lograndom_vec_1024',
                  'corr_glove', 'corr_onehotphoneme']
baseline_maps = [corr_random_emb_300, corr_random_emb_1024, corr2_random_emb_1024,
                 corr_lograndom_emb_1024, corr_random_vec_1024, corr_lograndom_vec_1024,
                 corr_glove, corr_onehotphoneme]
assert len(baseline_names) == len(baseline_maps)

# Shared colour-scale maximum: largest voxelwise gain of any model over the
# 1024-d random-embedding baseline.
vmax = np.max(np.max(corr_voxels_models, axis=0)-corr_random_emb_1024)
for i, (baseline_name, random_emb_corr) in enumerate(zip(baseline_names, baseline_maps)):
    print("generation img no correction")
    corr_voxels = random_emb_corr
    imgtmp = nifti_masker.inverse_transform(corr_voxels)

    temp_filename = f'penocor_{i}.png'
    # plot_img_on_surf builds its own figure; keep the handle so that exactly
    # that figure is saved and closed.
    fig_tmp, _ = plotting.plot_img_on_surf(imgtmp,
                                           surf_mesh='fsaverage5',
                                           views=['lateral'],
                                           hemispheres=['left', 'right'],
                                           vmin=0., vmax=vmax,
                                           cmap='Spectral_r',
                                           symmetric_cbar=False,
                                           cbar_tick_format='%.2f',
                                           colorbar=True,
                                           title=baseline_name)

    fig_tmp.savefig(temp_filename)
    plt.close(fig_tmp)

# Optional: bundle all frames into a GIF for a quick visual check.
images = [imageio.imread(f'penocor_{i}.png') for i in range(len(baseline_maps))]
imageio.mimsave('penocor_.gif', images, duration=0.5)


In [None]:
import ipywidgets as widgets
from IPython.display import display, Image

# Slider over the baseline frames. The generation loop enumerates 8 baseline
# maps and writes penocor_0.png .. penocor_7.png, so the last valid index is 7
# (the previous max of 6 made the final frame unreachable).
step_slider = widgets.IntSlider(min=0, max=7, step=1, description='Step Index')

# Display the pre-rendered PNG that matches the slider index.
def show_image(step_idx):
    """Show the saved 'penocor_<step_idx>.png' frame in the notebook output."""
    display(Image(filename=f'penocor_{step_idx}.png'))

# Wire the slider to the display function.
interactive_plot = widgets.interactive(show_image, step_idx=step_slider)
display(interactive_plot)

In [None]:
def get_var_name(var):
    for name, value in locals().items():
        if value is var:
            return name

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import imageio
from nilearn import plotting

# Render a surface map for every entry of `models` (titled with the matching
# entry of `noms`), one PNG per entry, then bundle all frames into a GIF.
#
# Fix: the GIF used to be built from the first 12 frames only although the
# loop writes len(models) frames; it now includes every frame.
vmin=0
# Default colour-scale maximum: largest voxelwise gain of any model over the
# 1024-d random-embedding baseline.
vmax = np.max(np.max(corr_voxels_models, axis=0)-corr_random_emb_1024)
# NOTE(review): noms[24], noms[25] and noms[28] ('kurtosis', 'skewness',
# 'absskewness') label the arrays testinterceptautre, testintercept and
# np.abs(testintercept) - confirm that these titles are the intended ones.
noms = ['corr_random_emb_300', 'corr_random_emb_1024', 'corr2_random_emb_1024',
        'corr_lograndom_emb_1024', 'corr_exprandom_emb_1024', 'corr_cauchyrandom_emb_1024',
        'corr_georandom_emb_1024','corr_random_vec_1024', 'corr_lograndom_vec_300', 'corr_lograndom_vec_1024',
        'corr_exprandom_vec_1024', 'corr_georandom_vec_1024', 'corr_cauchyrandom_vec_1024',
        'corr_onehot', 'corr_onehot2', 'onehotlongphoneme', 'corr_onehotphoneme', 'corr_onehotphoneme2', 'corr_onehotphonemenoalpha',
        'onehot_embnoalpha', 'corr_glove', 'randomembjump', 'autre', 'autre2', 'kurtosis', 'skewness', 'corr_random_emb_1024', 'corr_lograndom_vec_1024', 'absskewness']
models = [corr_random_emb_300, corr_random_emb_1024, corr2_random_emb_1024,
                                     corr_lograndom_emb_1024, corr_exprandom_emb_1024,
                                     corr_cauchyrandom_emb_1024, corr_georandom_emb_1024,
                                     corr_random_vec_1024, corr_lograndom_vec_300, corr_lograndom_vec_1024,
                                     corr_exprandom_vec_1024, corr_georandom_vec_1024, corr_cauchyrandom_vec_1024,
                                     corr_onehot, corr_onehot2, onehotlongphoneme, corr_onehotphoneme, corr_onehotphoneme2, corr_onehotphonemenoalpha,
                                     onehot_embnoalpha, corr_glove, randomembjump, autre, autre2, testinterceptautre, testintercept, corr_random_emb_1024,
                                     corr_lograndom_vec_1024, np.abs(testintercept)]
print(len(models), len(noms))
# Titles are looked up by position, so the two lists must stay aligned.
assert len(models) == len(noms), 'models and noms must have the same length'
for i, random_emb_corr in enumerate(models):
    print("generation img no correction")
    corr_voxels = np.array(random_emb_corr)
    imgtmp = nifti_masker.inverse_transform(corr_voxels)

    temp_filename = f'tempbruit_{i}.png'
    # Per-index colour ranges. These assignments are "sticky": an index with
    # no override of its own keeps the range set by the most recent override
    # (index 25 reuses the range of 24; indices 27 and 28 reuse that of 26).
    # NOTE(review): confirm that this carry-over is intended.
    if i == 21:
        vmax=0.8
        vmin=0.3
    if i == 22:
        vmax=0.04
        vmin=-0.3
    if i == 23:
        vmax=0.82
        vmin=0.2
    if i == 24:
        vmax=0.5
        vmin=-0.5
    if i == 26:
        vmax=np.max(random_emb_corr)
        vmin=0
    # plot_img_on_surf builds its own figure; keep the handle so that exactly
    # that figure is saved and closed.
    fig_tmp, _ = plotting.plot_img_on_surf(imgtmp,
                                           surf_mesh='fsaverage5',
                                           views=['lateral'],
                                           hemispheres=['left', 'right'],
                                           vmin=vmin, vmax=vmax,
                                           cmap='Spectral_r',
                                           symmetric_cbar=False,
                                           cbar_tick_format='%.2f',
                                           colorbar=True,
                                           title=noms[i])

    fig_tmp.savefig(temp_filename)
    plt.close(fig_tmp)

# Optional: bundle all frames into a GIF for a quick visual check.
images = [imageio.imread(f'tempbruit_{i}.png') for i in range(len(models))]
imageio.mimsave('tempbruit_.gif', images, duration=0.5)


In [None]:
import ipywidgets as widgets
from IPython.display import display, Image

# Slider over frame indices 0..28 (hard-coded upper bound).
step_slider = widgets.IntSlider(min=0, max=28, step=1, description='Step Index')

# Display the pre-rendered PNG that matches the slider index.
def show_image(step_idx):
    """Show the saved 'tempbruit_<step_idx>.png' frame in the notebook output."""
    display(Image(filename=f'tempbruit_{step_idx}.png'))

# Wire the slider to the display function.
interactive_plot = widgets.interactive(show_image, step_idx=step_slider)
display(interactive_plot)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from nilearn import plotting
import matplotlib.colors as mcolors

# For each voxel, find which random-vector baseline yields the highest
# correlation, and plot (1) the index of the winning baseline and (2) the
# winning correlation value on the fsaverage5 surface.
#
# Fixes relative to the previous version of this cell:
#   * the explicit plt.subplots(...) call is gone: plot_img_on_surf creates
#     its own figure, so that call only produced an extra empty 3D figure;
#   * the plot results are no longer stored in a variable named `display`,
#     which shadowed IPython's display() used by the slider cells.
#
# The baseline arrays (corr_random_vec_1024, ...) must already be loaded.

# Stack the baselines: one row per baseline, one column per voxel.
all_correlations = np.array([corr_random_vec_1024, corr_lograndom_vec_1024,
                             corr_exprandom_vec_1024, corr_georandom_vec_1024,
                             corr_cauchyrandom_vec_1024])

# Row index of the best baseline per voxel, and the corresponding value.
max_corr_indices = np.argmax(all_correlations, axis=0)
max_corr = np.max(all_correlations, axis=0)

# Back to volume images for plotting.
imgtmp = nifti_masker.inverse_transform(max_corr_indices)
imgtmp2 = nifti_masker.inverse_transform(max_corr)

# Display names, in the same order as the rows of all_correlations.
noms = ['corr_random_vec_1024', 'corr_lograndom_vec_1024',
        'corr_exprandom_vec_1024', 'corr_georandom_vec_1024', 'corr_cauchyrandom_vec_1024']

# Discrete colormap with one colour per baseline.
cmap = plt.get_cmap('tab20', len(noms))
colors = cmap(np.arange(len(noms)))
cmap = mcolors.ListedColormap(colors)
# Bin edges centred on the integer indices; kept for reference, they are not
# passed to the plotting call below.
bounds = np.arange(len(noms) + 1) - 0.5
norm = mcolors.BoundaryNorm(bounds, cmap.N)

# First figure: index of the winning baseline per voxel.
# NOTE(review): projecting an index map onto the surface may blend
# neighbouring category indices - confirm the sampling used is appropriate.
fig_winner, axes_winner = plotting.plot_img_on_surf(imgtmp,
                                                    surf_mesh='fsaverage5',
                                                    views=['lateral'],
                                                    hemispheres=['left', 'right'],
                                                    vmin=-0.5, vmax=len(noms) - 0.5,
                                                    cmap=cmap,
                                                    colorbar=True,
                                                    title='Max Correlation per Voxel')
fig_winner.savefig('unique_plot_max_correlation.png', bbox_inches='tight')

# Second figure: the winning correlation value per voxel.
# NOTE(review): vmax is not set in this cell; it is whatever an earlier cell
# left in the notebook namespace.
fig_max, axes_max = plotting.plot_img_on_surf(imgtmp2,
                                              surf_mesh='fsaverage5',
                                              views=['lateral'],
                                              hemispheres=['left', 'right'],
                                              vmin=0., vmax=vmax,
                                              cmap='Spectral_r',
                                              symmetric_cbar=False,
                                              cbar_tick_format='%.2f',
                                              colorbar=True,
                                              title="")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from nilearn import plotting
import matplotlib.colors as mcolors

# For each voxel, find which random-embedding baseline yields the highest
# correlation, and plot (1) the index of the winning baseline and (2) the
# winning correlation value on the fsaverage5 surface.
#
# Fixes relative to the previous version of this cell:
#   * the explicit plt.subplots(...) call is gone: plot_img_on_surf creates
#     its own figure, so that call only produced an extra empty 3D figure;
#   * the plot results are no longer stored in a variable named `display`,
#     which shadowed IPython's display() used by the slider cells.
#
# The baseline arrays (corr_random_emb_300, ...) must already be loaded.

# Stack the baselines: one row per baseline, one column per voxel.
all_correlations = np.array([corr_random_emb_300, corr_random_emb_1024, corr2_random_emb_1024,
                             corr_lograndom_emb_1024, corr_exprandom_emb_1024,
                             corr_cauchyrandom_emb_1024, corr_georandom_emb_1024])

# Row index of the best baseline per voxel, and the corresponding value.
max_corr_indices = np.argmax(all_correlations, axis=0)
max_corr = np.max(all_correlations, axis=0)

# Back to volume images for plotting.
imgtmp = nifti_masker.inverse_transform(max_corr_indices)
imgtmp2 = nifti_masker.inverse_transform(max_corr)

# Display names, in the same order as the rows of all_correlations.
noms = ['corr_random_emb_300', 'corr_random_emb_1024', 'corr2_random_emb_1024',
        'corr_lograndom_emb_1024', 'corr_exprandom_emb_1024', 'corr_cauchyrandom_emb_1024',
        'corr_georandom_emb_1024']

# Discrete colormap with one colour per baseline.
cmap = plt.get_cmap('tab20', len(noms))
colors = cmap(np.arange(len(noms)))
cmap = mcolors.ListedColormap(colors)
# Bin edges centred on the integer indices; kept for reference, they are not
# passed to the plotting call below.
bounds = np.arange(len(noms) + 1) - 0.5
norm = mcolors.BoundaryNorm(bounds, cmap.N)

# First figure: index of the winning baseline per voxel.
# NOTE(review): projecting an index map onto the surface may blend
# neighbouring category indices - confirm the sampling used is appropriate.
fig_winner, axes_winner = plotting.plot_img_on_surf(imgtmp,
                                                    surf_mesh='fsaverage5',
                                                    views=['lateral'],
                                                    hemispheres=['left', 'right'],
                                                    vmin=-0.5, vmax=len(noms) - 0.5,
                                                    cmap=cmap,
                                                    colorbar=True,
                                                    title='Max Correlation per Voxel')
fig_winner.savefig('unique_plot_max_correlation.png', bbox_inches='tight')

# Second figure: the winning correlation value per voxel.
# NOTE(review): vmax is not set in this cell; it is whatever an earlier cell
# left in the notebook namespace.
fig_max, axes_max = plotting.plot_img_on_surf(imgtmp2,
                                              surf_mesh='fsaverage5',
                                              views=['lateral'],
                                              hemispheres=['left', 'right'],
                                              vmin=0., vmax=vmax,
                                              cmap='Spectral_r',
                                              symmetric_cbar=False,
                                              cbar_tick_format='%.2f',
                                              colorbar=True,
                                              title="")


In [None]:
# Sanity check: show the training steps used as the x-axis in the plots below.
print(steps)

In [None]:
# Names of the random-embedding baselines plus GloVe. This rebinds the
# notebook-level `noms`, replacing the list set by earlier cells.
noms = ['corr_random_emb_300', 'corr_random_emb_1024', 'corr2_random_emb_1024',
        'corr_lograndom_emb_1024', 'corr_exprandom_emb_1024', 'corr_cauchyrandom_emb_1024',
        'corr_georandom_emb_1024', 'corr_glove']

In [None]:
# Four figures of brain correlation as a function of training step, each with
# a regression on log(step):
#   1. mean over corr_l_models vs corr_r_models
#   2. difference of those means, annotated with Pearson r / p against log(steps)
#   3. 90th percentile of corr_l_models vs corr_r_models
#   4. difference of those percentiles, annotated with Pearson r / p
# NOTE(review): both logx=True and np.log(steps) evaluate log(step); if `steps`
# contains 0, that value is -inf - confirm `steps` is strictly positive here.

# --- Figure 1: mean correlation, L and R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.regplot(x=steps, 
            y=np.mean(corr_l_models, axis=1),
            logx=True, ax=ax, label='L', color=l_r_colors[0]);
sns.regplot(x=steps, 
            y=np.mean(corr_r_models, axis=1),
            logx=True, ax=ax, label='R', color=l_r_colors[1]);
ax.set_xscale('log')
ax.set_xlabel('steps')      
ax.set_ylabel('brain correlation')
plt.legend()
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_r_all_mean.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

# --- Figure 2: mean correlation, L minus R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.regplot(x=steps, 
            y=np.mean(corr_l_models, axis=1)-np.mean(corr_r_models, axis=1), 
            logx=True, ax=ax);
ax.set_xscale('log')
ax.set_xlabel('steps')      
ax.set_ylabel('brain correlation: L - R')
# Pearson correlation between log(step) and the L - R difference.
r, p = pearsonr(np.log(steps), (np.mean(corr_l_models, axis=1)
                                      -np.mean(corr_r_models, axis=1)))
fh.text(0.15, 0.85,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
        ha='left', va='top', fontsize=11)
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_minus_r_all.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

# --- Figure 3: 90th-percentile correlation, L and R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.regplot(x=steps, 
            y=np.percentile(corr_l_models, 90, axis=1),
            logx=True, ax=ax, label='L', color=l_r_colors[0]);
sns.regplot(x=steps, 
            y=np.percentile(corr_r_models, 90, axis=1),
            logx=True, ax=ax, label='R', color=l_r_colors[1]);
ax.set_xscale('log')
ax.set_xlabel('steps')      
ax.set_ylabel('brain correlation')
plt.legend()
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_r_all_90.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

# --- Figure 4: 90th-percentile correlation, L minus R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.regplot(x=steps, 
            y=np.percentile(corr_l_models, 90, axis=1)-np.percentile(corr_r_models, 90, axis=1),
            logx=True, ax=ax);
ax.set_xscale('log')
ax.set_xlabel('steps')      
ax.set_ylabel('brain correlation: L - R')
# Pearson correlation between log(step) and the L - R percentile difference.
r, p = pearsonr(np.log(steps), (np.percentile(corr_l_models, 90, axis=1)
                                      -np.percentile(corr_r_models, 90, axis=1)))
fh.text(0.15, 0.85,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
        ha='left', va='top', fontsize=11)
plt.show()

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_minus_r_all_90.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Scatter-only versions (no regression line) of the four L/R figures.
# NOTE(review): these figures are saved under the same file names as the
# regression versions ('l_r_all_mean.pdf', 'l_minus_r_all.pdf', ...), so
# whichever cell runs last overwrites the other's PDFs.

# --- Figure 1: mean correlation minus the mean of the corr2_random_emb_1024
# baseline over the matching half of the voxel vector ---
# NOTE(review): the y-axis label and file name do not mention the baseline
# subtraction - confirm.
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.scatterplot(x=steps, 
            y=np.mean(corr_l_models, axis=1)  - np.mean(corr2_random_emb_1024[:n_voxels//2]), ax=ax, label='L', color=l_r_colors[0]);
sns.scatterplot(x=steps, 
            y=np.mean(corr_r_models, axis=1)  - np.mean(corr2_random_emb_1024[n_voxels//2:]), ax=ax, label='R', color=l_r_colors[1]);
ax.set_xscale('log')
ax.set_xlabel('steps')      
ax.set_ylabel('brain correlation')
plt.legend()
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_r_all_mean.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

# --- Figure 2: mean correlation, L minus R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.scatterplot(x=steps, 
            y=np.mean(corr_l_models, axis=1)-np.mean(corr_r_models, axis=1), ax=ax);
ax.set_xscale('log')
ax.set_xlabel('steps')      
ax.set_ylabel('brain correlation: L - R')
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_minus_r_all.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

# --- Figure 3: 90th-percentile correlation, L and R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.scatterplot(x=steps, 
            y=np.percentile(corr_l_models, 90, axis=1), ax=ax, label='L', color=l_r_colors[0]);
sns.scatterplot(x=steps, 
            y=np.percentile(corr_r_models, 90, axis=1), ax=ax, label='R', color=l_r_colors[1]);
ax.set_xscale('log')
ax.set_xlabel('steps')      
ax.set_ylabel('brain correlation')
plt.legend()
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_r_all_90.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

# --- Figure 4: 90th-percentile correlation, L minus R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.scatterplot(x=steps, 
            y=np.percentile(corr_l_models, 90, axis=1)-np.percentile(corr_r_models, 90, axis=1), ax=ax);
ax.set_xscale('log')
ax.set_xlabel('steps')      
ax.set_ylabel('brain correlation: L - R')
plt.show()

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_minus_r_all_90.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# on the most 25% reliable voxels

# Baseline asymmetry restricted to reliable voxels: mean over the reliable
# voxels of the first half of the vector minus the same for the second half.
half = n_voxels // 2
reliable_first = is_voxel_reliable[:half]
reliable_second = is_voxel_reliable[half:]
for corr_voxels in (corr_random_emb_300, corr_random_emb_1024, corr_glove):
    first_mean = corr_voxels[:half][reliable_first].mean()
    second_mean = corr_voxels[half:][reliable_second].mean()
    print(first_mean - second_mean)

In [None]:
# Scatter plots on the "_rv" arrays (presumably the reliable-voxel subset -
# TODO confirm): mean correlation for L and R, then their difference.

# --- Figure 1: mean correlation, L and R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.scatterplot(x=steps, 
            y=np.mean(corr_l_models_rv, axis=1),ax=ax, label='L', color=l_r_colors[0]);
sns.scatterplot(x=steps, 
            y=np.mean(corr_r_models_rv, axis=1),
             ax=ax, label='R', color=l_r_colors[1]);
ax.set_xscale('log')
ax.set_xlabel('step')      
ax.set_ylabel('brain correlation')
plt.legend()
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_r_all_mean_rv.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

# --- Figure 2: mean correlation, L minus R, with a dashed zero line ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.scatterplot(x=steps, 
            y=(np.mean(corr_l_models_rv, axis=1)
               -np.mean(corr_r_models_rv, axis=1)), ax=ax);
ax.set_xscale('log')
ax.set_xlabel('step')      
ax.set_ylabel('brain correlation: L - R')
ax.axhline(0., ls='--', c='0.4', zorder=1);
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_minus_r_all_rv.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Regression versions of the "_rv" figures: mean correlation for L and R, then
# their difference annotated with Pearson r / p against log(steps).
# NOTE(review): saved under the same file names as the scatter versions, and
# log(step) is -inf if `steps` contains 0 - confirm both.

# --- Figure 1: mean correlation, L and R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.regplot(x=steps, 
            y=np.mean(corr_l_models_rv, axis=1),
            logx=True, ax=ax, label='L', color=l_r_colors[0]);
sns.regplot(x=steps, 
            y=np.mean(corr_r_models_rv, axis=1),
            logx=True, ax=ax, label='R', color=l_r_colors[1]);
ax.set_xscale('log')
ax.set_xlabel('step')      
ax.set_ylabel('brain correlation')
plt.legend()
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_r_all_mean_rv.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

# --- Figure 2: mean correlation, L minus R ---
fh = plt.figure(figsize=(6.4, 4.8))
ax = plt.subplot(111)
sns.regplot(x=steps, 
            y=(np.mean(corr_l_models_rv, axis=1)
               -np.mean(corr_r_models_rv, axis=1)), 
            logx=True, ax=ax);
ax.set_xscale('log')
ax.set_xlabel('step')      
ax.set_ylabel('brain correlation: L - R')
# Pearson correlation between log(step) and the L - R difference.
r, p = pearsonr(np.log(steps), (np.mean(corr_l_models_rv, axis=1)
                                      -np.mean(corr_r_models_rv, axis=1)))
fh.text(0.15, 0.85,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
        ha='left', va='top', fontsize=11)
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'l_minus_r_all_rv.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Show the shape, then make sure corr_voxels_models is a NumPy array so that
# later cells can slice it with [:, idx].
print(np.shape(corr_voxels_models))
corr_voxels_models = np.array(corr_voxels_models)

In [None]:
################################ Slopes, voxelwise ################################
# For every voxel, regress brain correlation on log(training step) with OLS
# and keep the slope, its t-value and its p-value. Then plot the slopes of the
# voxels whose |t| exceeds the two-sided threshold for p_thsld.
#
# Changes relative to the previous version of this cell:
#   * checkpoints with step <= 0 are excluded from the fit, because log(0) is
#     -inf and cannot be used as a regressor;
#   * the design matrix does not depend on the voxel and is built once;
#   * the t threshold uses the number of checkpoints actually fitted.
steps_arr = np.asarray(steps, dtype=float)
use_step = steps_arr > 0
if not use_step.all():
    print('Excluding {} checkpoint(s) with step <= 0 from the log-linear fit'.format(
        int((~use_step).sum())))
n_fit = int(use_step.sum())

# Design matrix [1, log(step)], shared by all voxels.
x = sm.add_constant(np.log(steps_arr[use_step]))
corr_for_fit = np.asarray(corr_voxels_models)[use_step]

slopes = np.empty(n_voxels)
tvalues = np.empty(n_voxels)
pvalues = np.empty(n_voxels)
for idx_voxel in range(n_voxels):
    y = corr_for_fit[:, idx_voxel]
    res = sm.OLS(y, x).fit()
    # Index 1 is the log(step) coefficient; index 0 is the intercept.
    slopes[idx_voxel] = res.params[1]
    tvalues[idx_voxel] = res.tvalues[1]
    pvalues[idx_voxel] = res.pvalues[1]

# Mask out voxels whose slope is not significant at p_thsld.
vtmp = slopes.copy()
p_thsld = 10**-7
t_thsld = np.abs(scipy.stats.t.ppf(p_thsld/2, df=n_fit-2)) #two-sided
print(t_thsld)
vtmp[np.abs(tvalues) < t_thsld] = np.nan
imgtmp = nifti_masker.inverse_transform(vtmp)

fh = plt.figure(figsize=(12,3))
ax = plt.subplot(111)
plotting.plot_glass_brain(imgtmp, threshold=0., display_mode='lyrz', cbar_tick_format='%.3f',
                          plot_abs=False, colorbar=True, axes=ax)
plotting.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'glassbrain_slopes.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
################################ ROIs analysis ################################
# Load the ROI masks from folder_mask, build their hemisphere-swapped
# counterparts, and convert both sets into voxel-index lists.

roi_names = ['TP', 'aSTS', 'pSTS', 'AG_TPJ', 'BA44', 'BA45', 'BA47']
n_rois = len(roi_names)
folder_mask = 'roi_masks'

roi_list = [os.path.join(folder_mask, '{}.nii.gz'.format(roi_name)) for roi_name in roi_names]
# The "_r" suffix suggests the files on disk are left-hemisphere masks and the
# swapped images are their right-hemisphere mirrors - TODO confirm.
roi_list_r = [swap_img_hemispheres(roi_mask) for roi_mask in roi_list]
# Rows 0..n_rois-1 are the original masks, rows n_rois.. the swapped ones.
rois_t = nifti_masker.transform(roi_list + roi_list_r)
# Indices (into the masked voxel vector) where each ROI equals exactly 1.0.
idx_rois = [np.flatnonzero(roi_t == 1.0) for roi_t in rois_t]

In [None]:
# Overlay every ROI (original masks first, swapped masks second) on one glass
# brain, using a two-colour colormap built from l_r_colors.
cmap = ListedColormap(l_r_colors)

fh = plt.figure(figsize=(12,3))
ax = plt.subplot(111)
for vtmp in rois_t[:n_rois]:
    # Sum of the mask values of this ROI (printed for the first set only).
    print(np.sum(vtmp))
    imgtmp = nifti_masker.inverse_transform(vtmp)
    plotting.plot_glass_brain(imgtmp, display_mode='lyrz', cmap=cmap, axes=ax, alpha=0.1)
for vtmp in rois_t[n_rois:]:
    imgtmp = nifti_masker.inverse_transform(vtmp)
    plotting.plot_glass_brain(imgtmp, display_mode='lyrz', cmap=cmap, axes=ax, alpha=0.1)
# Final call with no image draws on the same axes again.
plotting.plot_glass_brain(None,  display_mode='lyrz', axes=ax)
plt.show()
if save_fig:
    fh.savefig(os.path.join(fig_folder, 'rois_l_r.pdf'), bbox_inches='tight', transparent=True)
    fh.savefig(os.path.join(fig_folder, 'rois_l_r.svg'), bbox_inches='tight', transparent=True)

In [None]:
import os
import numpy as np
from nilearn import plotting
from nilearn.image import load_img
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

# ROIs analysis ###############################
# Alternative rendering of all ROIs (original and hemisphere-swapped masks) on
# a single glass brain.
#
# Fixes relative to the previous version of this cell:
#   * saving is guarded by `save_fig` instead of `if True:`; fig_folder is only
#     defined when save_fig is set, so the unconditional save could raise
#     NameError;
#   * rois_t / idx_rois are built with the original masks first and the
#     swapped masks second, like the other ROI cells. The previous order
#     (swapped first) silently reversed the meaning of idx_rois[:n_rois] for
#     any cell run afterwards.

roi_names = ['TP', 'aSTS', 'pSTS', 'AG_TPJ', 'BA44', 'BA45', 'BA47']
n_rois = len(roi_names)
folder_mask = 'roi_masks'

roi_list = [os.path.join(folder_mask, '{}.nii.gz'.format(roi_name)) for roi_name in roi_names]
roi_list_t = [swap_img_hemispheres(roi_mask) for roi_mask in roi_list]
rois_t = nifti_masker.transform(roi_list + roi_list_t)
# Indices (into the masked voxel vector) where each ROI equals exactly 1.0.
idx_rois = [np.flatnonzero(roi == 1.0) for roi in rois_t]
print(idx_rois)

# Plotting code ###############################

cmap = ListedColormap(l_r_colors)

fh = plt.figure(figsize=(12, 3))
ax = plt.subplot(111)

# Plot every ROI on the same axes.
for vtmp in rois_t:
    imgtmp = nifti_masker.inverse_transform(vtmp)
    plotting.plot_glass_brain(imgtmp, display_mode='lyrz', cmap=cmap, axes=ax, alpha=0.)

# Final call with no image draws on the same axes again.
plotting.plot_glass_brain(None, display_mode='lyrz', axes=ax)

plt.show()

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'rois_lr.pdf'), bbox_inches='tight', transparent=True)
    fh.savefig(os.path.join(fig_folder, 'rois_lr.svg'), bbox_inches='tight', transparent=True)


In [None]:
import os
import numpy as np
from nilearn import plotting
from nilearn.image import load_img, math_img, new_img_like

# Rebuild the ROI index lists: original masks first, hemisphere-swapped masks
# second, so idx_rois has 2 * n_rois entries.
roi_names = ['TP', 'aSTS', 'pSTS', 'AG_TPJ', 'BA44', 'BA45', 'BA47']
n_rois = len(roi_names)
folder_mask = 'roi_masks'

roi_list = [os.path.join(folder_mask, '{}.nii.gz'.format(roi_name)) for roi_name in roi_names]
roi_list_r = [swap_img_hemispheres(roi_mask) for roi_mask in roi_list]
rois_t = nifti_masker.transform(roi_list + roi_list_r)
# Indices (into the masked voxel vector) where each ROI equals exactly 1.0.
idx_rois = [np.flatnonzero(roi_t == 1.0) for roi_t in rois_t]
# Quick checks: number of index lists, number of ROI names, first index list.
print(len(idx_rois))
print(len(roi_names))
print(idx_rois[0])

# Build a mirrored copy of an image.
def create_symmetric_image(img):
    """Return a copy of ``img`` whose data array is reversed along axis 0.

    The new image reuses the affine of ``img``. Reversing axis 0 is intended
    as a left-right flip; that holds only if axis 0 of the data array is the
    x axis and the grid is centred on the midline - TODO confirm for the masks
    used here.
    """
    mirrored = np.flip(img.get_fdata(), axis=0)
    return new_img_like(img, mirrored, img.affine)

# Load the first mask image to start the running sum.
sum_img = load_img(roi_list[0])
symmetric_img = create_symmetric_image(sum_img)

# Add the original image and its mirrored copy.
combined_img = math_img("img1 + img2", img1=sum_img, img2=symmetric_img)

# Add each remaining mask and its mirrored copy to the running sum.
for roi in roi_list[1:]:
    img = load_img(roi)
    sym_img = create_symmetric_image(img)
    combined_img = math_img("img1 + img2 + img3", img1=combined_img, img2=img, img3=sym_img)

# Show the combined image on a glass brain.
plotting.plot_glass_brain(combined_img, title='Sum of ROIs and Symmetric Regions', display_mode='lyrz')

# Render the figures.
plotting.show()


In [None]:
# Quick look at the voxelwise slopes computed by the OLS cell.
print(slopes)

In [None]:
# Interactive scatter of "delta slope" against "delta correlation" for one
# model at a time, where each delta is (first half of the voxel vector) minus
# (second half). The axis labels call this L - R.
# NOTE(review): this pairs voxel k with voxel k + n_voxels//2 and assumes the
# two halves have equal length - confirm against the mask's voxel ordering.
#
# Fixes relative to the previous version of this cell:
#   * the slider covers every model instead of the hard-coded range 0..14;
#   * the zero line is drawn across the whole axes (the previous call passed
#     xmin=-0.5, outside the 0..1 axes-fraction range).
deltaslope = np.array(slopes[:n_voxels//2]) - np.array(slopes[n_voxels//2:])
delta_corr_model = np.array(corr_voxels_models[:, :n_voxels//2]) - np.array(corr_voxels_models[:, n_voxels//2:])
colors = plt.cm.rainbow(np.linspace(0, 1, n_rois))
# Means over voxels: a single value for the slopes, one value per model for
# the correlations.
barycenter_y = np.mean(deltaslope)
barycenter_x = np.mean(delta_corr_model[:, :], axis=1)

def plot_graph(i):
    """Scatter delta slope vs delta correlation for model index ``i``.

    All voxel pairs are drawn first, then the voxels of each ROI are overlaid
    in their own colour. zip() stops at len(roi_names), so only the first
    n_rois entries of idx_rois are used. The black cross marks the mean point.
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(delta_corr_model[i, :], deltaslope, label='All Points')
    for j, (idx_roi, roi_name) in enumerate(zip(idx_rois, roi_names)):
        plt.scatter(delta_corr_model[i, idx_roi], deltaslope[idx_roi], color=colors[j], label=f'{roi_name} (i={i})')
    plt.scatter(barycenter_x[i], barycenter_y, color='black', marker='x', s=100, label='Barycenter')
    plt.xlabel("delta correlation L-R")
    plt.ylabel('delta slope L-R')
    plt.title("delta slope vs delta correlation")
    plt.axhline(0, color='black')
    plt.legend()
    plt.show()

# Slider over all model indices.
from ipywidgets import interact
interact(plot_graph, i=(0, delta_corr_model.shape[0] - 1, 1));


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import imageio

# Same scatter as the interactive version, but baseline-corrected and written
# to disk: one PNG per model, then a GIF of all frames.
# Requires n_voxels, slopes, corr_voxels_models, corr_lograndom_emb_1024,
# idx_rois and roi_names from earlier cells.
#
# Fixes relative to the previous version of this cell:
#   * the number of frames follows the number of models instead of being
#     hard-coded to 15;
#   * the final message names the file that is actually written.

# Deltas are (first half of the voxel vector) minus (second half); the
# correlation delta additionally has the same delta of the
# corr_lograndom_emb_1024 baseline subtracted.
deltaslope = np.array(slopes[:n_voxels // 2]) - np.array(slopes[n_voxels // 2:])
delta_corr_model = np.array(corr_voxels_models[:, :n_voxels // 2]) - np.array(corr_voxels_models[:, n_voxels // 2:]) + np.array(corr_lograndom_emb_1024[n_voxels // 2:]) - np.array(corr_lograndom_emb_1024[:n_voxels // 2])
colors = plt.cm.rainbow(np.linspace(0, 1, len(idx_rois)))
barycenter_y = np.mean(deltaslope)
barycenter_x = np.mean(delta_corr_model[:, :], axis=1)

# NOTE: this redefines plot_graph; the interactive version defined in another
# cell is replaced once this cell has run.
def plot_graph(i):
    """Save the delta-slope vs delta-correlation scatter for model ``i``.

    The figure is written to 'frame_<i>.png' and closed; nothing is shown.
    zip() stops at len(roi_names), so only the first len(roi_names) entries of
    idx_rois are overlaid.
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(delta_corr_model[i, :], deltaslope, label='All Points')
    for j, (idx_roi, roi_name) in enumerate(zip(idx_rois, roi_names)):
        plt.scatter(delta_corr_model[i, idx_roi], deltaslope[idx_roi], color=colors[j], label=f'{roi_name} (i={i})')
    plt.scatter(barycenter_x[i], barycenter_y, color='black', marker='x', s=100, label='Barycenter')
    plt.xlabel("delta correlation L-R")
    plt.ylabel('delta slope L-R')
    plt.title("delta slope vs delta correlation")
    plt.legend()
    plt.axhline(0, color='black')
    plt.savefig(f"frame_{i}.png")
    plt.close()

# Generate and save one frame per model.
n_frames = delta_corr_model.shape[0]
for i in range(n_frames):
    plot_graph(i)

# Create a GIF from the saved frames.
gif_filename = 'delta vs slope.gif'
images = []
for i in range(n_frames):
    filename = f"frame_{i}.png"
    images.append(imageio.imread(filename))
imageio.mimsave(gif_filename, images, duration=0.5)  # Adjust duration as needed

print("GIF saved as '{}'".format(gif_filename))



In [None]:
# Long-format table with one row per voxel: ROI name, voxel slope, hemisphere.
# The first n_rois entries of idx_rois are labelled 'L', the remaining ones 'R';
# both groups are paired with roi_names in order.
hemi_groups = (('L', idx_rois[0:n_rois]), ('R', idx_rois[n_rois:]))
df_roi_slopes = pd.concat([
    pd.DataFrame({'roi': roi_name, 'slope': slopes[idx_roi], 'hemi': hemi})
    for hemi, hemi_idx_rois in hemi_groups
    for idx_roi, roi_name in zip(hemi_idx_rois, roi_names)
])

# Per ROI: two-sample t-test between left and right slopes; the formatted
# p-value is appended to the tick label of that ROI.
is_left = df_roi_slopes['hemi'] == 'L'
is_right = df_roi_slopes['hemi'] == 'R'
xtickslabels_roi = []
for roi_name in roi_names:
    in_roi = df_roi_slopes['roi'] == roi_name
    ttest = ttest_ind(df_roi_slopes[in_roi & is_left].slope,
                      df_roi_slopes[in_roi & is_right].slope)
    pvalue_str = pvalue2str(ttest.pvalue)
    print(roi_name, '\t', '{:.01e}'.format(ttest.pvalue), '\t', pvalue_str)
    xtickslabels_roi.append('{}\n({})'.format(roi_name, pvalue_str))

# Grouped bars: one pair (L, R) per ROI.
fh, ax = plt.subplots(figsize=(6, 4))
sns.barplot(data=df_roi_slopes, x='roi', y='slope', hue='hemi',
            palette=l_r_colors, saturation=1.0, width=0.75, alpha=0.9,
            ax=ax)
ax.legend(loc='upper left')
ax.set_xticks(ax.get_xticks())
ax.set_xticklabels(xtickslabels_roi)
ax.set_xlabel('')
plt.show()

if save_fig:
    roi_slopes_path = os.path.join(fig_folder, 'rois_slopes.pdf')
    fh.savefig(roi_slopes_path, bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Sanity check of the array dimensions before the per-ROI plots.
print(np.shape(corr_voxels_models))

In [None]:
# One panel per ROI: mean correlation inside the left and right ROI for every
# checkpoint, plotted against the training step.
fh, axes = plt.subplots(1, n_rois, figsize=(16,4), sharey=True)
for i, (idx_roi_l, idx_roi_r, roi_name) in enumerate(zip(idx_rois[:n_rois], idx_rois[n_rois:], roi_names)):
    ax = axes[i]
    sns.scatterplot(x=steps,
                y=np.mean(corr_voxels_models[:,idx_roi_l], axis=1),
                 color=l_r_colors[0], ax=ax, label='L');
    sns.scatterplot(x=steps,
                y=np.mean(corr_voxels_models[:,idx_roi_r], axis=1),
                 color=l_r_colors[1], ax=ax, label='R');
    # `steps` starts at 0 and a pure log axis drops non-positive values, so the
    # step-0 checkpoint would vanish. symlog is linear on [-1, 1] and
    # logarithmic beyond, which keeps that point visible.
    ax.set_xscale('symlog', linthresh=1)

    ax.set_title('{}'.format(roi_name))


axes[n_rois//2].set_xlabel('steps')
axes[0].set_ylabel('brain correlation')
axes[0].legend(loc='upper left')
plt.show()

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'rois_l_r_corr.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Same per-ROI view as the raw L/R plot, but with the correlations of the
# random-embedding baseline subtracted voxel by voxel before averaging.
corr_random = corr_random_emb_1024
fh, axes = plt.subplots(1, n_rois, figsize=(16,4), sharey=True)
for i, (idx_roi_l, idx_roi_r, roi_name) in enumerate(zip(idx_rois[:n_rois], idx_rois[n_rois:], roi_names)):
    ax = axes[i]
    sns.scatterplot(x=steps,
                y=np.mean(corr_voxels_models[:,idx_roi_l] - corr_random[idx_roi_l], axis=1),
                color=l_r_colors[0], ax=ax, label='L');
    sns.scatterplot(x=steps,
                y=np.mean(corr_voxels_models[:,idx_roi_r] - corr_random[idx_roi_r], axis=1),
                color=l_r_colors[1], ax=ax, label='R');
    # `steps` starts at 0, which a pure log axis cannot show; symlog keeps the
    # step-0 checkpoint on the plot.
    ax.set_xscale('symlog', linthresh=1)
    ax.set_title('{}'.format(roi_name))

# The x values are training steps, not epochs.
axes[n_rois//2].set_xlabel('steps')
axes[0].set_ylabel('brain correlation')
axes[0].legend(loc='upper left')

plt.show()


if save_fig:
    fh.savefig(os.path.join(fig_folder, 'rois_l_r_corr_over_random.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# ARC scores as (training step, score) pairs, one table per benchmark.
arc_easy_raw = {'arc_easy': [(0, 1.0), (1, 1.0), (2, 0.9951060358890702), (4, 0.99836867862969), (8, 0.9951060358890702), (16, 1.0440456769983686), (32, 1.0326264274061991), (64, 1.0570962479608481), (128, 1.0505709624796085), (256, 1.030995106035889), (512, 1.0456769983686787), (1000, 1.1484502446982054), (3000, 1.7226753670473083), (13000, 2.1517128874388254), (23000, 2.3034257748776508), (33000, 2.407830342577488), (43000, 2.4779771615008155), (53000, 2.5513866231647633), (63000, 2.5742251223491026), (73000, 2.579119086460033), (83000, 2.6443719412724302), (93000, 2.6769983686786296), (103000, 2.66394779771615), (113000, 2.701468189233279), (123000, 2.7389885807504077), (133000, 2.7471451876019577), (143000, 2.7438825448613375)]}
arc_challenge_raw = {'arc_challenge': [(0, 1.0), (1, 1.0), (2, 0.996078431372549), (4, 0.9921568627450981), (8, 0.9372549019607844), (16, 0.8980392156862745), (32, 0.8745098039215686), (64, 0.8392156862745098), (128, 0.8862745098039216), (256, 0.9058823529411765), (512, 0.8392156862745098), (1000, 0.8784313725490197), (3000, 0.8980392156862745), (13000, 1.0509803921568628), (23000, 1.2), (33000, 1.3019607843137255), (43000, 1.2470588235294118), (53000, 1.3490196078431371), (63000, 1.3607843137254902), (73000, 1.3568627450980393), (83000, 1.4509803921568627), (93000, 1.4941176470588236), (103000, 1.5098039215686276), (113000, 1.5490196078431373), (123000, 1.5450980392156863), (133000, 1.5529411764705883), (143000, 1.5254901960784315)]}

# Keep only the scores whose step is one of the analysed checkpoints (step 0
# is always kept), preserving the order of the tables above.
kept_steps = set(steps + [0])
data_easy = [score for step, score in arc_easy_raw['arc_easy'] if step in kept_steps]
data_challenge = [score for step, score in arc_challenge_raw['arc_challenge'] if step in kept_steps]
print(data_easy)
print(data_challenge)


In [None]:
# Whole-brain correlation of every checkpoint against the training step and
# the two ARC scores (three panels sharing the y axis).
fh, axes = plt.subplots(1, 3, figsize=(12, 3.2), sharex=False, sharey=True)

ax=axes[0]
sns.scatterplot(x=steps,
            y=corr_models,
            ax=ax);
# `steps` starts at 0, which a pure log axis cannot display; symlog is linear
# on [-1, 1] and logarithmic beyond, so the step-0 checkpoint stays visible.
ax.set_xscale('symlog', linthresh=1)
ax.set_xlabel('steps')
ax.set_ylabel('brain correlation')
# log1p instead of log: log(0) = -inf would turn the Pearson r into nan.
r, p = pearsonr(np.log1p(steps),
                corr_models)
ax.text(0.05, 0.95,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
              ha='left', va='top', fontsize=11, transform=ax.transAxes)

for ax, arc_scores, arc_name in ((axes[1], data_challenge, 'arc_challenge'),
                                 (axes[2], data_easy, 'arc_easy')):
    sns.scatterplot(x=arc_scores,
                y=corr_models,
                ax=ax);
    ax.set_xlabel(arc_name)

fh.subplots_adjust(wspace=0.05)

if save_fig:
    # Dedicated file name: 'llms_corr_above3b.pdf' belongs to the >3B-parameter
    # figure and was being overwritten by this cell.
    fh.savefig(os.path.join(fig_folder, 'steps_arc_corr.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Mean correlation per model over the first half of the voxel axis (labelled
# left) and over the second half (labelled right), plus their difference.
n_half_voxels = len(corr_voxels_models[0]) // 2
left_models = corr_voxels_models[:, :n_half_voxels].mean(axis=1)
right_models = corr_voxels_models[:, n_half_voxels:].mean(axis=1)
delta_models = left_models - right_models
print(left_models - right_models)


In [None]:
# Left-hemisphere mean correlation of every checkpoint against the training
# step and the two ARC scores (three panels sharing the y axis).
fh, axes = plt.subplots(1, 3, figsize=(12, 3.2), sharex=False, sharey=True)

ax=axes[0]
sns.scatterplot(x=steps,
            y=left_models,
            ax=ax);
# `steps` starts at 0, which a pure log axis cannot display; symlog is linear
# on [-1, 1] and logarithmic beyond, so the step-0 checkpoint stays visible.
ax.set_xscale('symlog', linthresh=1)
ax.set_xlabel('steps')
ax.set_ylabel('brain correlation')
# log1p instead of log: log(0) = -inf would turn the Pearson r into nan.
r, p = pearsonr(np.log1p(steps),
                left_models)
ax.text(0.05, 0.95,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
              ha='left', va='top', fontsize=11, transform=ax.transAxes)

for ax, arc_scores, arc_name in ((axes[1], data_challenge, 'arc_challenge'),
                                 (axes[2], data_easy, 'arc_easy')):
    sns.scatterplot(x=arc_scores,
                y=left_models,
                ax=ax);
    ax.set_xlabel(arc_name)

fh.subplots_adjust(wspace=0.05)

if save_fig:
    # Dedicated file name: 'llms_corr_above3b.pdf' belongs to the >3B-parameter
    # figure and was being overwritten by this cell.
    fh.savefig(os.path.join(fig_folder, 'steps_arc_corr_left.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Right-hemisphere mean correlation of every checkpoint against the training
# step and the two ARC scores (three panels sharing the y axis).
fh, axes = plt.subplots(1, 3, figsize=(12, 3.2), sharex=False, sharey=True)

ax=axes[0]
sns.scatterplot(x=steps,
            y=right_models,
            ax=ax);
# `steps` starts at 0, which a pure log axis cannot display; symlog is linear
# on [-1, 1] and logarithmic beyond, so the step-0 checkpoint stays visible.
ax.set_xscale('symlog', linthresh=1)
ax.set_xlabel('steps')
ax.set_ylabel('brain correlation')
# log1p instead of log: log(0) = -inf would turn the Pearson r into nan.
r, p = pearsonr(np.log1p(steps),
                right_models)
ax.text(0.05, 0.95,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
              ha='left', va='top', fontsize=11, transform=ax.transAxes)

for ax, arc_scores, arc_name in ((axes[1], data_challenge, 'arc_challenge'),
                                 (axes[2], data_easy, 'arc_easy')):
    sns.scatterplot(x=arc_scores,
                y=right_models,
                ax=ax);
    ax.set_xlabel(arc_name)

fh.subplots_adjust(wspace=0.05)

if save_fig:
    # Dedicated file name: 'llms_corr_above3b.pdf' belongs to the >3B-parameter
    # figure and was being overwritten by this cell.
    fh.savefig(os.path.join(fig_folder, 'steps_arc_corr_right.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Left-minus-right difference of the mean correlation of every checkpoint
# against the training step and the two ARC scores.
fh, axes = plt.subplots(1, 3, figsize=(12, 3.2), sharex=False, sharey=True)

ax=axes[0]
sns.scatterplot(x=steps,
            y=delta_models,
            ax=ax);
# `steps` starts at 0, which a pure log axis cannot display; symlog is linear
# on [-1, 1] and logarithmic beyond, so the step-0 checkpoint stays visible.
ax.set_xscale('symlog', linthresh=1)
ax.set_xlabel('steps')
# The plotted quantity is the L-R difference, not a raw correlation.
ax.set_ylabel('delta brain correlation L-R')
# log1p instead of log: log(0) = -inf would turn the Pearson r into nan.
r, p = pearsonr(np.log1p(steps),
                delta_models)
ax.text(0.05, 0.95,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
              ha='left', va='top', fontsize=11, transform=ax.transAxes)

for ax, arc_scores, arc_name in ((axes[1], data_challenge, 'arc_challenge'),
                                 (axes[2], data_easy, 'arc_easy')):
    sns.scatterplot(x=arc_scores,
                y=delta_models,
                ax=ax);
    ax.set_xlabel(arc_name)

fh.subplots_adjust(wspace=0.05)

if save_fig:
    # Dedicated file name: 'llms_corr_above3b.pdf' belongs to the >3B-parameter
    # figure and was being overwritten by this cell.
    fh.savefig(os.path.join(fig_folder, 'steps_arc_corr_delta_l_r.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
import pandas as pd
import statsmodels.api as sm

# Multiple linear regression of the brain score on both ARC scores.
data = dict(brain_score=corr_models, arcevale=data_easy, arcevalc=data_challenge)
df = pd.DataFrame(data)

Y = df['brain_score']
# add_constant prepends the column used for the intercept term.
X = sm.add_constant(df[['arcevale', 'arcevalc']])

model = sm.OLS(Y, X).fit()

# Full regression report.
print(model.summary())

# Fitted coefficients, one "name: value" line per regressor.
print('\n'.join(f"{variable}: {coefficient}"
                for variable, coefficient in zip(X.columns, model.params)))


In [None]:
# Brain correlation and both ARC scores as a function of the training step,
# each rescaled to [0, 1] so the three curves can be compared on a shared y axis.
fh, axes = plt.subplots(1, 3, figsize=(12, 3.2), sharex=False, sharey=True)


def _min_max(values):
    """Return ``values`` as an array rescaled linearly to the [0, 1] range."""
    values = np.array(values)
    return (values - np.min(values)) / (np.max(values) - np.min(values))


data = _min_max([corr_models[i] for i in range(len(corr_models))])
# NOTE: data_challenge and data_easy are overwritten with their normalized
# versions, exactly as before; cells that run after this one see rescaled values.
data_challenge = _min_max(data_challenge)
data_easy = _min_max(data_easy)

# Each panel is labelled with the series it actually shows (the two ARC labels
# used to be swapped).
panels = ((data, 'brain correlation'),
          (data_challenge, 'arc challenge'),
          (data_easy, 'arc easy'))
for ax, (values, ylabel) in zip(axes, panels):
    sns.scatterplot(x=steps,
                y=values,
                ax=ax);
    # Same x scale on every panel; symlog (linear on [-1, 1], logarithmic
    # beyond) keeps the step-0 checkpoint visible, unlike a pure log axis.
    ax.set_xscale('symlog', linthresh=1)
    ax.set_xlabel('steps')
    ax.set_ylabel(ylabel)

fh.subplots_adjust(wspace=0.05)

if save_fig:
    # Dedicated file name: 'llms_corr_above3b.pdf' belongs to the >3B-parameter
    # figure and was being overwritten by this cell.
    fh.savefig(os.path.join(fig_folder, 'steps_corr_arc_normalized.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# L-R difference divided by the log of the whole-brain correlation, against
# the training step and the two ARC scores.
fh, axes = plt.subplots(1, 3, figsize=(12, 3.2), sharex=False, sharey=True)

# NOTE(review): np.log(corr_models[i]) is only finite for strictly positive
# correlations and is negative below 1 -- confirm this ratio is the intended metric.
data = [delta_models[i] / np.log(corr_models[i]) for i in range(len(corr_models))]

ax=axes[0]
sns.scatterplot(x=steps,
            y=data,
            ax=ax);
# `steps` starts at 0, which a pure log axis cannot display; symlog is linear
# on [-1, 1] and logarithmic beyond, so the step-0 checkpoint stays visible.
ax.set_xscale('symlog', linthresh=1)
ax.set_xlabel('steps')
ax.set_ylabel('delta L-R / log(brain correlation)')
# The annotation describes the plotted series (`data`), not `delta_models`;
# log1p instead of log because log(0) = -inf would turn r into nan.
r, p = pearsonr(np.log1p(steps),
                data)
ax.text(0.05, 0.95,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
              ha='left', va='top', fontsize=11, transform=ax.transAxes)

for ax, arc_scores, arc_name in ((axes[1], data_challenge, 'arc_challenge'),
                                 (axes[2], data_easy, 'arc_easy')):
    sns.scatterplot(x=arc_scores,
                y=data,
                ax=ax);
    ax.set_xlabel(arc_name)

# Correlation matrices of the ratio with each candidate predictor.
print(np.corrcoef(data, data_easy))
print(np.corrcoef(data, data_challenge))
print(np.corrcoef(data, steps))

fh.subplots_adjust(wspace=0.05)

if save_fig:
    # Dedicated file name: 'llms_corr_above3b.pdf' belongs to the >3B-parameter
    # figure and was being overwritten by this cell.
    fh.savefig(os.path.join(fig_folder, 'steps_arc_delta_over_logcorr.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
################################ Slopes, voxelwise ################################
# For every voxel, regress its correlation across models on the ARC-easy score
# and keep the slope together with its t and p values.
# NOTE(review): data_easy is rescaled in place by the min-max normalization
# cell, so the slope units depend on whether that cell has been run.
slopes = []
tvalues = []
pvalues = []
# The design matrix (intercept + ARC-easy) is the same for every voxel.
x = sm.add_constant(data_easy)
for idx_voxel in range(n_voxels):
    # Per-voxel response: the original fitted `corr_models` at every
    # iteration, which gave all voxels the same slope.
    y = corr_voxels_models[:, idx_voxel]
    lm = sm.OLS(y, x)
    res = lm.fit()
    slopes.append(res.params[1])
    tvalues.append(res.tvalues[1])
    pvalues.append(res.pvalues[1])

slopes = np.array(slopes)
tvalues = np.array(tvalues)
pvalues = np.array(pvalues)

# Mask out voxels whose slope is not significant at p < 1e-3 (two-sided t-test
# with n_models - 2 degrees of freedom).
vtmp = slopes.copy()
p_thsld = 10**-3
t_thsld = np.abs(scipy.stats.t.ppf(p_thsld/2, df=n_models-2)) #two-sided
print(t_thsld)
vtmp[np.abs(tvalues) < t_thsld] = np.nan
imgtmp = nifti_masker.inverse_transform(vtmp)

fh = plt.figure(figsize=(12,3))
ax = plt.subplot(111)
plotting.plot_glass_brain(imgtmp, threshold=0., display_mode='lyrz', cbar_tick_format='%.3f',
                          plot_abs=False, colorbar=True, axes=ax)
plotting.show()
if save_fig:
    # Regressor-specific file name so the ARC-challenge map does not overwrite this one.
    fh.savefig(os.path.join(fig_folder, 'glassbrain_slopes_arc_easy.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
################################ Slopes, voxelwise ################################
# For every voxel, regress its correlation across models on the ARC-challenge
# score and keep the slope together with its t and p values.
# NOTE(review): data_challenge is rescaled in place by the min-max
# normalization cell, so the slope units depend on whether that cell has been run.
slopes = []
tvalues = []
pvalues = []
# The design matrix (intercept + ARC-challenge) is the same for every voxel.
x = sm.add_constant(data_challenge)
for idx_voxel in range(n_voxels):
    # Per-voxel response: the original fitted `corr_models` at every
    # iteration, which gave all voxels the same slope.
    y = corr_voxels_models[:, idx_voxel]
    lm = sm.OLS(y, x)
    res = lm.fit()
    slopes.append(res.params[1])
    tvalues.append(res.tvalues[1])
    pvalues.append(res.pvalues[1])

slopes = np.array(slopes)
tvalues = np.array(tvalues)
pvalues = np.array(pvalues)

# Mask out voxels whose slope is not significant at p < 1e-3 (two-sided t-test
# with n_models - 2 degrees of freedom).
vtmp = slopes.copy()
p_thsld = 10**-3
t_thsld = np.abs(scipy.stats.t.ppf(p_thsld/2, df=n_models-2)) #two-sided
print(t_thsld)
vtmp[np.abs(tvalues) < t_thsld] = np.nan
imgtmp = nifti_masker.inverse_transform(vtmp)

fh = plt.figure(figsize=(12,3))
ax = plt.subplot(111)
plotting.plot_glass_brain(imgtmp, threshold=0., display_mode='lyrz', cbar_tick_format='%.3f',
                          plot_abs=False, colorbar=True, axes=ax)
plotting.show()
if save_fig:
    # Regressor-specific file name so this map does not overwrite the ARC-easy one.
    fh.savefig(os.path.join(fig_folder, 'glassbrain_slopes_arc_challenge.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
###################### other x-axis: hellaswag, perplexity ######################

# Each entry: x values, y values, x label, extra plot_xy keyword arguments and
# the output file name. The perplexity axis is inverted; hellaswag uses the
# plot_xy defaults.
xy_figures = [
    (ppl_models, corr_models, 'perplexity', {'invert_xaxis': True}, 'llms_ppl_corr.pdf'),
    (hellaswag_models, corr_models, 'hellaswag', {}, 'llms_hellaswag_corr.pdf'),
    (ppl_models, corr_models_rv, 'perplexity', {'invert_xaxis': True}, 'llms_ppl_corr_rv.pdf'),
    (hellaswag_models, corr_models_rv, 'hellaswag', {}, 'llms_hellaswag_corr_rv.pdf'),
]
for x_values, y_values, x_label, extra_kwargs, pdf_name in xy_figures:
    fh = plot_xy(x_values, y_values, x_label, 'brain correlation', **extra_kwargs)
    if save_fig:
        fh.savefig(os.path.join(fig_folder, pdf_name), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
######## consider only the largest models: more than 3 billion parameters
params_thsld = 3_000_000_000
# Number of models above the threshold.
print(np.count_nonzero(n_parameters > params_thsld))

In [None]:
# Brain correlation of the models above params_thsld against model size,
# perplexity and hellaswag, each panel with a regression line and Pearson r / p.
is_large = n_parameters > params_thsld
corr_large = corr_models[is_large]

fh, axes = plt.subplots(1, 3, figsize=(12, 3.2), sharex=False, sharey=True)

# Per panel: x values, x label, whether x is handled in log space, and whether
# the x axis is inverted.
panels = [
    (n_parameters[is_large], 'number of parameters', True, False),
    (ppl_models[is_large], 'perplexity', False, True),
    (hellaswag_models[is_large], 'hellaswag', False, False),
]
for ax, (x_large, x_label, log_x, flip_x) in zip(axes, panels):
    if log_x:
        sns.regplot(x=x_large, y=corr_large, logx=True, ax=ax);
        ax.set_xscale('log')
    else:
        sns.regplot(x=x_large, y=corr_large, ax=ax);
    if flip_x:
        ax.invert_xaxis()
    ax.set_xlabel(x_label)
    # The correlation is computed on log(x) for the log-scaled panel.
    r, p = pearsonr(np.log(x_large) if log_x else x_large, corr_large)
    ax.text(0.05, 0.95,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
            ha='left', va='top', fontsize=11, transform=ax.transAxes)

axes[0].set_ylabel('brain correlation')

fh.subplots_adjust(wspace=0.05)

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'llms_corr_above3b.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))

In [None]:
# Same three panels as the above-threshold figure, using corr_models_rv as the
# brain-correlation measure.
is_large = n_parameters > params_thsld
corr_rv_large = corr_models_rv[is_large]

fh, axes = plt.subplots(1, 3, figsize=(12, 3.2), sharex=False, sharey=True)

# Per panel: x values, x label, whether x is handled in log space, and whether
# the x axis is inverted.
panels = [
    (n_parameters[is_large], 'number of parameters', True, False),
    (ppl_models[is_large], 'perplexity', False, True),
    (hellaswag_models[is_large], 'hellaswag', False, False),
]
for ax, (x_large, x_label, log_x, flip_x) in zip(axes, panels):
    if log_x:
        sns.regplot(x=x_large, y=corr_rv_large, logx=True, ax=ax);
        ax.set_xscale('log')
    else:
        sns.regplot(x=x_large, y=corr_rv_large, ax=ax);
    if flip_x:
        ax.invert_xaxis()
    ax.set_xlabel(x_label)
    # The correlation is computed on log(x) for the log-scaled panel.
    r, p = pearsonr(np.log(x_large) if log_x else x_large, corr_rv_large)
    ax.text(0.05, 0.95,'$r={:.2f}$\n$p={:.1e}$'.format(r,p),
            ha='left', va='top', fontsize=11, transform=ax.transAxes)

axes[0].set_ylabel('brain correlation')

fh.subplots_adjust(wspace=0.05)

if save_fig:
    fh.savefig(os.path.join(fig_folder, 'llms_corr_above3b_rv.pdf'), bbox_inches='tight', facecolor=(1,1,1,0))