In [1]:
import pickle

# Deserialize the first results file into `data`.
# NOTE: pickle can execute arbitrary code while loading -- only open files
# that come from a trusted source.
with open("results_ctrlAV.pkl", mode="rb") as results_file:
    data = pickle.load(results_file)


In [2]:
import pandas as pd

# Flatten the nested results mapping (participant -> roi -> model -> array)
# into a single long-format frame with one row per
# (participant, roi, model, index) combination.
long_frames = []

for participant, roi_dict in data.items():
    for roi, model_dict in roi_dict.items():
        per_roi = (
            # One column per model; rows follow the position inside each array.
            pd.DataFrame(model_dict)
            # Expose that position as a regular column named "index"
            # (0 .. 1576 for the arrays in this dataset).
            .reset_index()
            # Wide -> long: one row per (index, model) pair.
            .melt(id_vars="index", var_name="model", value_name="value")
            # Tag every row with where it came from.
            .assign(participant=participant, roi=roi)
        )
        long_frames.append(per_roi)

# ignore_index=True gives the stacked frame a fresh 0..N-1 row index.
df = pd.concat(long_frames, ignore_index=True)



In [3]:
# Preview the long-format frame: columns index, model, value, participant, roi.
df

Unnamed: 0,index,model,value,participant,roi
0,0,Llama_layers_layers7-11,-0.074724,33,2
1,1,Llama_layers_layers7-11,0.056496,33,2
2,2,Llama_layers_layers7-11,-0.128081,33,2
3,3,Llama_layers_layers7-11,0.130229,33,2
4,4,Llama_layers_layers7-11,-0.071431,33,2
...,...,...,...,...,...
2561043,1572,binder_concreteness,-0.029869,32,face
2561044,1573,binder_concreteness,0.012246,32,face
2561045,1574,binder_concreteness,-0.092258,32,face
2561046,1575,binder_concreteness,-0.035142,32,face


In [4]:
# Participant IDs that make up each experimental group.
blind = {33, 35, 36, 38, 39, 41, 42, 43, 53}
ctrlA = {3, 4, 5, 6, 7, 8, 9, 10, 11, 27}
ctrlAV = {12, 13, 14, 15, 16, 17, 18, 19, 22, 32}

# Invert the group -> members sets into a participant -> group-label lookup.
# Groups are visited in the order blind, ctrlA, ctrlAV, so a later group would
# win if an ID were ever listed twice.
participant_to_group = {
    member: label
    for label, members in (("blind", blind), ("ctrlA", ctrlA), ("ctrlAV", ctrlAV))
    for member in members
}

# Label every row with its participant's group; IDs missing from the lookup
# come out as NaN.
df["participant_group"] = df["participant"].map(participant_to_group)


In [5]:
# Preview again to check the newly added participant_group column.
df

Unnamed: 0,index,model,value,participant,roi,participant_group
0,0,Llama_layers_layers7-11,-0.074724,33,2,blind
1,1,Llama_layers_layers7-11,0.056496,33,2,blind
2,2,Llama_layers_layers7-11,-0.128081,33,2,blind
3,3,Llama_layers_layers7-11,0.130229,33,2,blind
4,4,Llama_layers_layers7-11,-0.071431,33,2,blind
...,...,...,...,...,...,...
2561043,1572,binder_concreteness,-0.029869,32,face,ctrlAV
2561044,1573,binder_concreteness,0.012246,32,face,ctrlAV
2561045,1574,binder_concreteness,-0.092258,32,face,ctrlAV
2561046,1575,binder_concreteness,-0.035142,32,face,ctrlAV


In [6]:
# Deserialize the second results file (from the qwentext/ directory) into `new_data`.
# NOTE: pickle can execute arbitrary code while loading -- only open files
# that come from a trusted source.
with open("qwentext/results_ctrlAV.pkl", mode="rb") as qwentext_file:
    new_data = pickle.load(qwentext_file)

In [7]:
import pandas as pd

# Models to leave out of the second results set. The preview of the first
# frame already shows binder_* rows, so these are presumably dropped to avoid
# duplicating them -- TODO confirm.
exclude_models = {"binder_abstractness", "binder_concreteness"}

# Flatten new_data (participant -> roi -> model -> array) into long format,
# one row per (participant, roi, model, index) combination.
new_frames = []

for participant, roi_dict in new_data.items():
    for roi, model_dict in roi_dict.items():
        kept = {
            name: values
            for name, values in model_dict.items()
            if name not in exclude_models
        }

        # Nothing to add for this ROI if every model was excluded.
        if not kept:
            continue

        per_roi = (
            # One column per remaining model; rows follow the array position.
            pd.DataFrame(kept)
            # Expose that position as a regular column named "index".
            .reset_index()
            # Wide -> long: one row per (index, model) pair.
            .melt(id_vars="index", var_name="model", value_name="value")
            # Tag every row with where it came from.
            .assign(participant=participant, roi=roi)
        )
        new_frames.append(per_roi)

df_new = pd.concat(new_frames, ignore_index=True)

# Same group labelling as for the first frame; unknown participants become NaN.
df_new["participant_group"] = df_new["participant"].map(participant_to_group)


In [8]:
# Stack the two long-format frames row-wise and renumber the rows 0..N-1.
df_combined = pd.concat(
    objs=[df, df_new],
    ignore_index=True,
)


In [9]:
# Preview the stacked frame (rows of df followed by rows of df_new).
df_combined

Unnamed: 0,index,model,value,participant,roi,participant_group
0,0,Llama_layers_layers7-11,-0.074724,33,2,blind
1,1,Llama_layers_layers7-11,0.056496,33,2,blind
2,2,Llama_layers_layers7-11,-0.128081,33,2,blind
3,3,Llama_layers_layers7-11,0.130229,33,2,blind
4,4,Llama_layers_layers7-11,-0.071431,33,2,blind
...,...,...,...,...,...,...
2926907,1572,qwen-text_layers16-20_conv,0.041527,32,face,ctrlAV
2926908,1573,qwen-text_layers16-20_conv,0.063453,32,face,ctrlAV
2926909,1574,qwen-text_layers16-20_conv,0.087940,32,face,ctrlAV
2926910,1575,qwen-text_layers16-20_conv,-0.029212,32,face,ctrlAV


In [10]:
# Load the semantic results. As the printed sample shows, this is a dict keyed
# by (binder_model, model_name) tuples whose values are arrays of shape (1577,).
# NOTE: pickle can execute arbitrary code while loading -- only open files
# that come from a trusted source.
with open("semantic/results_semantic.pkl", mode="rb") as semantic_file:
    semanticresults = pickle.load(semantic_file)

print(semanticresults)

{('binder_concreteness', 'Llama_layers_layers7-11'): array([-0.029333  , -0.03696641,  0.03942497, ...,  0.00633964,
        0.00707369,  0.00748555], shape=(1577,)), ('binder_concreteness', 'CLIPmultilingualmulti_layers11-15'): array([ 0.11705375,  0.1144265 ,  0.11454238, ..., -0.05413075,
       -0.04792858, -0.04692644], shape=(1577,)), ('binder_concreteness', 'qwen-text_layers16-20_conv'): array([ 0.06959846,  0.10228053,  0.06608655, ...,  0.10551191,
        0.09112249, -0.07064948], shape=(1577,)), ('binder_concreteness', 'qwen-audiovideotext_layers16-20'): array([ 0.03039857,  0.04032052,  0.05169334, ...,  0.11369727,
        0.1018167 , -0.04168547], shape=(1577,)), ('binder_concreteness', 'CLIPmultilingualtext_layers11-15'): array([ 0.13917391,  0.12873833,  0.12423615, ..., -0.02152618,
       -0.01561056, -0.01451901], shape=(1577,)), ('binder_concreteness', 'XLM-roberta_layers11-15'): array([0.08773513, 0.13049882, 0.03640405, ..., 0.06752426, 0.06714298,
       0.066927

In [11]:
import numpy as np

# semanticresults maps (binder_model, model_name) -> array.
# Split it into one model_name -> array lookup per binder dimension.
corr_conc = {
    model_name: values
    for (binder_name, model_name), values in semanticresults.items()
    if binder_name == 'binder_concreteness'
}

corr_abs = {
    model_name: values
    for (binder_name, model_name), values in semanticresults.items()
    if binder_name == 'binder_abstractness'
}

# For each row, pick the entry of its model's array at the row's "index".
# Rows whose model has no entry in the lookup keep NaN.
for column, lookup in (
    ("corr_concreteness", corr_conc),
    ("corr_abstractness", corr_abs),
):
    df_combined[column] = np.nan
    for model_name, values in lookup.items():
        rows = df_combined["model"] == model_name
        positions = df_combined.loc[rows, "index"].values
        df_combined.loc[rows, column] = values[positions]


In [12]:
# Unpivot the two corr_* columns: each input row becomes two rows, one per
# semantic dimension, with the dimension name in semantic_model and the
# correlation in semantic_value.
id_columns = ['participant_group', 'participant', 'roi', 'model', 'index', 'value']
semantic_columns = ['corr_concreteness', 'corr_abstractness']

df_long = df_combined.melt(
    id_vars=id_columns,
    value_vars=semantic_columns,
    var_name='semantic_model',
    value_name='semantic_value',
)

# Drop the "corr_" prefix so the labels read "concreteness" / "abstractness".
df_long['semantic_model'] = df_long['semantic_model'].str.replace('corr_', '')

# Numeric ROI labels are tagged "language", everything else "visual".
# The new column is placed immediately to the right of "roi".
roi_position = df_long.columns.get_loc("roi")
df_long.insert(
    roi_position + 1,
    "roi_type",
    df_long["roi"].apply(
        lambda label: "language" if isinstance(label, (int, float)) else "visual"
    ),
)

df_long


Unnamed: 0,participant_group,participant,roi,roi_type,model,index,value,semantic_model,semantic_value
0,blind,33,2,language,Llama_layers_layers7-11,0,-0.074724,concreteness,-0.029333
1,blind,33,2,language,Llama_layers_layers7-11,1,0.056496,concreteness,-0.036966
2,blind,33,2,language,Llama_layers_layers7-11,2,-0.128081,concreteness,0.039425
3,blind,33,2,language,Llama_layers_layers7-11,3,0.130229,concreteness,0.046623
4,blind,33,2,language,Llama_layers_layers7-11,4,-0.071431,concreteness,0.057476
...,...,...,...,...,...,...,...,...,...
5853819,ctrlAV,32,face,visual,qwen-text_layers16-20_conv,1572,0.041527,abstractness,0.124665
5853820,ctrlAV,32,face,visual,qwen-text_layers16-20_conv,1573,0.063453,abstractness,0.090562
5853821,ctrlAV,32,face,visual,qwen-text_layers16-20_conv,1574,0.087940,abstractness,0.048710
5853822,ctrlAV,32,face,visual,qwen-text_layers16-20_conv,1575,-0.029212,abstractness,0.026334


In [14]:
# Write the final long-format table as a semicolon-separated file,
# without the row index.
df_long.to_csv(
    path_or_buf="results.csv",
    sep=";",
    index=False,
)