In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
import json
from pathlib import Path
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [None]:
save_dir = '/home/jl277509/Documents/Ablation/cutin.csv'

# Step 1 : recover all perf values

In [None]:
# instead of printing, add in table
from pathlib import Path
import pandas as pd

basedir='/neurospin/dico/jlaval/Output/10_cutin'

sub_dirs = {'SOr': [f'{basedir}/SOr_left_UKB40/', ['troiani_custom_embeddings/Left_OFC/test_values.json'], 'test_weighted_auc'],
            'FIP': [f'{basedir}/FIP_right_UKB40/', ['FIP_right_custom_embeddings/Right_FIP/test_values.json'], 'test_auc'],
            'SC-sylv': [f'{basedir}/SC-sylv_left_UKB40/', [f'hcp_isomap_custom_embeddings/Isomap_central_left_dim{k}/test_values.json' for k in range(1,7)], 'test_r2']}

l = os.listdir(basedir)
root_names = [elem for elem in l if 'UKB40' not in elem]

In [27]:
# store everything globally
l_model_glob = []
l_perf_glob = []
l_region_glob = []
l_task_glob = []


for region, (sub_dir, auc_files, metric) in sub_dirs.items(): 
    for auc_file in auc_files:
        models = os.listdir(sub_dir)
        models = [elem for elem in models if 'config' not in elem and 'multirun' not in elem]
        models.sort()
        task = Path(auc_file).parts[-2]
        for name in root_names:
            #l_models = [elem for elem in models if elem.startswith(name)]
            l_models = [elem for elem in models if name in elem]
            l_perfs = []
            for model in l_models:
                try:
                    with open(os.path.join(sub_dir, model, auc_file), 'r') as f:
                        perf = json.load(f)
                        if isinstance(perf[metric], float):
                            value = perf[metric]
                        else:
                            value = perf[metric][0]
                        l_perfs.append(value)
                except Exception:
                    print(f'{os.path.join(sub_dir, model, auc_file)} not found') ## NB : can't continue if not all files found ...
            l_model_glob.append(l_models)
            l_perf_glob.append(l_perfs)
            l_region_glob.append([region for _ in range(len(l_perfs))])
            l_task_glob.append([task for _ in range(len(l_perfs))])

In [28]:
# reformat lists and put inside DataFrame
def flatten_sum(matrix):
    return sum(matrix, [])
cols = ['model', 'region', 'task', 'score']
values = np.array([flatten_sum(l_model_glob), flatten_sum(l_region_glob), flatten_sum(l_task_glob), flatten_sum(l_perf_glob)]).T
df = pd.DataFrame(data=values, columns=cols)

In [29]:
df

Unnamed: 0,model,region,task,score
0,1_translation_1_p100name14-24-50_92,SOr,Left_OFC,0.6250390578584413
1,1_translation_1_p40name12-14-10_131,SOr,Left_OFC,0.6337177041428826
2,2_translation_2_p100name15-56-14_234,SOr,Left_OFC,0.6205114334796715
3,2_translation_2_p40name12-18-50_164,SOr,Left_OFC,0.6245810123915957
4,3_translation_3_p100name16-05-08_73,SOr,Left_OFC,0.6379515727699808
...,...,...,...,...
67,3_translation_3_p100name15-56-13_172,SC-sylv,Isomap_central_left_dim6,0.1104878054954006
68,3_translation_3_p40name12-19-55_134,SC-sylv,Isomap_central_left_dim6,0.08016272295651783
69,4_translation_1_p80name12-28-52_5,SC-sylv,Isomap_central_left_dim6,0.03032191232552417
70,5_translation_2_p80name12-31-37_87,SC-sylv,Isomap_central_left_dim6,0.031431639797924404


# Step 2 : reorganize the df with the parameters

In [None]:
# reshape the df so that keep p is a column
# all and bottom are also in a column
list_magnitude=[]
list_proba=[]
list_bottom_proba=[]
for model in df['model'].tolist():
    if 'bottom50' in model:
        list_bottom_proba.append(50)
    elif 'bottom70' in model:
        list_bottom_proba.append(70)

    if '20_40' in model:
        list_magnitude.append('[20,40]')
    elif '30_50' in model:
        list_magnitude.append('[30,50]')
    elif '40_60' in model:
        list_magnitude.append('[40,60]')
    elif '50_70' in model:
        list_magnitude.append('[50,70]')

    if 'p40' in model:
        list_proba.append(40) 
    elif 'p80' in model:
        list_proba.append(80)


In [None]:
df['Proba'] = list_proba
df['Magnitude']=list_magnitude
df['Bottom_Proba']=list_bottom_proba

In [60]:
df_reformat = df.copy()
df_reformat = df_reformat.drop(labels='model', axis='columns')


In [None]:
# Pivoting the table
df_pivot = df_reformat.pivot(index=['Magnitude', 'Proba', 'Bottom_Proba'], columns=['task', 'region'], values='score')

# Renaming columns
df_pivot.columns = [f'score_{col}' for col in df_pivot.columns]

# Resetting index to get a flat DataFrame
df_pivot = df_pivot.reset_index()

df_pivot['score_Isomap_left_global'] = df_pivot[df_pivot.columns[-6:]].astype(float).mean(axis=1)
cols = df_pivot.columns.tolist()
cols = cols[:5]+cols[-1:]+cols[5:-1] # put average Isomap before
df_pivot=df_pivot[cols] # swap cols

In [None]:
# *100 and keep only 1 decimal place
score_cols = df_pivot.columns[3:].tolist()
df_pivot[score_cols]=df_pivot[score_cols].apply(pd.to_numeric).mul(100).round(1)

In [96]:
df_pivot

Unnamed: 0,Magnitude,Proba,"score_('Left_OFC', 'SOr')","score_('Right_FIP', 'FIP')",score_Isomap_left_global,"score_('Isomap_central_left_dim1', 'SC-sylv')","score_('Isomap_central_left_dim2', 'SC-sylv')","score_('Isomap_central_left_dim3', 'SC-sylv')","score_('Isomap_central_left_dim4', 'SC-sylv')","score_('Isomap_central_left_dim5', 'SC-sylv')","score_('Isomap_central_left_dim6', 'SC-sylv')"
0,1,40,63.4,73.5,4.1,7.1,1.2,5.8,1.3,4.9,4.4
1,1,80,64.2,74.7,3.5,8.0,1.0,5.8,-0.0,3.3,3.0
2,1,100,62.5,73.1,6.8,9.8,-0.4,18.0,-0.2,4.2,9.5
3,2,40,62.5,73.3,4.2,7.7,0.4,8.1,-0.1,4.1,4.8
4,2,80,63.8,68.6,3.2,7.7,-0.4,5.9,-0.1,3.2,3.1
5,2,100,62.1,71.1,11.6,14.6,7.2,27.7,4.6,4.6,11.0
6,3,40,61.8,69.2,8.5,10.8,4.2,21.5,5.3,1.1,8.0
7,3,80,60.5,74.7,5.5,9.4,1.3,10.0,0.8,5.9,5.6
8,3,100,63.8,74.9,12.3,16.7,6.8,27.2,6.1,5.9,11.0


In [98]:
df_pivot.to_csv(save_dir, index=False)