In [4]:
import numpy as np
import os
import matplotlib.pyplot as plt
import json
from pathlib import Path
import pandas as pd

In [5]:
save_dir = '/home/jl277509/Documents/Ablation_translation_3/cutout.csv'

# Step 1 : recover all perf values

In [6]:
# instead of printing, add in table
from pathlib import Path
import pandas as pd

basedir='/neurospin/dico/jlaval/Output/11_cutout_translation_3'

sub_dirs = {'SOr': [f'{basedir}/SOr_left_UKB40/', ['troiani_custom_embeddings/Left_OFC/test_values.json'], 'test_weighted_auc'],
            'FIP': [f'{basedir}/FIP_right_UKB40/', ['FIP_right_custom_embeddings/Right_FIP/test_values.json'], 'test_auc'],
            'SC-sylv': [f'{basedir}/SC-sylv_left_UKB40/', [f'hcp_isomap_custom_embeddings/Isomap_central_left_dim{k}/test_values.json' for k in range(1,7)], 'test_r2']}

l = os.listdir(basedir)
root_names = [elem for elem in l if 'UKB40' not in elem]

In [7]:
# store everything globally
l_model_glob = []
l_perf_glob = []
l_region_glob = []
l_task_glob = []


for region, (sub_dir, auc_files, metric) in sub_dirs.items(): 
    for auc_file in auc_files:
        models = os.listdir(sub_dir)
        models = [elem for elem in models if 'config' not in elem and 'multirun' not in elem]
        models.sort()
        task = Path(auc_file).parts[-2]
        for name in root_names:
            #l_models = [elem for elem in models if elem.startswith(name)]
            l_models = [elem for elem in models if name in elem]
            l_perfs = []
            for model in l_models:
                try:
                    with open(os.path.join(sub_dir, model, auc_file), 'r') as f:
                        perf = json.load(f)
                        if isinstance(perf[metric], float):
                            value = perf[metric]
                        else:
                            value = perf[metric][0]
                        l_perfs.append(value)
                except Exception:
                    print(f'{os.path.join(sub_dir, model, auc_file)} not found') ## NB : can't continue if not all files found ...
            l_model_glob.append(l_models)
            l_perf_glob.append(l_perfs)
            l_region_glob.append([region for _ in range(len(l_perfs))])
            l_task_glob.append([task for _ in range(len(l_perfs))])

In [8]:
# reformat lists and put inside DataFrame
def flatten_sum(matrix):
    return sum(matrix, [])
cols = ['model', 'region', 'task', 'score']
values = np.array([flatten_sum(l_model_glob), flatten_sum(l_region_glob), flatten_sum(l_task_glob), flatten_sum(l_perf_glob)]).T
df = pd.DataFrame(data=values, columns=cols)

In [9]:
df

Unnamed: 0,model,region,task,score
0,6_cutout_size_20_30_p80_bottom50name13-29-48_164,SOr,Left_OFC,0.6022825402337624
1,10_cutout_size_5_15_p80_bottom70name11-43-20_227,SOr,Left_OFC,0.7055319293709732
2,5_cutout_size_15_25_p80_bottom50name13-29-48_110,SOr,Left_OFC,0.6416190211223243
3,11_cutout_size_15_25_p80_bottom70name11-47-01_182,SOr,Left_OFC,0.666500677412819
4,15_cutout_size_10_20_p40_bottom70name12-47-30_248,SOr,Left_OFC,0.6887817128252479
...,...,...,...,...
123,14_cutout_size_10_20_p80_bottom50name10-50-13_201,SC-sylv,Isomap_central_left_dim6,0.4293191465267643
124,3_cutout_size_20_30_p40_bottom50name10-50-13_233,SC-sylv,Isomap_central_left_dim6,0.40681543963083355
125,9_cutout_size_20_30_p40_bottom70name11-30-53_113,SC-sylv,Isomap_central_left_dim6,0.39465159063109756
126,13_cutout_size_10_20_p40_bottom50name10-50-13_94,SC-sylv,Isomap_central_left_dim6,0.39808826863674884


# Step 2 : reorganize the df with the parameters

In [10]:
# reshape the df so that keep p is a column
# all and bottom are also in a column
list_magnitude=[]
list_proba=[]
list_bottom_proba=[]
for model in df['model'].tolist():
    if 'bottom50' in model:
        list_bottom_proba.append(50)
    elif 'bottom70' in model:
        list_bottom_proba.append(70)

    if '5_15' in model:
        list_magnitude.append('[5,15]')
    elif '10_20' in model:
        list_magnitude.append('[10,20]')
    elif '15_25' in model:
        list_magnitude.append('[15,25]')
    elif '20_30' in model:
        list_magnitude.append('[20,30]')

    if 'p40' in model:
        list_proba.append(40) 
    elif 'p80' in model:
        list_proba.append(80)


In [11]:
df['Proba'] = list_proba
df['Magnitude']=list_magnitude
df['Bottom_Proba']=list_bottom_proba

In [12]:
df_reformat = df.copy()
df_reformat = df_reformat.drop(labels='model', axis='columns')


In [13]:
# Pivoting the table
df_pivot = df_reformat.pivot(index=['Magnitude', 'Proba', 'Bottom_Proba'], columns=['task', 'region'], values='score')

# Renaming columns
df_pivot.columns = [f'score_{col}' for col in df_pivot.columns]

# Resetting index to get a flat DataFrame
df_pivot = df_pivot.reset_index()

df_pivot['score_Isomap_left_global'] = df_pivot[df_pivot.columns[-6:]].astype(float).mean(axis=1)
cols = df_pivot.columns.tolist()
cols = cols[:5]+cols[-1:]+cols[5:-1] # put average Isomap before
df_pivot=df_pivot[cols] # swap cols

In [14]:
# *100 and keep only 1 decimal place
score_cols = df_pivot.columns[3:].tolist()
df_pivot[score_cols]=df_pivot[score_cols].apply(pd.to_numeric).mul(100).round(1)

In [15]:
df_pivot

Unnamed: 0,Magnitude,Proba,Bottom_Proba,"score_('Left_OFC', 'SOr')","score_('Right_FIP', 'FIP')",score_Isomap_left_global,"score_('Isomap_central_left_dim1', 'SC-sylv')","score_('Isomap_central_left_dim2', 'SC-sylv')","score_('Isomap_central_left_dim3', 'SC-sylv')","score_('Isomap_central_left_dim4', 'SC-sylv')","score_('Isomap_central_left_dim5', 'SC-sylv')","score_('Isomap_central_left_dim6', 'SC-sylv')"
0,"[10,20]",40,50,73.8,67.6,57.1,60.4,52.6,75.2,62.7,52.0,39.8
1,"[10,20]",40,70,68.9,61.0,46.8,52.4,46.1,62.5,53.1,38.7,27.9
2,"[10,20]",80,50,68.0,72.5,58.6,63.3,55.2,74.9,65.1,50.3,42.9
3,"[10,20]",80,70,72.7,74.5,56.6,61.9,54.1,72.3,64.4,47.5,39.1
4,"[15,25]",40,50,71.0,74.8,55.1,61.2,52.8,73.6,62.2,48.2,32.7
5,"[15,25]",40,70,70.3,76.4,55.7,60.7,54.0,73.5,63.2,48.7,34.4
6,"[15,25]",80,50,64.2,77.8,58.9,64.9,54.2,74.2,65.8,52.6,41.4
7,"[15,25]",80,70,66.7,74.3,57.2,61.5,55.4,71.3,62.5,51.0,41.3
8,"[20,30]",40,50,72.4,75.8,57.1,61.5,53.2,74.1,64.8,48.2,40.7
9,"[20,30]",40,70,70.5,76.6,56.7,61.3,54.6,70.2,65.2,49.1,39.5


In [13]:
df_pivot.to_csv(save_dir, index=False)