In [34]:
import numpy as np
import os
import matplotlib.pyplot as plt
import json
from pathlib import Path
import pandas as pd

In [35]:
save_dir = '/home/jl277509/Documents/Ablation_translation_3/cutin.csv'

# Step 1 : recover all perf values

In [36]:
# instead of printing, add in table
from pathlib import Path
import pandas as pd

basedir='/neurospin/dico/jlaval/Output/10_cutin_translation_3'

sub_dirs = {'SOr': [f'{basedir}/SOr_left_UKB40/', ['troiani_custom_embeddings/Left_OFC/test_values.json'], 'test_weighted_auc'],
            'FIP': [f'{basedir}/FIP_right_UKB40/', ['FIP_right_custom_embeddings/Right_FIP/test_values.json'], 'test_auc'],
            'SC-sylv': [f'{basedir}/SC-sylv_left_UKB40/', [f'hcp_isomap_custom_embeddings/Isomap_central_left_dim{k}/test_values.json' for k in range(1,7)], 'test_r2']}

l = os.listdir(basedir)
root_names = [elem for elem in l if ('UKB40' not in elem) and '#' not in elem]

In [37]:
# store everything globally
l_model_glob = []
l_perf_glob = []
l_region_glob = []
l_task_glob = []


for region, (sub_dir, auc_files, metric) in sub_dirs.items(): 
    for auc_file in auc_files:
        models = os.listdir(sub_dir)
        models = [elem for elem in models if 'config' not in elem and 'multirun' not in elem]
        models.sort()
        task = Path(auc_file).parts[-2]
        for name in root_names:
            #l_models = [elem for elem in models if elem.startswith(name)]
            l_models = [elem for elem in models if name in elem]
            l_perfs = []
            for model in l_models:
                try:
                    with open(os.path.join(sub_dir, model, auc_file), 'r') as f:
                        perf = json.load(f)
                        if isinstance(perf[metric], float):
                            value = perf[metric]
                        else:
                            value = perf[metric][0]
                        l_perfs.append(value)
                except Exception:
                    print(f'{os.path.join(sub_dir, model, auc_file)} not found') ## NB : can't continue if not all files found ...
            l_model_glob.append(l_models)
            l_perf_glob.append(l_perfs)
            l_region_glob.append([region for _ in range(len(l_perfs))])
            l_task_glob.append([task for _ in range(len(l_perfs))])

In [38]:
# reformat lists and put inside DataFrame
def flatten_sum(matrix):
    return sum(matrix, [])
cols = ['model', 'region', 'task', 'score']
values = np.array([flatten_sum(l_model_glob), flatten_sum(l_region_glob), flatten_sum(l_task_glob), flatten_sum(l_perf_glob)]).T
df = pd.DataFrame(data=values, columns=cols)

In [39]:
df

Unnamed: 0,model,region,task,score
0,16_cutin_size_30_50_p80_bottom70name14-52-54_224,SOr,Left_OFC,0.6783797303656429
1,7_cutin_size_20_40_p40_bottom70name15-47-11_68,SOr,Left_OFC,0.7422606819829949
2,12_cutin_size_50_70_p80_bottom70name13-44-48_126,SOr,Left_OFC,0.7414122444865833
3,2_cutin_size_40_60_p40_bottom50name15-15-28_135,SOr,Left_OFC,0.7677613196453422
4,5_cutin_size_40_60_p80_bottom50name15-46-25_114,SOr,Left_OFC,0.746105033961302
...,...,...,...,...
115,9_cutin_size_50_70_p40_bottom70name16-04-22_84,SC-sylv,Isomap_central_left_dim6,0.2920686026422993
116,1_cutin_size_20_40_p40_bottom50name15-09-31_205,SC-sylv,Isomap_central_left_dim6,0.3226861294640726
117,11_cutin_size_40_60_p80_bottom70name13-36-33_41,SC-sylv,Isomap_central_left_dim6,0.3529609913966494
118,15_cutin_size_30_50_p40_bottom70name14-42-55_49,SC-sylv,Isomap_central_left_dim6,0.35998084605235303


# Step 2 : reorganize the df with the parameters

In [40]:
# reshape the df so that keep p is a column
# all and bottom are also in a column
list_magnitude=[]
list_proba=[]
list_bottom_proba=[]
for model in df['model'].tolist():
    if 'bottom50' in model:
        list_bottom_proba.append(50)
    elif 'bottom70' in model:
        list_bottom_proba.append(70)

    if '20_40' in model:
        list_magnitude.append('[20,40]')
    elif '30_50' in model:
        list_magnitude.append('[30,50]')
    elif '40_60' in model:
        list_magnitude.append('[40,60]')
    elif '50_70' in model:
        list_magnitude.append('[50,70]')

    if 'p40' in model:
        list_proba.append(40) 
    elif 'p80' in model:
        list_proba.append(80)


In [41]:
df['Proba'] = list_proba
df['Magnitude']=list_magnitude
df['Bottom_Proba']=list_bottom_proba

In [42]:
df_reformat = df.copy()
df_reformat = df_reformat.drop(labels='model', axis='columns')


In [43]:
# Pivoting the table
df_pivot = df_reformat.pivot(index=['Magnitude', 'Proba', 'Bottom_Proba'], columns=['task', 'region'], values='score')

# Renaming columns
df_pivot.columns = [f'score_{col}' for col in df_pivot.columns]

# Resetting index to get a flat DataFrame
df_pivot = df_pivot.reset_index()

df_pivot['score_Isomap_left_global'] = df_pivot[df_pivot.columns[-6:]].astype(float).mean(axis=1)
cols = df_pivot.columns.tolist()
cols = cols[:5]+cols[-1:]+cols[5:-1] # put average Isomap before
df_pivot=df_pivot[cols] # swap cols

In [44]:
# *100 and keep only 1 decimal place
score_cols = df_pivot.columns[3:].tolist()
df_pivot[score_cols]=df_pivot[score_cols].apply(pd.to_numeric).mul(100).round(1)

In [45]:
df_pivot

Unnamed: 0,Magnitude,Proba,Bottom_Proba,"score_('Left_OFC', 'SOr')","score_('Right_FIP', 'FIP')",score_Isomap_left_global,"score_('Isomap_central_left_dim1', 'SC-sylv')","score_('Isomap_central_left_dim2', 'SC-sylv')","score_('Isomap_central_left_dim3', 'SC-sylv')","score_('Isomap_central_left_dim4', 'SC-sylv')","score_('Isomap_central_left_dim5', 'SC-sylv')","score_('Isomap_central_left_dim6', 'SC-sylv')"
0,"[20,40]",40,50,75.7,80.8,50.9,56.7,54.0,63.8,57.7,41.2,32.3
1,"[20,40]",40,70,74.2,78.2,43.3,53.8,49.2,54.1,49.7,32.1,20.9
2,"[20,40]",80,50,72.0,82.0,53.8,58.2,56.9,70.0,60.8,41.9,35.3
3,"[20,40]",80,70,67.3,77.5,52.6,57.0,55.6,66.5,59.9,41.6,34.8
4,"[30,50]",40,50,75.0,83.5,52.8,58.0,53.8,70.6,62.4,41.9,30.4
5,"[30,50]",40,70,71.3,78.7,53.4,58.2,54.4,68.1,61.2,42.5,36.0
6,"[30,50]",80,50,71.0,80.4,52.3,58.4,55.2,68.9,60.6,39.2,31.7
7,"[30,50]",80,70,67.8,81.1,50.8,59.3,52.0,64.5,56.4,44.1,28.3
8,"[40,60]",40,50,76.8,79.2,50.7,58.8,53.0,71.2,58.7,33.3,29.0
9,"[40,60]",40,70,74.9,78.7,53.5,57.1,53.2,67.5,61.6,45.6,35.9


In [98]:
df_pivot.to_csv(save_dir, index=False)