In [7]:
import numpy as np
import os
import matplotlib.pyplot as plt
import json
from pathlib import Path
import pandas as pd

In [8]:
# Destination of the final table written with DataFrame.to_csv at the end of the notebook.
# NOTE: despite its name, this is the path of a CSV *file*, not of a directory.
save_dir = '/home/jl277509/Documents/Ablation/translation.csv'

# Step 1: recover all performance values (with cross-validation std)

In [9]:
# Describe where the results live. For each region, sub_dirs stores:
#   [directory holding the models,
#    result files (paths relative to one model directory),
#    key of the metric to read in those files]
from pathlib import Path
import pandas as pd

basedir='/neurospin/dico/jlaval/Output/ablation_2_models/7_translation'

# One result file per Isomap dimension (dim1 .. dim6).
isomap_files = [f'hcp_isomap_custom_embeddings/Isomap_central_left_dim{k}/test_values.json' for k in range(1,7)]

sub_dirs = {}
sub_dirs['SOr'] = [f'{basedir}/SOr_left_UKB40/', ['troiani_custom_embeddings/Left_OFC/test_values.json'], 'test_weighted_auc']
sub_dirs['FIP'] = [f'{basedir}/FIP_right_UKB40/', ['FIP_right_custom_embeddings/Right_FIP/test_values.json'], 'test_auc']
sub_dirs['SC-sylv'] = [f'{basedir}/SC-sylv_left_UKB40/', isomap_files, 'test_r2']

# Keep the entries of basedir whose name contains neither 'UKB40' nor '#';
# they are used later as name fragments to group the models.
l = os.listdir(basedir)
root_names = [elem for elem in l if not ('UKB40' in elem or '#' in elem)]

In [None]:
# Collect the test metric of every model, for every region and task.
# Each of the four lists below receives one sub-list per (task, root name)
# pair; the sub-lists are kept index-aligned so that they can be flattened
# into the columns of a single table afterwards.
l_model_glob = []
l_perf_glob = []
l_region_glob = []
l_task_glob = []


for region, (sub_dir, auc_files, metric) in sub_dirs.items():
    # The model listing does not depend on the result file: build it once per region.
    models = sorted(elem for elem in os.listdir(sub_dir)
                    if 'config' not in elem and 'multirun' not in elem)
    for auc_file in auc_files:
        # The task name is the directory that contains the result file.
        task = Path(auc_file).parts[-2]
        for name in root_names:
            l_models = [elem for elem in models if name in elem]
            # Only models whose metric could actually be read are recorded, so
            # that the model list never gets longer than the perf list.
            l_found_models = []
            l_perfs = []
            for model in l_models:
                perf_path = os.path.join(sub_dir, model, auc_file)
                try:
                    with open(perf_path, 'r') as f:
                        perf = json.load(f)
                    value = perf[metric]
                    # The metric is stored either as a bare number or as a
                    # sequence whose first element is kept
                    # (presumably [value, std] -- TODO confirm).
                    if not isinstance(value, (int, float)):
                        value = value[0]
                except FileNotFoundError:
                    print(f'{perf_path} not found')
                    continue
                except Exception as err:
                    # Best effort: report unreadable / malformed files and go on.
                    print(f'{perf_path} skipped ({type(err).__name__}: {err})')
                    continue
                l_found_models.append(model)
                l_perfs.append(value)
            l_model_glob.append(l_found_models)
            l_perf_glob.append(l_perfs)
            l_region_glob.append([region for _ in range(len(l_perfs))])
            l_task_glob.append([task for _ in range(len(l_perfs))])

In [5]:
# reformat lists and put inside DataFrame
def flatten_sum(matrix):
    """Concatenate the sub-sequences of `matrix` into a single flat list.

    Parameters
    ----------
    matrix : iterable of iterables
        For example a list of lists. Only one level of nesting is removed.

    Returns
    -------
    list
        All the items of every sub-sequence, in order. Empty if `matrix` is empty.
    """
    # A single pass is linear in the number of items, whereas sum(matrix, [])
    # re-copies the accumulated list at every step.
    return [item for row in matrix for item in row]
# One row per collected result. The table is built column by column so that
# 'score' keeps its numeric dtype: stacking strings and floats in one NumPy
# array would convert every score to a string.
cols = ['model', 'region', 'task', 'score']
df = pd.DataFrame(
    {
        'model': flatten_sum(l_model_glob),
        'region': flatten_sum(l_region_glob),
        'task': flatten_sum(l_task_glob),
        'score': flatten_sum(l_perf_glob),
    },
    columns=cols,
)

# Step 2: reorganize the df around the parameters (Magnitude, Proba)

In [None]:
# Parse two parameters out of each model name so that they can be stored as
# columns: the number following 'translation_' (magnitude) and the number
# following 'p' (proba).
# Each table lists (token searched in the model name, parsed value); tokens
# are tried in order and the first one found wins.
magnitude_tokens = [('translation_1', 1), ('translation_2', 2), ('translation_3', 3)]
proba_tokens = [('p40', 40), ('p80', 80), ('p100', 100)]


def _first_token_value(model, tokens, what):
    """Return the value paired with the first token of `tokens` found in `model`.

    Raises
    ------
    ValueError
        If no token is found. Skipping the model silently would leave the
        output lists shorter than the table they are attached to.
    """
    for token, value in tokens:
        if token in model:
            return value
    raise ValueError(f'cannot parse {what} from model name {model!r}')


list_magnitude=[]
list_proba=[]
for model in df['model'].tolist():
    list_magnitude.append(_first_token_value(model, magnitude_tokens, 'magnitude'))
    list_proba.append(_first_token_value(model, proba_tokens, 'proba'))

In [None]:
# Attach the parsed parameters to the table, 'Proba' first and 'Magnitude' second.
new_columns = {'Proba': list_proba, 'Magnitude': list_magnitude}
for col_name, col_values in new_columns.items():
    df[col_name] = col_values

In [9]:
# Work on a new frame without the 'model' column; df itself is left untouched.
df_reformat = df.drop(columns='model')


In [10]:
# Pivot to one row per (Magnitude, Proba) and one score column per (task, region).
index_cols = ['Magnitude', 'Proba']
df_pivot = df_reformat.pivot(index=index_cols, columns=['task', 'region'], values='score')

# Flatten the (task, region) column tuples into plain string names.
df_pivot.columns = [f'score_{col}' for col in df_pivot.columns]

# Resetting index to get a flat DataFrame
df_pivot = df_pivot.reset_index()

# Pick the Isomap columns by name rather than by position, so that the average
# stays correct whatever column order pivot() produces.
isomap_cols = [col for col in df_pivot.columns if col.startswith("score_('Isomap_")]
other_score_cols = [col for col in df_pivot.columns
                    if col not in index_cols and col not in isomap_cols]

df_pivot['score_Isomap_left_global'] = df_pivot[isomap_cols].astype(float).mean(axis=1)

# Final layout: parameters, non-Isomap scores, Isomap average, Isomap dimensions.
cols = index_cols + other_score_cols + ['score_Isomap_left_global'] + isomap_cols
df_pivot=df_pivot[cols] # swap cols

In [11]:
# Express every score column (everything after the two parameter columns)
# as a percentage rounded to one decimal place.
score_cols = list(df_pivot.columns[2:])
numeric_scores = df_pivot[score_cols].apply(pd.to_numeric)
df_pivot[score_cols] = (numeric_scores * 100).round(1)

In [12]:
# Display the final table (score columns were multiplied by 100 and rounded to 1 decimal above).
df_pivot

Unnamed: 0,Magnitude,Proba,"score_('Left_OFC', 'SOr')","score_('Right_FIP', 'FIP')",score_Isomap_left_global,"score_('Isomap_central_left_dim1', 'SC-sylv')","score_('Isomap_central_left_dim2', 'SC-sylv')","score_('Isomap_central_left_dim3', 'SC-sylv')","score_('Isomap_central_left_dim4', 'SC-sylv')","score_('Isomap_central_left_dim5', 'SC-sylv')","score_('Isomap_central_left_dim6', 'SC-sylv')"
0,1,40,65.8,73.3,4.1,7.1,1.2,5.8,1.3,4.9,4.4
1,1,80,62.8,75.5,3.5,8.0,1.0,5.8,-0.0,3.3,3.0
2,1,100,63.6,72.7,6.8,9.8,-0.4,18.0,-0.2,4.2,9.5
3,2,40,64.8,73.2,4.2,7.7,0.4,8.1,-0.1,4.1,4.8
4,2,80,65.9,68.6,3.2,7.7,-0.4,5.9,-0.1,3.2,3.1
5,2,100,61.1,71.0,11.6,14.6,7.2,27.7,4.6,4.6,11.0
6,3,40,62.9,70.7,8.5,10.8,4.2,21.5,5.3,1.1,8.0
7,3,80,62.8,74.2,5.5,9.4,1.3,10.0,0.8,5.9,5.6
8,3,100,64.7,74.6,12.3,16.7,6.8,27.2,6.1,5.9,11.0


In [98]:
# Write the final table to the CSV file at save_dir, without the row index.
df_pivot.to_csv(save_dir, index=False)