In [4]:
import numpy as np
import os
import matplotlib.pyplot as plt
import json
from pathlib import Path
import pandas as pd

In [5]:
save_dir = '/home/jl277509/Documents/Ablation/trimextremities.csv'

# Step 1 : recover all perf values

In [6]:
# instead of printing, add in table
from pathlib import Path
import pandas as pd

basedir='/neurospin/dico/jlaval/Output/9_trimextremities'

sub_dirs = {'SOr': [f'{basedir}/SOr_left_UKB40/', ['troiani_custom_embeddings/Left_OFC/test_values.json'], 'test_weighted_auc'],
            'FIP': [f'{basedir}/FIP_right_UKB40/', ['FIP_right_custom_embeddings/Right_FIP/test_values.json'], 'test_auc'],
            'SC-sylv': [f'{basedir}/SC-sylv_left_UKB40/', [f'hcp_isomap_custom_embeddings/Isomap_central_left_dim{k}/test_values.json' for k in range(1,7)], 'test_r2']}

l = os.listdir(basedir)
root_names = [elem for elem in l if 'UKB40' not in elem]

In [7]:
# store everything globally
l_model_glob = []
l_perf_glob = []
l_region_glob = []
l_task_glob = []


for region, (sub_dir, auc_files, metric) in sub_dirs.items(): 
    for auc_file in auc_files:
        models = os.listdir(sub_dir)
        models = [elem for elem in models if 'config' not in elem and 'multirun' not in elem]
        models.sort()
        task = Path(auc_file).parts[-2]
        for name in root_names:
            #l_models = [elem for elem in models if elem.startswith(name)]
            l_models = [elem for elem in models if name in elem]
            l_perfs = []
            for model in l_models:
                try:
                    with open(os.path.join(sub_dir, model, auc_file), 'r') as f:
                        perf = json.load(f)
                        if isinstance(perf[metric], float):
                            value = perf[metric]
                        else:
                            value = perf[metric][0]
                        l_perfs.append(value)
                except Exception:
                    print(f'{os.path.join(sub_dir, model, auc_file)} not found') ## NB : can't continue if not all files found ...
            l_model_glob.append(l_models)
            l_perf_glob.append(l_perfs)
            l_region_glob.append([region for _ in range(len(l_perfs))])
            l_task_glob.append([task for _ in range(len(l_perfs))])

In [8]:
# reformat lists and put inside DataFrame
def flatten_sum(matrix):
    return sum(matrix, [])
cols = ['model', 'region', 'task', 'score']
values = np.array([flatten_sum(l_model_glob), flatten_sum(l_region_glob), flatten_sum(l_task_glob), flatten_sum(l_perf_glob)]).T
df = pd.DataFrame(data=values, columns=cols)

In [9]:
df

Unnamed: 0,model,region,task,score
0,3_trimextremities_pepper50_p40name17-12-20_73,SOr,Left_OFC,0.5563927137090607
1,2_trimextremities_pepper100_p20name17-11-03_128,SOr,Left_OFC,0.5906893630062519
2,1_trimextremities_pepper50_p20name17-10-42_22,SOr,Left_OFC,0.5267205532728242
3,4_trimextremities_pepper100_p40name17-17-24_165,SOr,Left_OFC,0.5961930175399934
4,2_trimextremities_pepper100_p20name17-10-42_179,FIP,Right_FIP,0.5848283179012346
5,1_trimextremities_pepper50_p20name17-00-18_48,FIP,Right_FIP,0.6563464506172839
6,4_trimextremities_pepper100_p40name17-13-04_157,FIP,Right_FIP,0.6160783179012346
7,3_trimextremities_pepper50_p40name17-11-34_198,SC-sylv,Isomap_central_left_dim1,0.045793460032222
8,2_trimextremities_pepper100_p20name17-11-03_245,SC-sylv,Isomap_central_left_dim1,0.0558078765016434
9,1_trimextremities_pepper50_p20name17-01-35_41,SC-sylv,Isomap_central_left_dim1,0.0691731373411481


# Step 2 : reorganize the df with the parameters

In [None]:
# reshape the df so that keep p is a column
# all and bottom are also in a column
list_magnitude=[]
list_proba=[]
for model in df['model'].tolist():
    if 'pepper50' in model:
        list_magnitude.append(50)
    elif 'pepper100' in model:
        list_magnitude.append(100)

    if 'p20' in model:
        list_proba.append(20) 
    elif 'p40' in model:
        list_proba.append(40)

In [11]:
df['Proba'] = list_proba
df['Magnitude']=list_magnitude

In [12]:
df_reformat = df.copy()
df_reformat = df_reformat.drop(labels='model', axis='columns')


In [13]:
# Pivoting the table
df_pivot = df_reformat.pivot(index=['Magnitude', 'Proba'], columns=['task', 'region'], values='score')

# Renaming columns
df_pivot.columns = [f'score_{col}' for col in df_pivot.columns]

# Resetting index to get a flat DataFrame
df_pivot = df_pivot.reset_index()

df_pivot['score_Isomap_left_global'] = df_pivot[df_pivot.columns[-6:]].astype(float).mean(axis=1)
cols = df_pivot.columns.tolist()
cols = cols[:4]+cols[-1:]+cols[4:-1] # put average Isomap before
df_pivot=df_pivot[cols] # swap cols

In [14]:
# *100 and keep only 1 decimal place
score_cols = df_pivot.columns[2:].tolist()
df_pivot[score_cols]=df_pivot[score_cols].apply(pd.to_numeric).mul(100).round(1)

In [15]:
df_pivot

Unnamed: 0,Magnitude,Proba,"score_('Left_OFC', 'SOr')","score_('Right_FIP', 'FIP')",score_Isomap_left_global,"score_('Isomap_central_left_dim1', 'SC-sylv')","score_('Isomap_central_left_dim2', 'SC-sylv')","score_('Isomap_central_left_dim3', 'SC-sylv')","score_('Isomap_central_left_dim4', 'SC-sylv')","score_('Isomap_central_left_dim5', 'SC-sylv')","score_('Isomap_central_left_dim6', 'SC-sylv')"
0,1,20,52.7,65.6,10.1,6.9,10.6,19.2,8.1,4.1,11.7
1,1,40,55.6,,8.6,4.6,7.5,14.5,13.3,3.6,8.4
2,2,20,59.1,58.5,6.7,5.6,5.2,9.0,7.2,5.6,7.3
3,2,40,59.6,61.6,8.3,2.5,6.3,9.2,18.6,6.1,7.0


In [98]:
df_pivot.to_csv(save_dir, index=False)