In [52]:
import os
import pandas as pd
import json
import yaml
import numpy as np

In [1]:
def get_subdirs(directory):
    sub_dirs = os.listdir(directory)
    sub_dirs = [os.path.join(directory, name) for name in sub_dirs]
    sub_dirs = [path for path in sub_dirs if os.path.isdir(path)]
    return sub_dirs

get_subdirs("/neurospin/dico/agaudin/Runs/04_pointnet/Output/2022-08-02")

['/neurospin/dico/agaudin/Runs/04_pointnet/Output/2022-08-02/aymeric_dense_T=0.5',
 '/neurospin/dico/agaudin/Runs/04_pointnet/Output/2022-08-02/aymeric_dense_T=0.5(2)',
 '/neurospin/dico/agaudin/Runs/04_pointnet/Output/2022-08-02/joel_dense_T=0.1(2)',
 '/neurospin/dico/agaudin/Runs/04_pointnet/Output/2022-08-02/joel_dense_T=0.1']

In [97]:
def generate_bdd_models(folders, bdd_models, visited, dataset='cingulate_ACCpatterns', verbose=True):
    if verbose:
        print("Start", len(folders), len(bdd_models))

    while folders != []:
        # remove folders already treated
        folders = [folder for folder in folders if folder not in visited]
        
        # condition as folders can be emptied by the previous line
        if folders != []:
            dir_path = folders.pop()
            visited.append(dir_path)
            
            # checks if directory
            if os.path.isdir(dir_path):
                # check if directory associated to a model
                if os.path.exists(dir_path+'/.hydra/config.yaml'):
                    print("Treating", dir_path)
                    # check if values and parameters computed for the model
                    if os.path.exists(dir_path + f"/{dataset}_embeddings/values.json"):
                        model_dict = {}
                        model_dict['model_path'] = dir_path
                        # read performances
                        with open(dir_path + f"/{dataset}_embeddings/values.json", 'r') as file:
                            values = json.load(file)
                            decomposed_values = {'auc': values['cross_val_auc'][0],
                                                 'auc_std': values['cross_val_auc'][1],
                                                 'accuracy': values['cross_val_total_accuracy'][0],
                                                 'accuracy_std': values['cross_val_total_accuracy'][1]}
                            model_dict.update(decomposed_values)
                        # read parameters
                        with open(dir_path+'/partial_config.yaml', 'r') as file2:
                            partial_config = yaml.load(file2, Loader=yaml.FullLoader)
                            model_dict.update(partial_config)
                        bdd_models.append(model_dict)
                        if verbose:
                            print("End model", len(folders), len(bdd_models))

                    else:
                        print(f"Model does not have embeddings and their evaluation OR \
they are done with another database than {dataset}")

                else:
                    print(f"{dir_path} not associated to a model. Continue")
                    new_dirs = get_subdirs(dir_path)
                    folders.extend(new_dirs)
                    # remove folders already treated
                    folders = [folder for folder in folders if folder not in visited]
                    if verbose:
                        print("End recursive", len(folders), len(bdd_models))
                    
                    generate_bdd_models(folders, bdd_models, visited, dataset=dataset, verbose=verbose)
            
            else:
                print(f"{dir_path} is a file. Continue.")
                if verbose:
                    print("End file", len(bdd_models))

In [196]:
folders = ["/neurospin/dico/agaudin/Runs/04_pointnet/Output", "/neurospin/dico/agaudin/Runs/03_monkeys/Output/analysis_folders/convnet",
"/neurospin/dico/agaudin/Runs/03_monkeys/Output/analysis_folders/densenet2", "/neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration"]
bdd_models = []
visited = []

generate_bdd_models(folders, bdd_models, visited, verbose=False)

len(bdd_models)

/neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration not associated to a model. Continue
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/1mm_no_30
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/base3
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/1mm_fold_30
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/1mm_no_4
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/base1
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/drop_rate_0
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/base2
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/T_0.01
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/batch_size_4
Treating /neurospin/dico/agaudin/Runs/03_monkeys/Output/convnet_exploration/1mm_no_4(2)
Treating /neurospin/dico/agaudin/Runs/03_mon

68

In [197]:
bdd_models = pd.DataFrame(bdd_models)
bdd_models

Unnamed: 0,model_path,auc,auc_std,accuracy,accuracy_std,backbone_name,batch_size,checkerboard_size,depth_decoder,drop_rate,...,seed,sigma,temperature,temperature_initial,train_val_csv_file,weight_decay,with_labels,block_config,growth_rate,num_init_features
0,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.582225,0.041573,0.553138,0.043799,convnet,16,4,3,0.15,...,1,5,0.1,0.1,/neurospin/dico/data/deep_folding/papers/midl2...,0.00005,False,,,
1,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.644770,0.040686,0.610534,0.032800,convnet,16,4,3,0.15,...,1,5,0.1,0.1,/neurospin/dico/data/deep_folding/papers/midl2...,0.00005,False,,,
2,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.586549,0.037230,0.562088,0.034258,convnet,16,4,3,0.15,...,1,5,0.1,0.1,/neurospin/dico/data/deep_folding/papers/midl2...,0.00005,False,,,
3,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.488471,0.035180,0.548809,0.032750,convnet,16,4,3,0.15,...,1,5,0.1,0.1,/neurospin/dico/data/deep_folding/papers/midl2...,0.00005,False,,,
4,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.673959,0.041645,0.629466,0.034312,convnet,16,4,3,0.15,...,1,5,0.1,0.1,/neurospin/dico/data/deep_folding/papers/midl2...,0.00005,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.576889,0.056377,0.567038,0.039782,convnet,16,4,3,0.15,...,1,5,0.5,0.5,,0.00005,False,,,
64,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.493466,0.038922,0.503871,0.040189,,16,4,3,0.15,...,1,5,0.1,0.1,/neurospin/dico/data/deep_folding/papers/midl2...,0.00005,False,"[6, 16]",32.0,64.0
65,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.596988,0.039084,0.570522,0.037998,,16,4,3,0.15,...,1,5,0.1,0.1,/neurospin/dico/data/deep_folding/papers/midl2...,0.00005,False,"[6, 16]",32.0,64.0
66,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.490982,0.028938,0.525619,0.023270,densenet,16,4,3,0.15,...,1,5,0.5,0.5,,0.00005,False,"[6, 16]",32.0,64.0


In [198]:
def post_process_bdd_models(bdd_models, hard_remove=[]):
    # hard_remove contains columns you want to remove by hand
    bdd_models = bdd_models.drop(columns=hard_remove)

    # deal with '[' and ']'

    # specify git branch
    bdd_models['git_branch'] = 'Run_03_aymeric'
    bdd_models.loc[bdd_models.backbone_name.isna(), 'git_branch'] = 'Run_43_joel'


    # remove columns where the values never change
    remove = []
    for col in bdd_models.columns:
        col_values = bdd_models[col].dropna().unique()
        if len(col_values) <= 1:
            remove.append(col)
    bdd_models = bdd_models.drop(columns=remove)

    # sort by model_path
    bdd_models.sort_values(by="model_path", axis=0) # doesn't work I don't know why

    # sort columns by alphabetical order
    bdd_models.sort_index(axis=1)

    return bdd_models

In [200]:
bdd_models = post_process_bdd_models(bdd_models, hard_remove=["partition", "patch_size", "block_config"])
bdd_models

fin Index(['model_path', 'auc', 'auc_std', 'accuracy', 'accuracy_std',
       'backbone_name', 'batch_size', 'drop_rate', 'early_stopping_patience',
       'foldlabel', 'input_size', 'lr', 'max_angle', 'max_epochs',
       'num_outputs', 'num_representation_features', 'numpy_all',
       'temperature', 'temperature_initial', 'git_branch'],
      dtype='object')


Unnamed: 0,model_path,auc,auc_std,accuracy,accuracy_std,backbone_name,batch_size,drop_rate,early_stopping_patience,foldlabel,input_size,lr,max_angle,max_epochs,num_outputs,num_representation_features,numpy_all,temperature,temperature_initial,git_branch
0,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.582225,0.041573,0.553138,0.043799,convnet,16,0.15,100,False,"(1, 22, 46, 38)",0.0004,10,250,30,30,/neurospin/dico/data/deep_folding/current/data...,0.1,0.1,Run_03_aymeric
1,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.644770,0.040686,0.610534,0.032800,convnet,16,0.15,100,False,"(1, 17, 40, 38)",0.0004,10,250,30,30,/neurospin/dico/data/deep_folding/current/data...,0.1,0.1,Run_03_aymeric
2,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.586549,0.037230,0.562088,0.034258,convnet,16,0.15,100,True,"(1, 22, 46, 38)",0.0004,6,250,30,30,/neurospin/dico/data/deep_folding/current/data...,0.1,0.1,Run_03_aymeric
3,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.488471,0.035180,0.548809,0.032750,convnet,16,0.15,100,False,"(1, 22, 46, 38)",0.0004,10,250,4,4,/neurospin/dico/data/deep_folding/current/data...,0.1,0.1,Run_03_aymeric
4,/neurospin/dico/agaudin/Runs/03_monkeys/Output...,0.673959,0.041645,0.629466,0.034312,convnet,16,0.15,100,False,"(1, 17, 40, 38)",0.0004,10,250,30,30,/neurospin/dico/data/deep_folding/current/data...,0.1,0.1,Run_03_aymeric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.576889,0.056377,0.567038,0.039782,convnet,16,0.15,100,False,"(1, 17, 40, 38)",0.0004,10,20,30,12,/neurospin/dico/data/deep_folding/current/data...,0.5,0.5,Run_03_aymeric
64,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.493466,0.038922,0.503871,0.040189,,16,0.15,250,False,"(1, 17, 40, 38)",0.0004,10,300,30,30,/neurospin/dico/data/deep_folding/current/data...,0.1,0.1,Run_43_joel
65,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.596988,0.039084,0.570522,0.037998,,16,0.15,100,False,"(1, 17, 40, 38)",0.0004,10,250,30,30,/neurospin/dico/data/deep_folding/current/data...,0.1,0.1,Run_43_joel
66,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.490982,0.028938,0.525619,0.023270,densenet,16,0.15,100,False,"(1, 17, 40, 38)",0.0004,10,250,30,30,/neurospin/dico/data/deep_folding/current/data...,0.5,0.5,Run_03_aymeric


In [202]:
save_path = "/neurospin/dico/agaudin/Runs/bdd_models.csv"
bdd_models.to_csv(save_path, index=True)

In [193]:
bdd_models['git_branch'] = 'Run_03_aymeric'
bdd_models.columns

Index(['model_path', 'auc', 'auc_std', 'accuracy', 'accuracy_std',
       'backbone_name', 'batch_size', 'checkerboard_size', 'depth_decoder',
       'drop_rate', 'early_stopping_patience', 'environment', 'fill_value',
       'foldlabel', 'input_size', 'keep_bottom', 'lr', 'max_angle',
       'max_epochs', 'mode', 'model', 'nb_subjects', 'num_outputs',
       'num_representation_features', 'numpy_all', 'partition', 'patch_size',
       'pin_mem', 'seed', 'sigma', 'temperature', 'temperature_initial',
       'train_val_csv_file', 'weight_decay', 'with_labels', 'block_config',
       'growth_rate', 'num_init_features', 'git_branch'],
      dtype='object')