In [33]:
import os
import glob
import re
import json
import yaml
import pandas as pd
import numpy as np

from contrastive.utils.models_database import get_subdirs, get_path2logs, get_loss

In [34]:
paths = glob.glob("/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/pretrained_UKB/ACCpatterns/same_side/16-36-40_0" + "/*_results")
path = paths[0]
print(path)

#regex = "/(?<=/).*?(?=_results)"
regex = r"[^/]*_results"

matches = re.findall(regex, path)
for match in matches:
    print(match)

/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/pretrained_UKB/ACCpatterns/same_side/16-36-40_0/cingulate_ACCpatterns_results
cingulate_ACCpatterns_results


In [35]:
def get_test_dataset(dir_path):
    regex = r"[^/]*_results"
    matches = re.findall(regex, path)
    match = matches[0]
    return match[:-8]

get_test_dataset(path)

'cingulate_ACCpatterns'

In [38]:
def process_supervised_model(model_path, verbose=False):
    # generate a dictionnary with the model's parameters and performances
    model_dict = {}
    model_dict['model_path'] = model_path

    # read performances
    results_dirs = glob.glob(model_path + "/*_results")
    for results_dir in results_dirs:
        dataset_name = get_test_dataset(results_dir)
        with open(results_dir + "/test_results.json", 'r') as file:
            values = json.load(file)
            decomposed_values = {f'{dataset_name}_test_auc': values['test_auc']}
            model_dict.update(decomposed_values)
        with open(results_dir + "/test_results_best_model.json", 'r') as file:
            values = json.load(file)
            decomposed_values = {f'{dataset_name}_test_auc_best_model': values['test_auc']}
            model_dict.update(decomposed_values)
    
    # read parameters
    with open(model_path+'/partial_config.yaml', 'r') as file2:
        partial_config = yaml.load(file2, Loader=yaml.FullLoader)
        model_dict.update(partial_config)
    
    # compute losses if necessary
    log_path = get_path2logs(model_path)
    if not os.path.exists(os.path.join(log_path, "final_losses.json")):
        if verbose:
            print(f"Get the losses for {model_path}.")
        get_loss(model_path, save=True, verbose=verbose)
    
    # get the final losses
    with open(os.path.join(log_path, "final_losses.json"), 'r') as file3:
        losses = json.load(file3)
        model_dict.update(losses)

    return model_dict

In [39]:
process_supervised_model("/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/pretrained_UKB/ACCpatterns/same_side/16-36-40_0")

Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


{'model_path': '/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/pretrained_UKB/ACCpatterns/same_side/16-36-40_0',
 'cingulate_ACCpatterns_test_auc': 0.7710233029381965,
 'cingulate_ACCpatterns_test_auc_best_model': 0.7669706180344478,
 'backbone_name': 'convnet',
 'batch_size': 16,
 'checkerboard_size': 4,
 'dataset_name': 'cingulate_ACCpatterns',
 'depth_decoder': 3,
 'drop_rate': 0.05,
 'early_stopping_patience': 25,
 'environment': 'not_brainvisa',
 'fill_value': 0,
 'foldlabel': True,
 'input_size': '(1, 20, 42, 38)',
 'keep_bottom': True,
 'lr': 0.001,
 'max_angle': 6,
 'max_epochs': 100,
 'mode': 'classifier',
 'model': 'SimCLR_supervised',
 'n_max': 1099,
 'nb_subjects': -1,
 'num_representation_features': 10,
 'partition': [0.9, 0.1],
 'percentage': 0,
 'pin_mem': True,
 'pretrained_model_path': '/neurospin/dico/jchavas/Runs/59_analysis_ukbiobank/Output/trained_on_ukbiobank/chosen_model/chosen_model/logs/default/version_0/checkpoints/epoch=250-step=297685.ckpt',
 'pr

In [40]:
def generate_bdd_supervised_models(folders, bdd_models, visited,
                                   verbose=True, best_model=True):
    # depth first exploration of folders to treat all the models in it
    
    if verbose:
        print("Start", len(folders), len(bdd_models))

    while folders != []:
        # remove folders already treated
        folders = [folder for folder in folders if folder not in visited]
        
        # condition as folders can be emptied by the previous line
        if folders != []:
            dir_path = folders.pop()
            visited.append(dir_path)
            
            # checks if directory
            if os.path.isdir(dir_path):
                # check if directory associated to a model
                if os.path.exists(dir_path+'/.hydra/config.yaml'):
                    print("Treating", dir_path)
                    # check if values and parameters computed for the model
                    if glob.glob(dir_path + "/*_results") != []:
                        model_dict = process_supervised_model(dir_path)
                        bdd_models.append(model_dict)


                        if verbose:
                            print("End model", len(folders), len(bdd_models))

                    else:
                        print(f"Model has not been evaluated yet.")

                else:
                    print(f"{dir_path} not associated to a model. Continue")
                    new_dirs = get_subdirs(dir_path)
                    folders.extend(new_dirs)
                    # remove folders already treated
                    folders = [folder for folder in folders if folder not in visited]
                    if verbose:
                        print("End recursive", len(folders), len(bdd_models))
                    
                    generate_bdd_supervised_models(folders, bdd_models, visited,
                                        verbose=verbose, best_model=best_model)
            
            else:
                print(f"{dir_path} is a file. Continue.")
                if verbose:
                    print("End file", len(bdd_models))

In [48]:
folders = ["/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised"]
bdd = []
visited = []

generate_bdd_supervised_models(folders, bdd, visited)

Start 1 0
/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised not associated to a model. Continue
End recursive 4 0
Start 4 0
/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/2023-04-25 not associated to a model. Continue
End recursive 4 0
Start 4 0
Treating /neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/2023-04-25/15-40-26
Model has not been evaluated yet.
/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/ACCpatterns not associated to a model. Continue
End recursive 4 0
Start 4 0
/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/ACCpatterns/R not associated to a model. Continue
End recursive 21 0
Start 21 0
Treating /neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/ACCpatterns/R/10-35-58_3
End model 20 1
Treating /neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/ACCpatterns/R/10-04-09_0
End model 19 2
Treating /neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/ACCpatterns/R/09-46-31_2
End model 18 3
Treating /neu

In [49]:
bdd

[{'model_path': '/neurospin/dico/agaudin/Runs/09_new_repo/Output/supervised/ACCpatterns/R/10-35-58_3',
  'cingulate_ACCpatterns_test_auc': 0.7355623100303951,
  'cingulate_ACCpatterns_test_auc_best_model': 0.7862208713272543,
  'backbone_name': 'convnet',
  'batch_size': 16,
  'checkerboard_size': 4,
  'dataset_name': 'cingulate_ACCpatterns',
  'depth_decoder': 3,
  'drop_rate': 0.05,
  'early_stopping_patience': 25,
  'environment': 'not_brainvisa',
  'fill_value': 0,
  'foldlabel': True,
  'input_size': '(1, 20, 42, 38)',
  'keep_bottom': True,
  'lr': 0.0005,
  'max_angle': 6,
  'max_epochs': 100,
  'mode': 'classifier',
  'model': 'SimCLR_supervised',
  'n_max': 1099,
  'nb_subjects': -1,
  'num_representation_features': 10,
  'partition': [0.9, 0.1],
  'percentage': 30,
  'pin_mem': True,
  'pretrained_model_path': None,
  'proportion_pure_contrastive': 0.0,
  'random_state': None,
  'seed': 1,
  'sigma': 5,
  'sigma_labels': 2.0,
  'temperature': 0.1,
  'temperature_initial': 0.1