Notebook where to plot the curves and display the performance of all models in 
a chosen folder. It is also where to conduct the performance analysis about the
models database.

In [16]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpi
import json


from contrastive.utils.config import get_config_diff

## Function definitions

In [17]:
def exclude_conditions(diff_config, excluded_conditions):
    excluded_subdirs = []
    for cond in excluded_conditions:
        filtered_diff_config = diff_config.query(cond)
        excluded_subdirs.extend(filtered_diff_config['Unnamed: 0'].values)
    return excluded_subdirs
#exclude_conditions(diff_config, excluded_conditions=["lr != 0.0004"])

In [18]:
def show_results(dir_path, figsize=(16,10), return_config_diff=False, dataset='cingulate_ACCpatterns_0', excluded_conditions=[], show=True, key_name=""):
    # be sure that all models are comparable
    get_config_diff(dir_path, whole_config=False)
    diff_config = pd.read_csv(dir_path+'/config_diff.csv')

    excluded_subdirs = exclude_conditions(diff_config, excluded_conditions)
    if excluded_conditions != []:
        print("Excluded subdirs :", excluded_subdirs)
        print("Excluded because of ", excluded_conditions)

    if show:
        print("diff_config")
        print(diff_config[~diff_config['Unnamed: 0'].isin(excluded_subdirs)])

    paths = []
    if key_name:
        keys = []
    accuracy = []
    accuracy_std = []
    auc = []
    auc_std = []
    # plot all ROC curves
    for subdir in os.listdir(dir_path):
        if os.path.isdir(dir_path + '/' + subdir) and subdir not in excluded_subdirs:
            path = dir_path + '/' + subdir + '/' + dataset + '_embeddings'
            with open(path+'/values.json', 'r') as file:
                values = json.load(file)
            # print(subdir)
            # print("AUC", values['cross_val_auc'])
            if show:
                img = mpi.imread(path+'/cross_val_ROC_curves.png')
                plt.figure(figsize=figsize)
                plt.imshow(img)
                plt.show()

            # store the AUC and accuracy in a df
            paths.append(subdir)
            if key_name:
                key = diff_config[diff_config['Unnamed: 0'] == subdir][key_name].values[0]
            # print(f"subdir = {subdir}; batch_size = {batch_size}; diff_config = {diff_config[diff_config['Unnamed: 0'] == subdir]}")
            if key_name:
                keys.append(key)
            # print(diff_config[diff_config['Unnamed: 0'] == subdir])
            accuracy.append(values['cross_val_total_accuracy'][0])
            accuracy_std.append(values['cross_val_total_accuracy'][1])
            auc.append(values['cross_val_auc'][0])
            auc_std.append(values['cross_val_auc'][1])

    results_df = pd.DataFrame()
    if key_name:
        print(keys)
        results_df[key_name] = keys
    results_df['path'] = paths
    results_df['auc'] = auc
    results_df['auc_std'] = auc_std
    results_df['accuracy'] = accuracy
    results_df['accuracy_std'] = accuracy_std


    if return_config_diff:
        return results_df, diff_config
    else:
        return results_df

## Gender: temp=0.5, prop=0.5, trained on HCP-haf-2

In [19]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_2/supervised/temp-0.5_prop-0.5",
    dataset='cingulate_ACCpatterns_1',
    excluded_conditions=excluded_conditions,
    show=False)

results_df

Unnamed: 0,path,auc,auc_std,accuracy,accuracy_std
0,15-06-16_1,0.687728,0.002609,0.654339,0.006407
1,15-06-16_0,0.684006,0.003762,0.67848,0.007104
2,15-06-16_2,0.72019,0.007042,0.688982,0.007736
3,15-06-16_4,0.621985,0.004735,0.614573,0.005571
4,15-06-16_3,0.673402,0.006276,0.635673,0.010568


In [20]:
results_df.sort_values(ascending=False, by='auc')[['path', 'auc']]

Unnamed: 0,path,auc
2,15-06-16_2,0.72019
0,15-06-16_1,0.687728
1,15-06-16_0,0.684006
4,15-06-16_3,0.673402
3,15-06-16_4,0.621985


In [21]:
results_df.sort_values(ascending=False, by='auc')[['path', 'auc']].mean()

auc    0.677462
dtype: float64

## Gender: temp=0.1,0.3,0.5, prop=0.5,0.8 trained on HCP-haf-1

In [22]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_1/supervised",
    dataset='cingulate_ACCpatterns_0',
    excluded_conditions=excluded_conditions,
    show=False)

results_df

FileNotFoundError: [Errno 2] No such file or directory: '/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_1/supervised/gender/cingulate_ACCpatterns_0_embeddings/values.json'

In [None]:
results_df.sort_values(ascending=False, by='auc')[['path', 'auc']]

Unnamed: 0,path,auc
2,16-26-39_0,0.718857
0,16-26-39_3,0.716612
1,16-26-39_2,0.688256
3,16-26-39_1,0.681777


## NEOFAC_C

In [34]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_1/supervised/NEOFAC_C/prop-0.9",
    dataset='cingulate_ACCpatterns_0',
    excluded_conditions=excluded_conditions,
    show=False)

results_df.sort_values(ascending=True, by='path')[['path', 'auc']]

Unnamed: 0,path,auc
7,11-13-32_0,0.792281
1,11-13-32_1,0.735586
9,11-13-32_2,0.709308
3,11-13-32_3,0.747328
4,13-37-34_0,0.747053
2,13-37-34_1,0.745411
0,13-37-34_2,0.730221
8,13-37-34_3,0.703578
5,13-37-34_4,0.731735
6,13-37-34_5,0.707767


In [32]:
results_df.mean()

auc             0.721352
auc_std         0.004422
accuracy        0.688794
accuracy_std    0.011642
dtype: float64

## posttraining 

In [56]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_1/secondtraining/on_HCP_half_1_NEOFAC_C/different-lr",
    dataset='cingulate_ACCpatterns_0',
    excluded_conditions=excluded_conditions,
    show=False)

results_df.sort_values(ascending=True, by='path')[['path', 'auc']]

Unnamed: 0,path,auc
6,10-04-19_0,0.921317
1,10-04-19_1,0.902935
9,10-04-19_2,0.900514
4,10-04-19_3,0.882886
3,10-04-19_4,0.849359
8,11-50-29,0.861501
5,11-55-55,0.851509
0,12-02-15,0.867294
7,12-03-07,0.900841
2,15-40-22,0.861085


In [53]:
results_df.sort_values(ascending=True, by='path')[['path', 'auc']].mean()

auc    0.834253
dtype: float64