Notebook where to plot the curves and display the performance of all models in 
a chosen folder. It is also where to conduct the performance analysis about the
models database.

In [7]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpi
import json


from contrastive.utils.config import get_config_diff

## Function definitions

In [8]:
def exclude_conditions(diff_config, excluded_conditions):
    excluded_subdirs = []
    for cond in excluded_conditions:
        filtered_diff_config = diff_config.query(cond)
        excluded_subdirs.extend(filtered_diff_config['Unnamed: 0'].values)
    return excluded_subdirs
#exclude_conditions(diff_config, excluded_conditions=["lr != 0.0004"])

In [9]:
def show_results(dir_path, figsize=(16,10), return_config_diff=False, dataset='cingulate_ACCpatterns_0', excluded_conditions=[], show=True, key_name=""):
    # be sure that all models are comparable
    get_config_diff(dir_path, whole_config=False)
    diff_config = pd.read_csv(dir_path+'/config_diff.csv')

    excluded_subdirs = exclude_conditions(diff_config, excluded_conditions)
    if excluded_conditions != []:
        print("Excluded subdirs :", excluded_subdirs)
        print("Excluded because of ", excluded_conditions)

    if show:
        print("diff_config")
        print(diff_config[~diff_config['Unnamed: 0'].isin(excluded_subdirs)])

    paths = []
    if key_name:
        keys = []
    accuracy = []
    accuracy_std = []
    auc = []
    auc_std = []
    # plot all ROC curves
    for subdir in os.listdir(dir_path):
        if os.path.isdir(dir_path + '/' + subdir) and subdir not in excluded_subdirs:
            path = dir_path + '/' + subdir + '/' + dataset + '_embeddings'
            with open(path+'/values.json', 'r') as file:
                values = json.load(file)
            # print(subdir)
            # print("AUC", values['cross_val_auc'])
            if show:
                img = mpi.imread(path+'/cross_val_ROC_curves.png')
                plt.figure(figsize=figsize)
                plt.imshow(img)
                plt.show()

            # store the AUC and accuracy in a df
            paths.append(subdir)
            if key_name:
                key = diff_config[diff_config['Unnamed: 0'] == subdir][key_name].values[0]
            # print(f"subdir = {subdir}; batch_size = {batch_size}; diff_config = {diff_config[diff_config['Unnamed: 0'] == subdir]}")
            if key_name:
                keys.append(key)
            # print(diff_config[diff_config['Unnamed: 0'] == subdir])
            accuracy.append(values['cross_val_total_accuracy'][0])
            accuracy_std.append(values['cross_val_total_accuracy'][1])
            auc.append(values['cross_val_auc'][0])
            auc_std.append(values['cross_val_auc'][1])

    results_df = pd.DataFrame()
    if key_name:
        print(keys)
        results_df[key_name] = keys
    results_df['path'] = paths
    results_df['auc'] = auc
    results_df['auc_std'] = auc_std
    results_df['accuracy'] = accuracy
    results_df['accuracy_std'] = accuracy_std


    if return_config_diff:
        return results_df, diff_config
    else:
        return results_df

## Gender: temp=0.5, prop=0.5, trained on HCP-haf-2

In [10]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_2/supervised/temp-0.5_prop-0.5",
    dataset='cingulate_ACCpatterns_1',
    excluded_conditions=excluded_conditions,
    show=False)

results_df

FileNotFoundError: [Errno 2] No such file or directory: '/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_2/supervised/temp-0.5_prop-0.5'

In [None]:
results_df.sort_values(ascending=False, by='auc')[['path', 'auc']]

Unnamed: 0,path,auc
2,15-06-16_2,0.72019
0,15-06-16_1,0.687728
1,15-06-16_0,0.684006
4,15-06-16_3,0.673402
3,15-06-16_4,0.621985


In [None]:
results_df.sort_values(ascending=False, by='auc')[['path', 'auc']].mean()

auc    0.677462
dtype: float64

## Gender: temp=0.1,0.3,0.5, prop=0.5,0.8 trained on HCP-haf-1

In [None]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_1/supervised",
    dataset='cingulate_ACCpatterns_0',
    excluded_conditions=excluded_conditions,
    show=False)

results_df

FileNotFoundError: [Errno 2] No such file or directory: '/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_1/supervised/gender/cingulate_ACCpatterns_0_embeddings/values.json'

In [None]:
results_df.sort_values(ascending=False, by='auc')[['path', 'auc']]

Unnamed: 0,path,auc
2,16-26-39_0,0.718857
0,16-26-39_3,0.716612
1,16-26-39_2,0.688256
3,16-26-39_1,0.681777


## NEOFAC_C

In [20]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_1/secondtraining/on_HCP_half_1_NEOFAC_C/chosen_model",
    dataset='cingulate_ACCpatterns_0',
    excluded_conditions=excluded_conditions,
    show=False)

results_df.sort_values(ascending=True, by='path')[['path', 'auc']]

FileNotFoundError: [Errno 2] No such file or directory: '/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_1/secondtraining/on_HCP_half_1_NEOFAC_C/chosen_model/config_diff.csv'

In [14]:
results_df.mean()

auc             0.702513
auc_std         0.003516
accuracy        0.652235
accuracy_std    0.009802
dtype: float64

## fusion

In [None]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/fusion",
    dataset='cingulate_ACCpatterns_1',
    excluded_conditions=excluded_conditions,
    show=False)

results_df.sort_values(ascending=True, by='path')

Unnamed: 0,path,auc,auc_std,accuracy,accuracy_std
0,#1,0.701585,0.008265,0.639392,0.013598
3,#2,0.747525,0.005413,0.661684,0.010729
2,#3,0.717127,0.00484,0.658222,0.008781
4,#4,0.689954,0.003392,0.65876,0.00782
1,#5,0.68388,0.004237,0.636889,0.007387


In [None]:
results_df.sort_values(ascending=True, by='path')[['path', 'auc']].mean()

auc    0.708014
dtype: float64

On compare avec SimCLR utilisé pour la fusion:

In [None]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/contrastive/trained_on_HCP_half_2/all/foldlabel/convnet-10",
    dataset='cingulate_HCP_half_2',
    excluded_conditions=excluded_conditions,
    show=False)

results_df.sort_values(ascending=True, by='path')

Unnamed: 0,path,auc,auc_std,accuracy,accuracy_std
4,09-21-24_0,0.497888,0.026516,0.54041,0.006233
2,09-21-24_1,0.508995,0.026653,0.541704,0.006163
0,09-21-24_2,0.504336,0.016319,0.547303,0.007105
1,09-21-24_3,0.49578,0.031907,0.538677,0.009163
3,09-21-24_4,0.498287,0.010528,0.541686,0.005154


On compare avec beta-VAE utilisé pour la fusion

In [None]:
excluded_conditions = []

results_df = show_results(
    dir_path="/neurospin/dico/data/deep_folding/papers/ipmi2023/models/beta-VAE/n_10",
    dataset='cingulate_ACCpatterns_1',
    excluded_conditions=excluded_conditions,
    show=False)

results_df.sort_values(ascending=True, by='path')

Unnamed: 0,path,auc,auc_std,accuracy,accuracy_std
0,#1,0.680198,0.000255,0.677146,0.002374
3,#2,0.736819,0.000464,0.712795,0.002421
2,#3,0.731722,0.000301,0.696,0.000901
4,#4,0.660131,0.000258,0.654971,0.0
1,#5,0.717993,0.000803,0.676398,0.002811
