Notebook to preprocess and display classifiers results for the beta-VAEs computed by Louise.

In [1]:
import os
import json
import pandas as pd
import numpy as np

from scipy.stats import ttest_ind

from contrastive.utils.models_database import
 import_bdd

## Get ACCpatterns1 from ACCpatterns embeddings

In [2]:
acc0_subjects = pd.read_csv("/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/ACCpatterns_1_subject.csv")
acc0_subjects

Unnamed: 0,Subject
0,1-20FORHU_t0
1,1-23CREES_t0
2,2-02FERMA_t0
3,2-04HEBTO_t0
4,2-08BESAG_t0
...,...
166,sub-inhibition23_ses-pretraining_T1w
167,sub-inhibition24_ses-pretraining_T1w
168,sub-inhibition25_ses-pretraining_T1w
169,sub-inhibition28_ses-pretraining_T1w


In [3]:
def get_acc0_embs(embs_path, acc0_embs_path, acc0_subjects=acc0_subjects):
    embs = pd.read_csv(embs_path, index_col=0)
    acc0_embs = embs[embs.index.isin(acc0_subjects.Subject.values)]
    acc0_embs.sort_index(inplace=True)

    # save the acc0 embeddings
    folder_path = '/' + os.path.join(*acc0_embs_path.split('/')[:-1])
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)
    acc0_embs.to_csv(acc0_embs_path)
    return acc0_embs

In [6]:
# for i in range(1,6):
#     embs_path = f"/neurospin/dico/deep_folding/papers/ipmi2023/models/beta-VAE/n_30/#{i}/full_embeddings.csv"
#     acc0_embs_path = '/' + os.path.join(*embs_path.split('/')[:-1]) + '/cingulate_ACCpatterns_1_embeddings/full_embeddings.csv'

#     get_acc0_embs(embs_path, acc0_embs_path)

## Resume beta-VAE results

Visualisation of the latent spaces in latent_space_visualization.ipynb

In [17]:
# compute and plot the dataframe of beta-VAE performances

folder_path = "/neurospin/dico/data/deep_folding/papers/ipmi2023/models/beta-VAE"
dims = [4, 10, 30]
dataset = 'cingulate_ACCpatterns_1'

df = pd.DataFrame(np.zeros((3,1)), index=dims, columns=['beta-VAE'])
std_df = pd.DataFrame(np.zeros((3,1)), index=dims, columns=['beta-VAE'])
bdd = pd.DataFrame(np.zeros((5*len(dims),2)),
                   columns=['num_representation_features', 'auc'])
bdd = bdd.astype({'num_representation_features': int})

for k,dim in enumerate(dims):
    performances = []
    for i in range(1,6):
        values_file = folder_path + f'/n_{dim}/#{i}/{dataset}_embeddings/values.json'
        with open(values_file, 'r') as file:
            values = json.load(file)
            auc = values['cross_val_auc'][0]
            performances.append(auc)
            bdd.loc[5*k+i-1,"auc"] = auc
            bdd.loc[5*k+i-1,"num_representation_features"] = dim
    performances = np.array(performances)
    print(dim, performances)
    df.iloc[k,0] = performances.mean()
    std_df.iloc[k,0] = performances.std()

df

4 [0.6554241  0.68284149 0.47161867 0.66941963 0.68141515]
10 [0.64443584 0.72822729 0.70554459 0.64963629 0.69878921]
30 [0.63314677 0.62513894 0.64832541 0.60871065 0.64623036]


Unnamed: 0,beta-VAE
4,0.632144
10,0.685327
30,0.63231


In [18]:
bdd = bdd.astype({'num_representation_features': str})

In [19]:
bdd.head()

Unnamed: 0,num_representation_features,auc
0,4,0.655424
1,4,0.682841
2,4,0.471619
3,4,0.66942
4,4,0.681415


In [22]:
bdd.to_csv(f"{folder_path}/bdd_beta-VAE_evaluation-ACCpatterns-1.csv",
            index=False)

In [8]:
std_df

Unnamed: 0,beta-VAE
4,0.080867
10,0.032791
30,0.014556


## Comparison with SimCLR convnet

In [9]:
bdd = import_bdd("/neurospin/dico/agaudin/Runs/new_bdd_models_0.csv")
convnets = bdd[(bdd.temperature == 0.1) & (bdd.drop_rate == 0.05) & (bdd.backbone_name == 'convnet')]
convnets

Unnamed: 0,model_path,auc,auc_std,accuracy,accuracy_std,backbone_name,batch_size,drop_rate,early_stopping_patience,foldlabel,...,num_outputs,num_representation_features,temperature,temperature_initial,train_loss,val_loss,exclude,0.1_quantile,dataset_name,git_branch
88,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.778832,0.00302,0.721976,0.010526,convnet,16,0.05,100,True,...,10,10,0.1,0.1,0.433261,0.736669,False,,cingulate_HCP_half_1,Run_03_aymeric
89,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.776295,0.005263,0.748871,0.009153,convnet,16,0.05,100,True,...,10,10,0.1,0.1,0.46212,0.800465,False,,cingulate_HCP_half_1,Run_03_aymeric
34,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.760827,0.004171,0.703388,0.013148,convnet,16,0.05,100,True,...,4,4,0.1,0.1,1.240257,1.66916,False,0.078811,cingulate_HCP_half_1,Run_03_aymeric
66,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.749497,0.005254,0.668212,0.009069,convnet,16,0.05,100,False,...,30,30,0.1,0.1,0.086302,0.153539,False,-0.14738,cingulate_HCP_half_1,Run_03_aymeric
91,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.745395,0.008467,0.671365,0.011422,convnet,16,0.05,100,True,...,10,10,0.1,0.1,0.442286,0.582167,False,,cingulate_HCP_half_1,Run_03_aymeric
53,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.74265,0.001482,0.662259,0.00802,convnet,16,0.05,100,True,...,4,4,0.1,0.1,1.37401,1.463949,False,0.199899,cingulate_HCP_half_1,Run_03_aymeric
45,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.740142,0.013671,0.694188,0.017061,convnet,16,0.05,100,True,...,4,4,0.1,0.1,1.954553,2.959328,False,0.652514,cingulate_HCP_half_1,Run_03_aymeric
54,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.736406,0.00311,0.698071,0.006667,convnet,16,0.05,100,True,...,4,4,0.1,0.1,1.251985,1.929024,False,0.594088,cingulate_HCP_half_1,Run_03_aymeric
44,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.734367,0.004978,0.707529,0.010938,convnet,16,0.05,100,False,...,4,4,0.1,0.1,1.020133,1.204887,False,0.319042,cingulate_HCP_half_1,Run_03_aymeric
90,/neurospin/dico/agaudin/Runs/04_pointnet/Outpu...,0.729305,0.00641,0.673506,0.0141,convnet,16,0.05,100,True,...,10,10,0.1,0.1,0.416806,0.869529,False,,cingulate_HCP_half_1,Run_03_aymeric


In [15]:
dataset = 'cingulate_ACCpatterns_0'

dims = [4, 10, 30]
foldlabels = [True, False]

for dim in dims:
    # load beta-VAE perf
    beta_VAE_perfs = []
    for i in range(1,6):
        values_file = folder_path + f'/n_{dim}/#{i}/{dataset}_embeddings/values.json'
        with open(values_file, 'r') as file:
            values = json.load(file)
            beta_VAE_perfs.append(values['cross_val_auc'][0])
    beta_VAE_perfs = np.array(beta_VAE_perfs)

    for foldlabel in foldlabels:
        # load SimCLR perfs
        SimCLR_perfs = convnets[(convnets.num_representation_features == dim) & (convnets.foldlabel == foldlabel)].auc

        # compute ttest
        print("Conditions:", dim, foldlabel)
        print("Performances: beta-VAE", beta_VAE_perfs.mean(), "; SimCLR", SimCLR_perfs.mean())
        print(ttest_ind(beta_VAE_perfs, SimCLR_perfs, equal_var=False))
        print("")


Conditions: 4 True
Performances: beta-VAE 0.7136981078389529 ; SimCLR 0.7388929292929293
Ttest_indResult(statistic=-2.9871399489719495, pvalue=0.023669163048992123)

Conditions: 4 False
Performances: beta-VAE 0.7136981078389529 ; SimCLR 0.6933689951154739
Ttest_indResult(statistic=1.8915646273774136, pvalue=0.10341873786471027)

Conditions: 10 True
Performances: beta-VAE 0.7163735381988904 ; SimCLR 0.7516682316118934
Ttest_indResult(statistic=-2.46840808108888, pvalue=0.03977247083201307)

Conditions: 10 False
Performances: beta-VAE 0.7163735381988904 ; SimCLR nan
Ttest_indResult(statistic=nan, pvalue=nan)

Conditions: 30 True
Performances: beta-VAE 0.6741881917769241 ; SimCLR 0.6619396215677906
Ttest_indResult(statistic=0.29954619717257025, pvalue=0.7752644001402708)

Conditions: 30 False
Performances: beta-VAE 0.6741881917769241 ; SimCLR 0.6940471759852042
Ttest_indResult(statistic=-0.8613748121764792, pvalue=0.4143813077524445)

