Notebook to preprocess and display classifiers results for the beta-VAEs computed by Louise.

In [19]:
import os
import json
import pandas as pd
import numpy as np

## Get ACCpatterns0 from ACCpatterns embeddings

In [11]:
acc0_subjects = pd.read_csv("/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/ACCpatterns_0_subject.csv")
acc0_subjects

Unnamed: 0,Subject
0,1-03HUBJO_t0
1,1-08ANDTI_t0
2,1-11LEBJO_t0
3,1-15LEHMI_t0
4,1-17COLMA_t0
...,...
165,sub-inhibition17_ses-pretraining_T1w
166,sub-inhibition18_ses-pretraining_T1w
167,sub-inhibition22_ses-pretraining_T1w
168,sub-inhibition26_ses-pretraining_T1w


In [12]:
def get_acc0_embs(embs_path, acc0_embs_path, acc0_subjects=acc0_subjects):
    embs = pd.read_csv(embs_path, index_col=0)
    acc0_embs = embs[embs.index.isin(acc0_subjects.Subject.values)]
    acc0_embs.sort_index(inplace=True)

    # save the acc0 embeddings
    folder_path = '/' + os.path.join(*acc0_embs_path.split('/')[:-1])
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)
    acc0_embs.to_csv(acc0_embs_path)
    return acc0_embs

In [24]:
for i in range(1,6):
    embs_path = f"/neurospin/dico/lguillon/collab_joel_aymeric_cingulate/n_10/#{i}/full_embeddings.csv"
    acc0_embs_path = '/' + os.path.join(*embs_path.split('/')[:-1]) + '/cingulate_ACCpatterns_0_embeddings/full_embeddings.csv'

    get_acc0_embs(embs_path, acc0_embs_path)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


## Resume beta-VAE results

Visualisation of the latent spaces in latent_space_visualization.ipynb

In [27]:
# compute and plot the dataframe of beta-VAE performances

folder_path = "/neurospin/dico/lguillon/collab_joel_aymeric_cingulate"
dims = [4, 10, 30]
dataset = 'cingulate_ACCpatterns_0'

df = pd.DataFrame(np.zeros((3,1)), index=dims, columns=['beta-VAE'])
std_df = pd.DataFrame(np.zeros((3,1)), index=dims, columns=['beta-VAE'])

for k,dim in enumerate(dims):
    performances = []
    for i in range(1,6):
        values_file = folder_path + f'/n_{dim}/#{i}/{dataset}_embeddings/values.json'
        with open(values_file, 'r') as file:
            values = json.load(file)
            performances.append(values['cross_val_auc'][0])
    performances = np.array(performances)
    df.iloc[k,0] = performances.mean()
    std_df.iloc[k,0] = performances.std()

df

Unnamed: 0,beta-VAE
4,0.713698
10,0.716374
30,0.674188


In [28]:
std_df

Unnamed: 0,beta-VAE
4,0.008024
10,0.01827
30,0.034307
