Il s'agit de générer les UMAP à 20 dimensions : l'UMAP est ajusté (fit) sur UKBioBank, puis appliqué (transform) à UKBioBank et aux autres bases, en l'occurrence synesthètes et HCP.

In [14]:
import glob
import os
import omegaconf

import pandas as pd
import numpy as np
import random

import logging

from umap import UMAP

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.decomposition import PCA

import statsmodels.api as sm

import matplotlib.pyplot as plt
import seaborn as sns


# Configure the root logger and set its threshold to INFO.
# Note: messages emitted with logging.debug(...) in this notebook are
# filtered out at this level.
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)


In [15]:
# UMAP hyper-parameters (also embedded in the output file name below)
n_neighbors = 15
n_components = 20

# Root directory that is scanned recursively for model folders
path_champollion = "/neurospin/dico/data/deep_folding/current/models/Champollion_V0"
# Per-dataset sub-directory (inside each model folder) holding the embeddings
synesthetes_embeddings_subpath = "synesthetes_random_embeddings"
hcp_embeddings_subpath = "hcp_random_embeddings"
# NOTE(review): same value as synesthetes_embeddings_subpath and not used in
# the cells visible here -- presumably a leftover alias; confirm before removing.
syn_embeddings_subpath = "synesthetes_random_embeddings"
ukb_embeddings_subpath = "ukb_random_embeddings"
# File read from each dataset sub-directory
input_embeddings = "full_embeddings.csv"
# File written next to the input, named after the UMAP hyper-parameters
output_embeddings = f"umap_c{n_components}_n{n_neighbors}_embeddings.csv"


In [16]:
def compute_umap(df, n_neighbors=15, n_components=2, random_state=None):
    """Fit a UMAP reducer on ``df`` and return the reduced data with the reducer.

    Parameters
    ----------
    df : array-like or DataFrame
        Data the reducer is fitted on (one row per sample).
    n_neighbors : int, default 15
        Forwarded to ``UMAP(n_neighbors=...)``.
    n_components : int, default 2
        Forwarded to ``UMAP(n_components=...)``.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``UMAP(random_state=...)``. The default ``None`` keeps
        the previous unseeded behaviour; pass an integer to make the
        embedding reproducible from one run to the next.

    Returns
    -------
    tuple
        ``(embedding, reducer)`` where ``embedding`` is the output of
        ``reducer.fit_transform(df)`` and ``reducer`` is the fitted ``UMAP``
        object, which can be reused to ``transform`` other datasets.
    """
    reducer = UMAP(
        n_neighbors=n_neighbors,
        n_components=n_components,
        random_state=random_state,
    )
    embedding = reducer.fit_transform(df)
    return embedding, reducer

In [17]:
def is_it_a_file(sub_dir):
    """Return True when ``sub_dir`` is not a directory, False otherwise.

    Anything that is not an existing directory (including a path that does
    not exist) is reported as a file; a debug message is logged in that case.
    """
    not_a_directory = not os.path.isdir(sub_dir)
    if not_a_directory:
        logging.debug(f"{sub_dir} is a file. Continue.")
    return not_a_directory
    

def is_folder_a_model(sub_dir):
    """Return True when ``sub_dir`` contains a ``.hydra/config.yaml`` entry.

    A folder without that entry is not considered a model; a debug message
    is logged and False is returned.
    """
    config_file = sub_dir+'/.hydra/config.yaml'
    has_config = os.path.exists(config_file)
    if not has_config:
        logging.debug(f"\n{sub_dir} not associated to a model. Continue")
    return has_config

def get_model_paths(dir_path, result = None):
    """Walk ``dir_path`` recursively and collect every model folder.

    A sub-folder counts as a model when ``is_folder_a_model`` accepts it;
    such folders are collected and not explored further. Other sub-folders
    are explored recursively, and plain files are ignored.

    ``result`` is an optional list to which the paths found are appended;
    a new list is created when it is None. The list is returned.
    """
    found = [] if result is None else result
    for entry in os.listdir(dir_path):
        candidate = dir_path + '/' + entry
        # Plain files can neither be models nor contain any
        if is_it_a_file(candidate):
            continue
        if is_folder_a_model(candidate):
            found.append(candidate)
        else:
            # Not a model itself: look for models deeper in the tree
            found.extend(get_model_paths(candidate))
    return found

In [18]:
# Collect every model folder found recursively under the Champollion root
model_paths = get_model_paths(path_champollion)

In [19]:
# Display the collected model folders
model_paths

['/neurospin/dico/data/deep_folding/current/models/Champollion_V0/SC-sylv_left/11-43-38_2',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/SC-sylv_left/14-00-57_116',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/SFinter-SFsup_left/09-33-02_0',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/STi-STs-STpol_right/20-15-00_117',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/FColl-SRh_right/09-45-57_1',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/STs-SGSM_left/13-57-40_59',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/SC-sylv_right/11-43-38_3',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/SC-sylv_right/13-19-08_28',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/Lobule_parietal_sup_left/09-48-31_0',
 '/neurospin/dico/data/deep_folding/current/models/Champollion_V0/SFint-SR_left/09-00-43_0',
 '/neurospin/dico/data/deep_folding/curren

In [20]:
def compute(model_path):
    """Compute and save the UMAP embeddings of one model for all datasets.

    The UMAP reducer is fitted on the UkBioBank embeddings of ``model_path``
    and then used to transform the synesthetes and HCP embeddings. Each
    result is written as a CSV (``output_embeddings``) next to the
    corresponding input file (``input_embeddings``), indexed like the input.

    If the three output files already exist, nothing is read or computed.

    Parameters
    ----------
    model_path : str
        Folder of the model; it must contain the dataset sub-folders named by
        ``ukb_embeddings_subpath``, ``synesthetes_embeddings_subpath`` and
        ``hcp_embeddings_subpath``.

    Returns
    -------
    None
    """
    print("Treating " + model_path)

    # Dataset folders: UkBioBank is used for fitting, the others are only transformed
    ukb_dir = f"{model_path}/{ukb_embeddings_subpath}"
    other_dirs = [
        f"{model_path}/{synesthetes_embeddings_subpath}",
        f"{model_path}/{hcp_embeddings_subpath}",
    ]

    # Builds output file names
    ukb_umap_file = f"{ukb_dir}/{output_embeddings}"
    other_umap_files = [f"{d}/{output_embeddings}" for d in other_dirs]

    # Checks the outputs BEFORE reading any CSV, so that an already-processed
    # model costs no I/O on a re-run
    if all(os.path.isfile(f) for f in [ukb_umap_file, *other_umap_files]):
        return

    # Loads all embeddings up front, so a missing input fails before anything is written
    ukb_emb = pd.read_csv(f"{ukb_dir}/{input_embeddings}", index_col=0)
    other_embs = [
        pd.read_csv(f"{d}/{input_embeddings}", index_col=0) for d in other_dirs
    ]

    # Fits umap on UkBioBank
    ukb_umap, reducer = compute_umap(ukb_emb, n_neighbors=n_neighbors, n_components=n_components)
    pd.DataFrame(ukb_umap, index=ukb_emb.index).to_csv(ukb_umap_file)

    # Transforms the other datasets with the reducer fitted on UkBioBank
    for emb, umap_file in zip(other_embs, other_umap_files):
        pd.DataFrame(reducer.transform(emb), index=emb.index).to_csv(umap_file)





In [21]:
# Computes and saves the UMAP embeddings of every model found above
# (compute() prints one "Treating ..." line per model)
for model_path in model_paths:
    compute(model_path)

Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SC-sylv_left/11-43-38_2
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SC-sylv_left/14-00-57_116
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SFinter-SFsup_left/09-33-02_0
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/STi-STs-STpol_right/20-15-00_117
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/FColl-SRh_right/09-45-57_1
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/STs-SGSM_left/13-57-40_59




Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SC-sylv_right/11-43-38_3
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SC-sylv_right/13-19-08_28
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/Lobule_parietal_sup_left/09-48-31_0
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SFint-SR_left/09-00-43_0
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SPoC_left/22-10-40_0
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SPoC_left/15-39-28_1
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SPoC_left/10-18-59_2
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SPoC_left/15-39-28_0
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SPoC_left/10-18-59_0
Treating /neurospin/dico/data/deep_folding/current/models/Champollion_V0/SPoC_left/10-18-59_1
Treating /neurospin/dico/data/de