In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import umap
import os
from model_evolution import get_models_path, get_epochs, check_embeddings, loader, chose_target, scale_based_on_UKB, classifier

In [3]:
# Apply seaborn's "darkgrid" theme globally so every figure drawn below uses it.
sns.set_theme(style="darkgrid")

In [4]:
# --- Run selection -----------------------------------------------------------
# Output directory of the training run to analyse. Exactly one `rootdir`
# assignment should be active; the others are earlier runs kept for reference.

# rootdir = '/neurospin/dico/adufournet/Runs/02_Heritability_Left_PCS_HCP/Output/2024-05-23/'  # strange one, likely due to branch merging issues

# rootdir = '/neurospin/dico/adufournet/Runs/01_Heritability_Right_PCS_HCP/Output/2024-05-27/'  # very consistent
# rootdir = '/neurospin/dico/adufournet/Runs/01_Heritability_Right_PCS_HCP/Output/2024-05-28/'  # very consistent too
# rootdir = '/neurospin/dico/adufournet/Runs/04_Heritability_Right_PCS_HCP_dim10/Output/2024-05-29/'  # to try! strange one too

# rootdir = '/neurospin/dico/adufournet/Runs/02_Heritability_Left_PCS_HCP/Output/2024-05-29/'  # to try! very consistent
# rootdir = '/neurospin/dico/adufournet/Runs/05_Heritability_Left_PCS_HCP_dim10/Output/2024-05-23/'  # what is expected

# rootdir = '/neurospin/dico/adufournet/mycode/Output/2024-07-22/'
rootdir = '/neurospin/dico/adufournet/mycode/Output//2024-07-23/'

# The run date is the last path component of `rootdir`. Deriving it (instead of
# retyping it or slicing at fixed character offsets) keeps it in sync whichever
# run is selected above. normpath drops the trailing and doubled slashes first.
model_date = os.path.basename(os.path.normpath(rootdir))

# Hemisphere and region tags; used below to build the figure output directory.
side = 'LEFT'
region = 'CINGULATE'

In [5]:
def encoder(df, columns):
    """Replace 'present'/'absent' labels with 1/0 in the given columns.

    Args:
        df: DataFrame holding the label columns. It is modified in place.
        columns: iterable of column names to encode.

    Returns:
        The same DataFrame object that was passed in.

    Note:
        Values other than 'present' or 'absent' are not in the mapping, so
        ``Series.map`` leaves NaN in their place.
    """
    presence_codes = {'present': 1, 'absent': 0}
    for name in columns:
        encoded = df[name].map(presence_codes)
        df[name] = encoded
    return df

# Load the ACC-pattern subject labels and keep only the subject identifier and
# the per-hemisphere PCS columns.
labels_ACCP = pd.read_csv("/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/subjects_labels.csv")
# Take an explicit copy: the columns are overwritten below, and assigning into
# a column slice of another frame triggers pandas' SettingWithCopyWarning.
labels_ACCP = labels_ACCP[['long_name', 'Left_PCS', 'Right_PCS']].copy()

# Encode 'present'/'absent' as 1/0 (encoder modifies the frame in place).
encoder(labels_ACCP, ['Left_PCS', 'Right_PCS'])
# 1 when the encoded left and right values differ, 0 when they are equal.
labels_ACCP['Asymmetry'] = (labels_ACCP.Left_PCS - labels_ACCP.Right_PCS).abs()

In [6]:
# Label column names; not used within this cell.
list_to_drop = ['Asymmetry', 'Left_PCS', 'Right_PCS']

# Models found under `rootdir`. The epoch list is read from the first model's
# directory only — presumably every model was saved at the same epochs; verify
# if runs with different schedules are mixed in one output directory.
list_model = get_models_path(rootdir)
epochs = get_epochs(f'{rootdir}{list_model[0]}/')
target = 'Left_PCS'

# Subjects whose position is tracked in the UMAP plots.
list_of_people_to_follow = [
    'sub-4662034', 'sub-4168749', 'sub-2815300', 'sub-2302098',
    'sub-5587423', 'sub-5627925', 'sub-4167965', 'sub-5040269',
    'sub-4405541', 'sub-1816223', 'sub-4170820',
]
# Assign a unique color to each person to follow. With no palette name,
# color_palette() returns the current color cycle and repeats it when asked for
# more colors than it holds, so two subjects would share a color as soon as the
# list is longer than the cycle. "husl" yields evenly spaced hues for any count.
colors = sns.color_palette("husl", n_colors=len(list_of_people_to_follow))

In [7]:
# For every model and every saved epoch: scale the embeddings, fit a 2-D UMAP
# on the UK Biobank embeddings, draw their density, overlay the followed
# subjects, and save one PNG per epoch.

# When True, only the UK Biobank embeddings are requested, scaled and projected.
UKBioBank_only = True

# Wrap the list itself (not `enumerate(...)`): tqdm can then read its length
# and show a real progress bar instead of a bare iteration counter.
for str_model in tqdm(list_model, desc='models'):
    pred_dic = {}

    # Both locations depend only on the model, so build them once per model.
    path = f'{rootdir}{str_model}/'
    save_dir = f'/volatile/ad279118/2023_jlaval_STSbabies/contrastive/notebooks/antoine/{side}_{region}/UMAP_evolution/{model_date}/{str_model}/'
    # exist_ok avoids the check-then-create race and the extra branch.
    os.makedirs(save_dir, exist_ok=True)

    for epoch in np.sort(list(epochs)):
        if not UKBioBank_only:
            embeddings_ACCP, embeddings_HCP, embeddings_UKB = loader(path, epoch)
            scl_bdd_accp, scl_bdd_hcp, scl_bdd_ukb, scaler = scale_based_on_UKB(embeddings_ACCP, embeddings_HCP, embeddings_UKB)
        else:
            embeddings_ACCP, embeddings_HCP, embeddings_UKB = loader(path, epoch, ['UKB'])
            scaler = StandardScaler()
            scl_bdd_ukb = scaler.fit_transform(embeddings_UKB)

        # The projection is always fitted on the scaled UKB embeddings only.
        # NOTE(review): no random_state is passed, so the layout can differ
        # between runs and between epochs — consider fixing a seed if the
        # figures are meant to be compared.
        reducer = umap.UMAP(n_neighbors=200)
        reducer.fit(scl_bdd_ukb)

        bdd_2D_UKB = pd.DataFrame(reducer.transform(scl_bdd_ukb), columns=['Dim1', 'Dim2'])
        bdd_2D_UKB['Dataset'] = 'UkBioBank'
        # Re-attach the embeddings' index so rows can be looked up by it below.
        bdd_2D_UKB = bdd_2D_UKB.set_index(embeddings_UKB.index)

        if not UKBioBank_only:
            # Project the other two datasets into the UKB-fitted space.
            bdd_2D_ACCP = pd.DataFrame(reducer.transform(scl_bdd_accp), columns=['Dim1', 'Dim2'])
            bdd_2D_HCP = pd.DataFrame(reducer.transform(scl_bdd_hcp), columns=['Dim1', 'Dim2'])
            bdd_2D_HCP['Dataset'] = 'hcp'
            bdd_2D_ACCP['Dataset'] = 'accp'
            bdd_2D_ACCP = bdd_2D_ACCP.set_index(embeddings_ACCP.index)
            bdd_2D_HCP = bdd_2D_HCP.set_index(embeddings_HCP.index)

        # Explicit figure/axes so every artist lands on this figure and it can
        # be closed once saved.
        fig, ax = plt.subplots(figsize=(12, 8))
        sns.kdeplot(
            data=bdd_2D_UKB,
            x="Dim1",
            y="Dim2",
            thresh=.1,
            fill=True,
            cbar=True,
            ax=ax,
        )

        # Overlay each followed subject; a single .loc[row, column] lookup
        # replaces the chained .loc[row][column] indexing.
        for idx, people_i in enumerate(list_of_people_to_follow):
            ax.scatter(
                bdd_2D_UKB.loc[people_i, 'Dim1'],
                bdd_2D_UKB.loc[people_i, 'Dim2'],
                s=10,
                color=colors[idx],
                label=f'{people_i}',
            )
        ax.set_title(f'Model {str_model}, Epoch {epoch}')

        # Thick proxy lines make the legend readable; the markers are tiny.
        custom_lines = [
            plt.Line2D([0], [0], color=colors[idx], lw=4)
            for idx in range(len(list_of_people_to_follow))
        ]
        ax.legend(
            custom_lines,
            [f'{people_i}' for people_i in list_of_people_to_follow],
            bbox_to_anchor=(-0.2, 0.5),
            loc='center right',
            title="Subjects",
        )

        # Save the figure, display it, then release it so figures do not
        # accumulate over the models x epochs iterations.
        fig.tight_layout()
        fig.savefig(os.path.join(save_dir, f'epoch_{epoch}.png'))
        plt.show()
        plt.close(fig)


0it [00:00, ?it/s]