In [1]:
! pip install jupyterlab-widgets==1.1.1 ipywidgets==7.7.1
! pip install plotly==5.22.0



In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
import umap
import os



### In this example, the embeddings are generated from a Barlow Twin model trained on the Left CINGULATE region on UKB for the UKB and HCP datasets.

 -For the HCP dataset, the associated buckets can be found in: 

/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets

 -For the UKB dataset, the buckets are not generated yet, but it would be found in:

/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lbuckets

### Load your data

In [3]:
embeddings_HCP = pd.read_csv("/neurospin/dico/adufournet/Runs/02_Heritability_Left_PCS_HCP/Output/2024-05-13/09-33-29_206/hcp_epoch60_embeddings/full_embeddings.csv", index_col=0)
embeddings_UKB = pd.read_csv("/neurospin/dico/adufournet/Runs/02_Heritability_Left_PCS_HCP/Output/2024-05-13/09-33-29_206/UKB_epoch60_embeddings/full_embeddings.csv", index_col=0)
embeddings_UKB.head()

embeddings_HCP = embeddings_HCP
embeddings_UKB = embeddings_UKB
embeddings_UKB.head()

Unnamed: 0_level_0,dim1,dim2,dim3,dim4,dim5,dim6,dim7,dim8,dim9,dim10,...,dim247,dim248,dim249,dim250,dim251,dim252,dim253,dim254,dim255,dim256
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
sub-1000021,3.411522,-3.809216,15.403873,9.348969,0.192424,-0.495174,-10.625201,21.913597,15.799512,-3.020029,...,4.427488,62.63322,11.167557,-12.174602,10.930779,-31.395472,-20.459242,4.932469,-7.491192,-0.486337
sub-1000458,-47.31498,-16.875847,6.268302,-14.805659,-3.636524,-15.009971,-25.361504,20.222443,10.710242,-7.875286,...,-5.78848,20.70334,20.576696,-21.549343,25.2768,-30.138172,-13.101314,4.573277,7.83018,-26.36402
sub-1000575,-20.057571,-0.517332,11.068587,27.307678,-15.650618,-12.777596,22.677881,5.150224,33.19059,-19.315678,...,24.144289,30.855265,-0.058306,-10.811748,19.109825,-10.972364,-2.82663,-33.128902,-15.561665,-15.827283
sub-1000606,-38.27792,4.732678,12.894776,-9.342887,-0.053353,-22.454464,7.657977,-4.370313,12.114909,-5.55254,...,-17.152739,47.780556,12.951429,0.663262,-11.14682,-12.12998,12.698999,0.3342,29.254696,-1.875374
sub-1000963,-3.616922,-4.445605,-14.435362,-35.12253,8.104991,-0.341337,-19.55515,-27.789082,-8.176664,23.098358,...,-37.479347,25.96218,-5.279437,-14.123582,8.515189,-30.010107,9.012834,-1.461584,-14.874557,-4.218939


### Scale your data

In [4]:
scaler = StandardScaler()
scaler.fit(embeddings_UKB)

scl_bdd_hcp = scaler.transform(embeddings_HCP)
scl_bdd_ukb = scaler.transform(embeddings_UKB)

### 2D UMAP

In [5]:
reducer2D = umap.UMAP(n_components=2)

reducer2D.fit(scl_bdd_ukb)

bdd_2D_HCP = reducer2D.transform(scl_bdd_hcp)
bdd_2D_UKB = reducer2D.transform(scl_bdd_ukb)



In [6]:
bdd_2D_HCP = pd.DataFrame(bdd_2D_HCP, columns=['Dim 1', 'Dim 2'])
bdd_2D_UKB = pd.DataFrame(bdd_2D_UKB, columns=['Dim 1', 'Dim 2'])

bdd_2D_HCP['Dataset'] = 'HCP'
bdd_2D_UKB['Dataset'] = 'UKB'

bdd_2D_HCP['ID'] = embeddings_HCP.index
bdd_2D_UKB['ID'] = embeddings_UKB.index

bdd_2D_All = pd.concat([bdd_2D_UKB,bdd_2D_HCP], axis=0)
bdd_2D_All.head()

Unnamed: 0,Dim 1,Dim 2,Dataset,ID
0,7.346906,3.017047,UKB,sub-1000021
1,1.489729,3.597837,UKB,sub-1000458
2,0.088437,5.863735,UKB,sub-1000575
3,-0.284873,5.094411,UKB,sub-1000606
4,3.887578,2.296591,UKB,sub-1000963


### Plot it in the notebook or write a html file

In [7]:
subject_id_list = []
dataset_name_list = []

In [8]:
# Create the scatter plot using plotly express
fig = px.scatter(
    bdd_2D_All, x='Dim 1', y='Dim 2', 
    color='Dataset',
    title='2D UMAP HCP and UKB',
    labels={'0': 'dim 1', '1': 'dim 2'},
    hover_data= ['Dataset', 'ID'],
    opacity=0.5,
    width=800, height=600
)

# Convert the figure to a FigureWidget
f = go.FigureWidget(fig)

# Define the callback function
def click_callback(trace, points, selector):
    for trace_index in range(len(f.data)):
        if trace_index == points.trace_index:
            customdata = f.data[trace_index].customdata
            for i in points.point_inds:
                point_dataset, point_id = customdata[i]
                print(f"Clicked point ID: {point_id}, Dataset: {point_dataset}")
                subject_id_list.append(point_id)
                dataset_name_list.append(point_dataset)

# Attach the callback to the on_click event for all traces
for trace in f.data:
    trace.on_click(click_callback)

# Display the figure widget
f

FigureWidget({
    'data': [{'customdata': array([['UKB', 'sub-1000021'],
                                   ['UKB', 'sub-1000458'],
                                   ['UKB', 'sub-1000575'],
                                   ...,
                                   ['UKB', 'sub-6023847'],
                                   ['UKB', 'sub-6024038'],
                                   ['UKB', 'sub-6024754']], dtype=object),
              'hovertemplate': ('Dataset=%{customdata[0]}<br>Di' ... '{customdata[1]}<extra></extra>'),
              'legendgroup': 'UKB',
              'marker': {'color': '#636efa', 'opacity': 0.5, 'symbol': 'circle'},
              'mode': 'markers',
              'name': 'UKB',
              'showlegend': True,
              'type': 'scattergl',
              'uid': 'f7fd3317-93fb-4087-8c0e-1a786ae26bea',
              'x': array([7.3469057 , 1.489729  , 0.08843696, ..., 3.9168212 , 3.9518468 ,
                          3.3499331 ], dtype=float32),
              '

Clicked point ID: 178849, Dataset: HCP
Clicked point ID: 733548, Dataset: HCP
Clicked point ID: 145834, Dataset: HCP
Clicked point ID: 154229, Dataset: HCP
Clicked point ID: 346137, Dataset: HCP
Clicked point ID: 172635, Dataset: HCP
Clicked point ID: 112112, Dataset: HCP
Clicked point ID: 129634, Dataset: HCP
Clicked point ID: 919966, Dataset: HCP
Clicked point ID: 138837, Dataset: HCP
Clicked point ID: 239136, Dataset: HCP
Clicked point ID: 208024, Dataset: HCP
Clicked point ID: sub-3361652, Dataset: UKB
Clicked point ID: sub-3941879, Dataset: UKB
Clicked point ID: sub-3622358, Dataset: UKB
Clicked point ID: sub-2969936, Dataset: UKB
Clicked point ID: sub-1833987, Dataset: UKB
Clicked point ID: sub-4459352, Dataset: UKB
Clicked point ID: sub-5797043, Dataset: UKB
Clicked point ID: sub-3333424, Dataset: UKB
Clicked point ID: sub-1353678, Dataset: UKB
Clicked point ID: sub-2781635, Dataset: UKB
Clicked point ID: sub-5372068, Dataset: UKB
Clicked point ID: sub-1583354, Dataset: UKB


In [14]:
side = "L"
region = "CINGULATE"

bucket_files = []

for subject_id, dataset in zip(subject_id_list,dataset_name_list):
    if dataset.lower() in ['ukb', 'ukbiobank']:
        dataset = 'UkBioBank'
        path = f'/neurospin/dico/data/deep_folding/current/datasets/{dataset}/crops/2mm/{region}/mask/{side}buckets'

    if dataset.lower() in ['hcp']:
        path = f'/neurospin/dico/data/deep_folding/current/datasets/{dataset.lower()}/crops/2mm/{region}/mask/{side}buckets'

    filename = f'{path}/{subject_id}_cropped_skeleton.bck'#.minf'

    if os.path. isfile(filename):
        bucket_files.append(filename)
    else:
        print(f"{filename} is not a correct path, or the .bck doesn't exist")
bucket_files

/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lbuckets/sub-3361652_cropped_skeleton.bck is not a correct path, or the .bck doesn't exist
/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lbuckets/sub-3941879_cropped_skeleton.bck is not a correct path, or the .bck doesn't exist
/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lbuckets/sub-3622358_cropped_skeleton.bck is not a correct path, or the .bck doesn't exist
/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lbuckets/sub-2969936_cropped_skeleton.bck is not a correct path, or the .bck doesn't exist
/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lbuckets/sub-1833987_cropped_skeleton.bck is not a correct path, or the .bck doesn't exist
/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lbuckets/sub-4459352_cropped_

['/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets/178849_cropped_skeleton.bck',
 '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets/733548_cropped_skeleton.bck',
 '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets/145834_cropped_skeleton.bck',
 '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets/154229_cropped_skeleton.bck',
 '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets/346137_cropped_skeleton.bck',
 '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets/172635_cropped_skeleton.bck',
 '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets/112112_cropped_skeleton.bck',
 '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets/129634_cropped_skeleton.bck',
 '/neurospin/dico/data/deep_fold

In [10]:
import anatomist.api as ana
from soma.qt_gui.qtThread import QtThreadCall
from soma.qt_gui.qt_backend import Qt

a = ana.Anatomist()

existing QApplication: 0
QStandardPaths: XDG_RUNTIME_DIR not set, defaulting to '/tmp/runtime-ad279118'


global modules: /casa/host/build/share/anatomist-5.2/python_plugins
home   modules: /casa/home/.anatomist/python_plugins
create qapp
done
Starting Anatomist.....
config file : /casa/home/.anatomist/config/settings.cfg
PyAnatomist Module present
PythonLauncher::runModules()
loading module simple_controls
loading module save_resampled
loading module selection
loading module bsa_proba
loading module modelGraphs
loading module profilewindow
loading module ana_image_math
loading module paletteViewer
loading module foldsplit
loading module anacontrolmenu
loading module gradientpalette
loading module palettecontrols
loading module meshsplit
loading module volumepalettes
loading module gltf_io
loading module infowindow
loading module histogram
loading module statsplotwindow
loading module valuesplotwindow
all python modules loaded
Anatomist started.


In [15]:
block = a.createWindowsBlock(4) # 4 columns
d = {}

for i, file in enumerate(bucket_files):
    d[f'bck_{i}'] = a.loadObject(file)
    d[f'w_{i}'] = a.createWindow('3D', block=block)#geometry=[100+400*(i%3), 100+440*(i//3), 400, 400])
    d[f'w_{i}'].addObjects(d[f'bck_{i}'])

Multitexturing present
function glActiveTexture found.
function glClientActiveTexture found.
function glBlendEquation found.
function glTexImage3D found.
function glMultiTexCoord3f found.
function glBindFramebuffer found.
function glBindRenderbuffer found.
function glFramebufferTexture2D found.
function glGenFramebuffers found.
function glGenRenderbuffers found.
function glFramebufferRenderbuffer found.
function glRenderbufferStorage found.
function glCheckFramebufferStatus found.
function glDeleteRenderbuffers found.
function glDeleteFramebuffers found.
Number of texture units: 8
function glUniform1f found.
function glUniform1i found.
function glUniform4fv found.
function glGetUniformLocation found.
function glMultTransposeMatrixf found.
function glAttachShader found.
function glDetachShader found.
function glCompileShader found.
function glCreateProgram found.
function glCreateShader found.
function glDeleteProgram found.
function glDeleteShader found.
function glGetProgramiv found.


In [12]:
from soma import aims

In [16]:
mm_skeleton_path = f"/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/{region}/mask/{side}crops"

dic_vol = {}
dim = 0
rep = 0
while dim == 0 and rep < len(subject_id_list):
    if os.path. isfile(f'{mm_skeleton_path}/{subject_id_list[rep]}_cropped_skeleton.nii.gz'):
        dim = aims.read(f'{mm_skeleton_path}/{subject_id_list[rep]}_cropped_skeleton.nii.gz').np.shape
        sum_vol = np.zeros(shape=dim)
    else: 
        print(f'FileNotFound {mm_skeleton_path}/{subject_id_list[rep]}_cropped_skeleton.nii.gz')
        #raise FileNotFoundError(f'{mm_skeleton_path}/{subject_id_list[0]}_cropped_skeleton.nii.gz')
    rep += 1

for subject_id, dataset in zip(subject_id_list,dataset_name_list):
    if dataset.lower() in ['ukb', 'ukbiobank']:
        dataset = 'UkBioBank'
    elif dataset.lower() == 'hcp':
        dataset = 'hcp'
        
    mm_skeleton_path = f"/neurospin/dico/data/deep_folding/current/datasets/{dataset}/crops/2mm/{region}/mask/{side}crops"

    if os.path. isfile(f'{mm_skeleton_path}/{subject_id}_cropped_skeleton.nii.gz'):
        vol = aims.read(f'{mm_skeleton_path}/{subject_id}_cropped_skeleton.nii.gz')
        # compare the dim with the first file dim

        if vol.np.shape != dim:
            raise ValueError(f"{subject_id_list[0]} and {subject_id} must have the same dim")

            
        # to have a binary 3D structure
        dic_vol[subject_id] = (vol.np > 0).astype(int) 
        sum_vol += (vol.np > 0).astype(int) 
    else: 
        print(f'FileNotFound {mm_skeleton_path}/{subject_id}_cropped_skeleton.nii.gz')
        #raise FileNotFoundError(f'{mm_skeleton_path}/{subject_id}_cropped_skeleton.nii.gz')

sum_vol = sum_vol/len(subject_id_list)
#print(dic_vol[subject_id_list[0]].shape)
#print(np.count_nonzero(dic_vol[subject_id_list[0]]))

FileNotFound /neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lcrops/178849_cropped_skeleton.nii.gz
FileNotFound /neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lcrops/733548_cropped_skeleton.nii.gz
FileNotFound /neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lcrops/145834_cropped_skeleton.nii.gz
FileNotFound /neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lcrops/154229_cropped_skeleton.nii.gz
FileNotFound /neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lcrops/346137_cropped_skeleton.nii.gz
FileNotFound /neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lcrops/172635_cropped_skeleton.nii.gz
FileNotFound /neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lcrops/112112_cropped_skeleton.nii.gz
FileNotFound /neurospin/dico/data/deep_fo

In [17]:
# Create axis
axes = list(dim[:3])
dim1 = list(dim[:3])[0]
dim2 = list(dim[:3])[1]
dim3 = list(dim[:3])[2]

# Create Data
data = sum_vol.reshape(list(dim[:3]))

X, Y, Z = np.mgrid[0:dim1:1, 0:dim2:1, 0:dim3:1]
values = np.flip(data, axis=[1,2])

fig = go.Figure(data=go.Volume(
    x=X.flatten(),
    y=Y.flatten(),
    z=Z.flatten(),
    value=values.flatten(),
    isomin=0.15,
    isomax=0.99,
    opacity=0.1, # needs to be small to see through all surfaces
    surface_count=17, # needs to be a large number for good volume rendering
    ))
fig.show()

In [None]:
"""
data_filenames_str = ' '.join(data_filenames_list)

#os.system(f'anatomist --input {data_filenames_str}')

# Command to run the Python script inside the container
command = f'bv bash -c "python visu_anatomist.py {data_filenames_str}"'

# Execute the command
os.system(command)
"""

In [31]:
# See for more information
# https://plotly.com/python/line-and-scatter/
# https://plotly.com/python/setting-graph-size/
# app.run_server(debug=True)