In [1]:
# One-off environment setup: uncomment the pip lines below to install the
# pinned plotting/widget dependencies and umap-learn.
# ! pip install plotly==5.22.0
# ! pip install jupyterlab-widgets==1.1.1 ipywidgets==7.7.2
# #! pip install jupyterlab-widgets==3.0.11 ipywidgets==8.1.3
# ! pip install umap-learn
# Enable the ipywidgets notebook extension (the clickable FigureWidget used
# later in this notebook is a widget).
! jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
import umap
import os



In [3]:
#! jupyter labextension list
#! jupyter nbextension list

### In this example, the embeddings of the UKB and HCP datasets are generated by a Barlow Twins model trained on the left CINGULATE region of UKB.

- For the HCP dataset, the associated buckets can be found in:

/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/CINGULATE/mask/Lbuckets

- For the UKB dataset, the buckets have not been generated yet, but they will be found in:

/neurospin/dico/data/deep_folding/current/datasets/UkBioBank/crops/2mm/CINGULATE/mask/Lbuckets

### Load your data

In [4]:
# Alternative embeddings (run 02_Heritability_Left_PCS_HCP), kept for reference:
#embeddings_HCP = pd.read_csv("/neurospin/dico/adufournet/Runs/02_Heritability_Left_PCS_HCP/Output/2024-05-13/09-33-29_206/hcp_epoch60_embeddings/full_embeddings.csv", index_col=0)
#embeddings_UKB = pd.read_csv("/neurospin/dico/adufournet/Runs/02_Heritability_Left_PCS_HCP/Output/2024-05-13/09-33-29_206/UKB_epoch60_embeddings/full_embeddings.csv", index_col=0)

# Both datasets are read from the same run directory, so that the HCP and UKB
# embeddings always come from the same model.
embeddings_dir = "/neurospin/dico/adufournet/Runs/08_Heritability_Left_Orbital_HCP_dim256/Output/20-56-02_1"

# One row per subject (the first CSV column is used as the index), one column
# per embedding dimension.
embeddings_HCP = pd.read_csv(f"{embeddings_dir}/HCP_random_epoch70_embeddings/full_embeddings.csv", index_col=0)
embeddings_UKB = pd.read_csv(f"{embeddings_dir}/UKB_random_epoch70_embeddings/full_embeddings.csv", index_col=0)

embeddings_UKB.head()

Unnamed: 0_level_0,dim1,dim2,dim3,dim4,dim5,dim6,dim7,dim8,dim9,dim10,...,dim247,dim248,dim249,dim250,dim251,dim252,dim253,dim254,dim255,dim256
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
sub-1000021,-0.223267,-1.910213,-4.233967,23.503742,11.443573,8.791649,-19.330914,9.398719,12.036561,20.609138,...,17.275707,19.63739,23.90228,6.220816,-1.704384,8.290184,-10.330546,1.080888,10.457475,-3.583809
sub-1000458,-1.487145,-11.374003,22.499014,12.430385,13.282042,6.232183,-9.554652,11.808379,11.785681,-1.828715,...,10.948745,3.683071,13.204297,3.243382,2.440976,-21.18743,-19.015543,-8.355005,-5.511065,14.558921
sub-1000575,6.581356,1.730981,23.259317,-16.387571,-1.729305,-7.419609,12.355763,-17.623604,4.551753,18.27951,...,16.038265,21.737503,-6.22868,23.004562,15.599153,14.835078,20.791811,-9.450774,16.728674,3.886697
sub-1000606,-10.52922,-0.98731,27.11244,7.748513,12.663793,-8.886922,21.285864,5.773341,-0.787316,6.343801,...,-2.380117,6.828645,20.211283,5.759082,5.581012,-3.729947,-23.506893,1.081653,18.528488,-3.506482
sub-1000963,-1.465717,3.356938,18.71619,13.292236,-8.116364,3.039848,-8.254716,27.690603,5.612602,23.423695,...,6.647481,9.744263,33.864754,-0.016435,11.216681,-15.389091,-19.265171,11.988579,2.000806,-2.332962


### Scale your data

In [5]:
# The scaler is fitted on UKB only; the same UKB statistics are then applied
# to both datasets so that they live in a common space.
scaler = StandardScaler().fit(embeddings_UKB)

scl_bdd_ukb = scaler.transform(embeddings_UKB)
scl_bdd_hcp = scaler.transform(embeddings_HCP)

### 2D UMAP

In [6]:
# UMAP is stochastic: fix the seed so that the 2D map (and therefore the
# subjects picked by clicking on it) is the same after a kernel restart.
# NOTE(review): umap-learn documents that a fixed random_state limits its
# multi-threading, so the fit may be slower — confirm this is acceptable.
reducer2D = umap.UMAP(n_components=2, random_state=42)

# The projection is learnt on UKB only, then applied to both datasets.
reducer2D.fit(scl_bdd_ukb)

bdd_2D_HCP = reducer2D.transform(scl_bdd_hcp)
bdd_2D_UKB = reducer2D.transform(scl_bdd_ukb)



In [7]:
# Wrap each 2D projection in a DataFrame and tag every point with its dataset
# name and subject ID (taken, in order, from the index of the embeddings).
umap_columns = ['Dim 1', 'Dim 2']

bdd_2D_HCP = pd.DataFrame(bdd_2D_HCP, columns=umap_columns).assign(
    Dataset='hcp',
    ID=embeddings_HCP.index,
)
bdd_2D_UKB = pd.DataFrame(bdd_2D_UKB, columns=umap_columns).assign(
    Dataset='UkBioBank',
    ID=embeddings_UKB.index,
)

# UKB rows first, then HCP rows.
bdd_2D_All = pd.concat([bdd_2D_UKB, bdd_2D_HCP], axis=0)
bdd_2D_All.head()

Unnamed: 0,Dim 1,Dim 2,Dataset,ID
0,4.135791,3.933747,UkBioBank,sub-1000021
1,0.627991,3.181151,UkBioBank,sub-1000458
2,0.157519,1.94968,UkBioBank,sub-1000575
3,6.662886,4.46904,UkBioBank,sub-1000606
4,4.096571,5.156266,UkBioBank,sub-1000963


### Plot it in the notebook or write an HTML file

In [17]:
# Subjects picked by clicking on the plot, and the dataset each one belongs to
# (two parallel lists filled by the click callback).
subject_id_list, dataset_name_list = [], []

In [18]:
# Scatter plot of the 2D UMAP, one colour (and one trace) per dataset.
# 'Dataset' and 'ID' are passed as hover_data so that they are stored, in this
# order, in each trace's customdata, which the click callback reads back.
fig = px.scatter(
    bdd_2D_All, x='Dim 1', y='Dim 2',
    color='Dataset',
    title='2D UMAP HCP and UKB',
    hover_data=['Dataset', 'ID'],
    opacity=0.5,
    width=800, height=600
)

# Convert the figure to a FigureWidget so that on_click callbacks can be
# attached to its traces.
f = go.FigureWidget(fig)

# Define the callback function
def click_callback(trace, points, selector):
    """Record the subjects clicked on the UMAP scatter plot.

    Registered with ``trace.on_click`` on each trace of the figure widget.

    Parameters
    ----------
    trace : the trace the callback was registered on; its ``customdata`` rows
        are ``[dataset, subject_id]`` pairs.
    points : click description; ``points.point_inds`` lists the indices of the
        clicked points inside ``trace`` (iterating an empty list is a no-op).
    selector : unused, required by the callback signature.

    Side effects
    ------------
    Prints each clicked point and appends its ID to ``subject_id_list`` and
    its dataset to ``dataset_name_list``.
    """
    # The trace that received the event is passed in directly, so there is no
    # need to search the figure's traces for points.trace_index.
    customdata = trace.customdata
    for i in points.point_inds:
        point_dataset, point_id = customdata[i]
        print(f"Clicked point ID: {point_id}, Dataset: {point_dataset}")
        subject_id_list.append(point_id)
        dataset_name_list.append(point_dataset)

# Register the click handler on every trace of the widget (one per dataset)
for scatter_trace in f.data:
    scatter_trace.on_click(click_callback)

# Last expression of the cell: renders the interactive widget
f

FigureWidget({
    'data': [{'customdata': array([['UkBioBank', 'sub-1000021'],
                                   ['UkBioBank', 'sub-1000458'],
                                   ['UkBioBank', 'sub-1000575'],
                                   ...,
                                   ['UkBioBank', 'sub-6023847'],
                                   ['UkBioBank', 'sub-6024038'],
                                   ['UkBioBank', 'sub-6024754']], dtype=object),
              'hovertemplate': ('Dataset=%{customdata[0]}<br>Di' ... '{customdata[1]}<extra></extra>'),
              'legendgroup': 'UkBioBank',
              'marker': {'color': '#636efa', 'opacity': 0.5, 'symbol': 'circle'},
              'mode': 'markers',
              'name': 'UkBioBank',
              'showlegend': True,
              'type': 'scattergl',
              'uid': 'a2c309d8-615d-4291-a961-77110915351d',
              'x': array([4.1357913 , 0.6279911 , 0.15751947, ..., 5.0329285 , 1.7571607 ,
                      

Clicked point ID: sub-3008133, Dataset: UkBioBank
Clicked point ID: sub-1098919, Dataset: UkBioBank
Clicked point ID: sub-4945076, Dataset: UkBioBank
Clicked point ID: sub-1333536, Dataset: UkBioBank
Clicked point ID: sub-1332153, Dataset: UkBioBank
Clicked point ID: sub-1294914, Dataset: UkBioBank
Clicked point ID: sub-5281703, Dataset: UkBioBank


In [19]:
# Show the subjects selected so far by clicking on the plot, followed by the
# dataset of each one (the two lists are parallel).
print(subject_id_list, dataset_name_list)

['sub-3008133', 'sub-1098919', 'sub-4945076', 'sub-1333536', 'sub-1332153', 'sub-1294914', 'sub-5281703'] ['UkBioBank', 'UkBioBank', 'UkBioBank', 'UkBioBank', 'UkBioBank', 'UkBioBank', 'UkBioBank']


In [20]:
#subject_id_list = [214423, 849971, 208630, 683256, 589567]
#dataset_name_list = ['hcp' for i in range(5)]

In [25]:
# Hemisphere prefix used to build the crop directory name ("{side}crops").
side = "L"
# Region sub-directory of the crops; "CINGULATE" is the alternative left in
# the trailing comment.
region = "ORBITAL" #"CINGULATE"
# Number of columns of the Anatomist windows block created below.
nb_columns = 4


import anatomist.api as ana
from soma.qt_gui.qtThread import QtThreadCall
from soma.qt_gui.qt_backend import Qt

# Anatomist instance used by the helpers and by every load/window/palette call
# in the rest of this cell.
a = ana.Anatomist()

from soma import aims

def to_bucket(obj):
    """Return ``obj`` as an Anatomist bucket object.

    An object whose ``type()`` is already ``BUCKET`` is returned unchanged.
    Anything else is converted: it is turned into an AIMS object, passed
    through an ``aims.Converter`` targeting ``aims.BucketMap_VOID``, and the
    result is wrapped back into an Anatomist object.
    """
    needs_conversion = obj.type() != obj.BUCKET
    if needs_conversion:
        aims_volume = a.toAimsObject(obj)
        convert = aims.Converter(intype=aims_volume, outtype=aims.BucketMap_VOID)
        bucket = a.toAObject(convert(aims_volume))
        # NOTE(review): presumably hands the object's lifetime over to the
        # Python reference that is returned — confirm against Anatomist docs.
        bucket.releaseAppRef()
        return bucket
    return obj

def build_gradient(pal):
    """Fill the colours of palette ``pal`` from its 'palette_gradients' header.

    The gradient string stored in ``pal.header()['palette_gradients']`` is
    rendered (with alpha enabled) into ``pal.shape[0]`` colours by an
    Anatomist ``GradientWidget``; the colours are then written into the
    palette voxels and ``pal.update()`` is called.
    """
    gradient_spec = pal.header()['palette_gradients']
    widget = ana.cpp.GradientWidget(None, 'gradientwidget', gradient_spec)
    widget.setHasAlpha(True)

    n_colors = pal.shape[0]
    # Keep the rendered object in a variable while its buffer is being read.
    rendered = widget.fillGradient(n_colors, True)
    bgra = np.frombuffer(rendered.data(), dtype=np.uint8).reshape((n_colors, 4))

    # BGRA -> RGBA: swap the first and third channels, keep alpha in place.
    pal.np['v'][:, 0, 0, 0, :] = bgra[:, [2, 1, 0, 3]]
    pal.update()



# Cropped skeleton files of the selected subjects that exist on disk.
bucket_files = []

for subject_id, dataset in zip(subject_id_list, dataset_name_list):
    # Map the dataset label used in the plot to its directory name on disk.
    dataset_key = str(dataset).lower()
    if dataset_key in ['ukb', 'ukbiobank']:
        dataset_dir = 'UkBioBank'
    elif dataset_key == 'hcp':
        dataset_dir = 'hcp'
    else:
        # Without this branch the path of the previous subject would be
        # reused (or `path` would be undefined on the first iteration).
        print(f"Unknown dataset '{dataset}' for subject {subject_id}, skipped")
        continue

    path = f'/neurospin/dico/data/deep_folding/current/datasets/{dataset_dir}/crops/2mm/{region}/mask/{side}crops'
    filename = f'{path}/{subject_id}_cropped_skeleton.nii.gz'

    if os.path.isfile(filename):
        bucket_files.append(filename)
    else:
        print(f"{filename} is not a correct path, or the file doesn't exist")

#####################################
# We create the blocks
#####################################

# One 3D window per skeleton, all laid out in a single block of windows.
block = a.createWindowsBlock(nb_columns)

# Keeps a reference to every bucket ('bck_<i>') and window ('w_<i>').
d = {}

for idx, skeleton_file in enumerate(bucket_files):
    bucket_key, window_key = f'bck_{idx}', f'w_{idx}'
    d[bucket_key] = to_bucket(a.loadObject(skeleton_file))
    d[window_key] = a.createWindow('3D', block=block)
    d[window_key].addObjects(d[bucket_key])


#####################################
# We compute here the average
#####################################

# Directory name of each dataset on disk, keyed by the lower-cased label used
# in the plot; an unknown label is used as-is.
dataset_dirs = {'ukb': 'UkBioBank', 'ukbiobank': 'UkBioBank', 'hcp': 'hcp'}

dic_vol = {}              # subject id -> binary skeleton array
dim = 0                   # shape of the reference volume (first file found)
sum_vol = None            # voxel-wise sum, then mean, of the binary skeletons
nb_volumes = 0            # number of skeletons actually accumulated
reference_subject = None  # subject whose volume defines the reference shape

for subject_id, dataset in zip(subject_id_list, dataset_name_list):
    dataset_dir = dataset_dirs.get(str(dataset).lower(), dataset)
    mm_skeleton_path = f"/neurospin/dico/data/deep_folding/current/datasets/{dataset_dir}/crops/2mm/{region}/mask/{side}crops"
    skeleton_file = f'{mm_skeleton_path}/{subject_id}_cropped_skeleton.nii.gz'

    if not os.path.isfile(skeleton_file):
        print(f'FileNotFound {skeleton_file}')
        continue

    vol = aims.read(skeleton_file)
    # to have a binary 3D structure (computed before the accumulator is reset)
    binary_vol = (vol.np > 0).astype(int)

    if sum_vol is None:
        # The first readable skeleton defines the reference dimensions and
        # provides the (zeroed) float accumulator.
        sum_vol = vol.astype(float)
        sum_vol.fill(0)
        dim = sum_vol.shape
        reference_subject = subject_id
    elif binary_vol.shape != sum_vol.np.shape:
        # compare the dim with the first file dim
        raise ValueError(f"{reference_subject} and {subject_id} must have the same dim")

    dic_vol[subject_id] = binary_vol
    sum_vol.np[:] += binary_vol
    nb_volumes += 1

if nb_volumes == 0:
    raise FileNotFoundError("No cropped skeleton could be read for the selected subjects")

# Average over the skeletons that were actually read, so that missing files do
# not bias the mean towards zero.
sum_vol = sum_vol / nb_volumes
print(sum_vol, sum_vol.shape)


# Wrap the averaged volume as an Anatomist object and show it in a sagittal
# window added to the same block as the per-subject windows.
a_sum_vol = a.toAObject(sum_vol)
a_sum_vol.setPalette(minVal=0, absoluteMode=True)
wsum = a.createWindow('Sagittal', block=block)
wsum.addObjects(a_sum_vol)
# Volume-rendering object built on top of the averaged volume.
rvol = a.fusionObjects(objects=[a_sum_vol], method='VolumeRenderingFusionMethod')
rvol.releaseAppRef()
# custom palette
# n scales the breakpoints placed in the last '#'-separated section of the
# gradient strings below.
# NOTE(review): that last section is presumably the alpha channel
# (build_gradient enables alpha) — confirm against the gradient format.
n = len(subject_id_list)
# Palette used by the volume rendering.
pal = a.createPalette('VR-palette')
pal.header()['palette_gradients'] = f'0;0.244444;0.5;1;1;1#0;0;0.535897;0.222222;1;1#0;0.7;1;0#0;0;{0.5/n};0;1;1'
build_gradient(pal)
rvol.setPalette('VR-palette', minVal=0, absoluteMode=True)
# Palette used by the sagittal slice view of the averaged volume.
pal2 = a.createPalette('slice-palette')
pal2.header()['palette_gradients'] = f'0;0.244444;0.5;1;1;1#0;0;0.535897;0.222222;1;1#0;0.7;1;0#0;0;{0.3/n};0;{0.7/n};1;1;1'
build_gradient(pal2)
a_sum_vol.setPalette('slice-palette')
# rvol.palette().fill()
# Show the volume rendering in its own 3D window of the block.
wvr = a.createWindow('3D', block=block)
wvr.addObjects(rvol)
#print(dic_vol[subject_id_list[0]].shape)
#print(np.count_nonzero(dic_vol[subject_id_list[0]]))

In [None]:
# Spatial extent of the averaged volume: the first three entries of `dim`
axes = list(dim[:3])
dim1, dim2, dim3 = axes

# Averaged skeleton reshaped to a plain 3D array
data = sum_vol.np.reshape(axes)

# Integer voxel coordinates of the grid, and the values flipped along axes 1 and 2
X, Y, Z = np.mgrid[0:dim1, 0:dim2, 0:dim3]
values = np.flip(data, axis=(1, 2))

volume_trace = go.Volume(
    x=X.flatten(),
    y=Y.flatten(),
    z=Z.flatten(),
    value=values.flatten(),
    isomin=0.1,
    isomax=1,
    opacity=0.1,       # needs to be small to see through all surfaces
    surface_count=17,  # needs to be a large number for good volume rendering
)
fig = go.Figure(data=volume_trace)
fig.show()

no position could be read at 395, 220
no position could be read at 261, 403
no position could be read at 461, 259
no position could be read at 462, 206
no position could be read at 376, 505
no position could be read at 297, 176
no position could be read at 591, 212
no position could be read at 611, 132
no position could be read at 480, 318
no position could be read at 254, 339
no position could be read at 486, 258
no position could be read at 427, 222
no position could be read at 608, 261
no position could be read at 116, 495


: 

Position : 40.519, 34.0857, 16.9794, 0


no position could be read at 241, 112
Position : 13.16, 38.051, 25.0639, 0
no position could be read at 156, 196


In [None]:
"""
data_filenames_str = ' '.join(data_filenames_list)

#os.system(f'anatomist --input {data_filenames_str}')

# Command to run the Python script inside the container
command = f'bv bash -c "python visu_anatomist.py {data_filenames_str}"'

# Execute the command
os.system(command)
"""

In [None]:
# See for more information
# https://plotly.com/python/line-and-scatter/
# https://plotly.com/python/setting-graph-size/
# app.run_server(debug=True)