In [14]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import umap
import plotly.express as px

### Load your data

In [2]:
# Both embedding CSVs live under the same run output directory; keep that
# directory in one place so the notebook can be pointed at another run by
# editing a single line.
RUN_DIR = "/neurospin/dico/adufournet/Runs/02_Heritability_Left_PCS_HCP/Output/2024-05-17/17-33-50_137"

# The first CSV column holds the subject identifiers and becomes the index.
embeddings_HCP = pd.read_csv(f"{RUN_DIR}/HCP_epoch60_embeddings/full_embeddings.csv", index_col=0)
embeddings_UKB = pd.read_csv(f"{RUN_DIR}/UKB_epoch60_embeddings/full_embeddings.csv", index_col=0)
embeddings_UKB.head()

Unnamed: 0_level_0,dim1,dim2,dim3,dim4,dim5,dim6,dim7,dim8,dim9,dim10,...,dim247,dim248,dim249,dim250,dim251,dim252,dim253,dim254,dim255,dim256
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
sub-1000021,4.984328,5.862696,35.545628,21.53085,-15.400157,4.8033,-3.270256,17.67348,5.964229,10.379309,...,-5.588728,1.781751,-6.887294,4.123653,-3.004085,24.731771,5.531009,-5.475274,-2.560727,0.488364
sub-1000458,9.68172,-17.024338,-21.298676,-10.950365,22.326513,-8.129682,-18.481632,-4.854505,19.746336,-11.97457,...,-3.582124,-40.108284,7.88235,-8.344917,-11.103089,-21.647219,-2.470874,-20.251673,-7.385629,5.577127
sub-1000575,-4.692525,-26.79043,30.774847,-11.931572,11.357469,0.34341,6.613949,-11.375133,-20.105026,3.176765,...,4.907033,3.553167,25.210009,-7.363922,-9.255467,-2.145081,-12.499691,-5.062853,2.04008,-6.979667
sub-1000606,-7.469733,-1.329392,21.345432,0.904605,10.759202,-2.359774,-38.27805,-3.586395,13.992952,-6.50496,...,-6.491087,-10.04078,3.959444,-7.020837,22.448591,-41.28251,-9.342698,7.340107,-12.833817,-6.211372
sub-1000963,-21.217674,-1.909686,-5.333684,10.033036,-7.345963,-9.958603,-5.830301,-1.839255,22.2825,-18.50236,...,-1.4368,-49.797806,-12.431572,-9.649385,-5.362879,3.641441,-5.760827,-3.505362,-7.708777,-5.034035


### Scale your data

In [13]:
# The scaler is fitted on the UKB embeddings only; the very same fitted
# transformation is then applied to both cohorts so that they are expressed
# in one common, UKB-referenced coordinate system.
scaler = StandardScaler().fit(embeddings_UKB)

scl_bdd_ukb = scaler.transform(embeddings_UKB)
scl_bdd_hcp = scaler.transform(embeddings_HCP)

### 3D UMAP

In [11]:
# Fit a 3-component UMAP on the scaled UKB embeddings, then project both
# cohorts with the fitted reducer.
# UMAP is stochastic: without a fixed random_state the layout changes on every
# run, so the figure could not be reproduced after Restart & Run All.
# NOTE: per the umap-learn documentation, fixing random_state trades some
# multi-threaded speed for reproducibility.
reducer3D = umap.UMAP(n_components=3, random_state=42)

reducer3D.fit(scl_bdd_ukb)

bdd_3D_HCP = reducer3D.transform(scl_bdd_hcp)
bdd_3D_UKB = reducer3D.transform(scl_bdd_ukb)

In [39]:
# Wrap the UMAP coordinates in labelled DataFrames and tag every row with its
# source dataset and subject ID (IDs are taken positionally from the index of
# the corresponding embeddings frame).
dims_3D = ['Dim 1', 'Dim 2', 'Dim 3']

bdd_3D_HCP = pd.DataFrame(bdd_3D_HCP, columns=dims_3D).assign(
    Dataset='HCP', ID=embeddings_HCP.index
)
bdd_3D_UKB = pd.DataFrame(bdd_3D_UKB, columns=dims_3D).assign(
    Dataset='UKB', ID=embeddings_UKB.index
)

# ignore_index=True gives the stacked frame a unique 0..n-1 index; without it
# the UKB and HCP rows would share the labels 0, 1, 2, ... and any label-based
# lookup (e.g. .loc[0]) would return rows from both cohorts.
bdd_3D_All = pd.concat([bdd_3D_UKB, bdd_3D_HCP], axis=0, ignore_index=True)
bdd_3D_All.head()

Unnamed: 0,Dim 1,Dim 2,Dim 3,Dataset,ID
0,5.947046,4.005697,5.141806,UKB,sub-1000021
1,1.724398,3.75478,4.565248,UKB,sub-1000458
2,0.857146,6.782066,4.859478,UKB,sub-1000575
3,1.051789,4.671617,6.05892,UKB,sub-1000606
4,5.069192,4.495547,5.522171,UKB,sub-1000963


### Plot it in the notebook or write an HTML file

In [40]:
# Interactive 3D scatter of both cohorts, one colour per dataset.
# The former `labels` mapping was keyed by '0'/'1'/'2', which match none of the
# plotted columns ('Dim 1', 'Dim 2', 'Dim 3'), so it had no effect and is
# dropped; the rendered figure is unchanged.
fig = px.scatter_3d(
    bdd_3D_All, x='Dim 1', y='Dim 2', z='Dim 3',
    color='Dataset',
    title='3D UMAP HCP UKB',
    text='ID',
    opacity=0.7,
    width=800, height=600,
)
# Passing `text` makes plotly express draw the label next to every point;
# forcing marker-only mode keeps the plot readable while the subject ID
# remains available in the hover tooltip.
fig.update_traces(mode='markers')
# Uncomment to export the figure as a standalone HTML file:
#fig.write_html("html_plot/Left_PCS_HCP.html")
fig.show()

### 2D UMAP

In [18]:
# Fit a 2-component UMAP on the scaled UKB embeddings, then project both
# cohorts with the fitted reducer.
# UMAP is stochastic: without a fixed random_state the layout changes on every
# run, so the figure could not be reproduced after Restart & Run All.
# NOTE: per the umap-learn documentation, fixing random_state trades some
# multi-threaded speed for reproducibility.
reducer2D = umap.UMAP(n_components=2, random_state=42)

reducer2D.fit(scl_bdd_ukb)

bdd_2D_HCP = reducer2D.transform(scl_bdd_hcp)
bdd_2D_UKB = reducer2D.transform(scl_bdd_ukb)

In [30]:
# Wrap the UMAP coordinates in labelled DataFrames and tag every row with its
# source dataset and subject ID (IDs are taken positionally from the index of
# the corresponding embeddings frame).
dims_2D = ['Dim 1', 'Dim 2']

bdd_2D_HCP = pd.DataFrame(bdd_2D_HCP, columns=dims_2D).assign(
    Dataset='HCP', ID=embeddings_HCP.index
)
bdd_2D_UKB = pd.DataFrame(bdd_2D_UKB, columns=dims_2D).assign(
    Dataset='UKB', ID=embeddings_UKB.index
)

# ignore_index=True gives the stacked frame a unique 0..n-1 index; without it
# the UKB and HCP rows would share the labels 0, 1, 2, ... and any label-based
# lookup (e.g. .loc[0]) would return rows from both cohorts.
bdd_2D_All = pd.concat([bdd_2D_UKB, bdd_2D_HCP], axis=0, ignore_index=True)
bdd_2D_All.head()

Unnamed: 0,Dim 1,Dim 2,Dataset,ID
0,6.708761,4.073947,UKB,sub-1000021
1,1.520369,3.622278,UKB,sub-1000458
2,0.198368,6.938894,UKB,sub-1000575
3,-0.117576,3.660171,UKB,sub-1000606
4,5.662771,2.960972,UKB,sub-1000963


### Plot it in the notebook or write an HTML file

In [42]:
# Interactive 2D scatter of both cohorts, one colour per dataset.
# The former `labels` mapping was keyed by '0'/'1', which match none of the
# plotted columns ('Dim 1', 'Dim 2'), so it had no effect and is dropped; the
# rendered figure is unchanged.
fig = px.scatter(
    bdd_2D_All, x='Dim 1', y='Dim 2',
    color='Dataset',
    title='2D UMAP HCP and UKB',
    text='ID',
    opacity=0.5,
    width=800, height=600,
    # Optional: make the HCP points larger than the UKB ones with
    # size=[10 if x == 'HCP' else 1 for x in bdd_2D_All['Dataset']]
)
# Passing `text` makes plotly express draw the label next to every point;
# forcing marker-only mode keeps the plot readable while the subject ID
# remains available in the hover tooltip.
fig.update_traces(mode='markers')
# Uncomment to export the figure as a standalone HTML file:
#fig.write_html("html_plot/Left_PCS_HCP_UKB_2D.html")
fig.show()

In [25]:
# For more information on setting the figure size, see:
# https://plotly.com/python/setting-graph-size/