In [493]:
import numpy as np
import os
import pandas as pd
from sklearn.metrics import pairwise_distances
from sklearn.manifold import Isomap

In [512]:
# load the hcp embeddings
## Left hemisphere : dim4 isomap 10% corr with handedness
embds = pd.read_csv('/neurospin/dico/jlaval/Output/SC-sylv_left_V1_dimensions/256dims_80epochs/hcp_random_embeddings/train_val_embeddings.csv')
## Right hemisphere : nothing significant, max 5%
#embds = pd.read_csv('/neurospin/dico/jlaval/Output/SC-sylv_right_V1_dimensions/256dims_epochs80/hcp_random_embeddings/train_val_embeddings.csv')
n_components=5
n_neighbors=10
metric = 'euclidean'
#metric='cosine'

In [513]:
# compute distance matrix
dist_matrix = pairwise_distances(embds.values, metric=metric)

In [514]:
# apply isomap
isomap = Isomap(n_components=n_components, n_neighbors=n_neighbors, metric='precomputed')

In [515]:
res = isomap.fit_transform(dist_matrix)

In [516]:
dims = [f'dim{k+1}' for k in range(n_components)]
df_res = pd.DataFrame(res, columns=dims)
df_res['Subject'] = embds['ID']

In [517]:
## for each dim, correlate with handedness score
handedness = pd.read_csv('/neurospin/dico/mguevara/RESTRICTED_jfmangin_9_26_2018_3_24_50_.csv', usecols=['Subject', 'Handedness'])

In [518]:
df_res = df_res.merge(handedness, on='Subject', how='left')

In [519]:
df_res

Unnamed: 0,dim1,dim2,dim3,dim4,dim5,Subject,Handedness
0,-279575.514656,-2493.661002,716.760821,-1360.990651,-621.296313,100206,65
1,-279438.273440,-2490.368959,714.240910,-1354.368536,-617.074030,100307,95
2,-279390.283436,-2491.302166,714.354497,-1356.218435,-618.960578,100408,55
3,-279168.862287,-2488.039560,711.259105,-1349.586424,-615.405890,100610,85
4,-278754.185041,-2480.453002,704.744910,-1334.142051,-606.280839,101006,90
...,...,...,...,...,...,...,...
1108,541839.705453,-791.227710,-1472.181601,-210.765923,373.097597,910443,65
1109,554275.974862,-827.381683,-1579.096627,-245.983501,498.578990,922854,90
1110,561874.318808,-850.559388,-1648.074105,-268.836265,580.276104,930449,100
1111,591016.774218,-917.068104,-1834.373055,-324.595344,756.356097,959574,65


In [520]:
# for each dim, compute pearson correlation with handedness, and report p-value
from scipy.stats import pearsonr
correlations = {}
pvalues = {}
target = df_res['Handedness'].to_numpy()
for i in range(n_components):
    dim = 'dim' + str(i + 1)
    corr, pval = pearsonr(df_res[dim].to_numpy(), target)
    correlations[dim] = corr
    pvalues[dim] = pval

In [521]:
print(correlations)

{'dim1': np.float64(0.003916854227625918), 'dim2': np.float64(0.013282610446876222), 'dim3': np.float64(-0.01132986405200013), 'dim4': np.float64(0.014039066425121816), 'dim5': np.float64(0.02152618408772124)}


In [522]:
print(pvalues)

{'dim1': np.float64(0.8961499643306505), 'dim2': np.float64(0.6580178595810534), 'dim3': np.float64(0.7057497563812734), 'dim4': np.float64(0.6398823843022183), 'dim5': np.float64(0.4731117241757455)}


# Try CCA

In [523]:
from sklearn.cross_decomposition import CCA

In [524]:
cca = CCA(n_components=1)
cca.fit(df_res[dims], df_res['Handedness'])

In [525]:
cca_score = cca.score(df_res[dims], df_res['Handedness'])

In [526]:
cca_score

-0.9373707003319314

In [527]:
## correlate isomap space with sulci isomap space
sulci_isomap = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/hcp/hcp_isomap_labels_SC-sylv_left.csv', usecols=['Subject']+[f'Isomap_central_left_dim{k}' for k in range(1,7)])

In [528]:
## sort the subjects
df_res_merged = df_res.merge(sulci_isomap, on='Subject', how='left', suffixes=(None, None))
df_res_merged.dropna(inplace=True)

sulci_isomap_merged = sulci_isomap.merge(df_res, on='Subject', how='left', suffixes=(None, None))
sulci_isomap_merged.dropna(inplace=True)

In [529]:
## compute pairwise correlations between isomap from sulci and isomap from embeddings
for i in range(n_components):
    for j in range(1,7):
        dim = 'dim' + str(i + 1)
        dim_sulci = 'Isomap_central_left_dim' + str(j)
        corr, pval = pearsonr(df_res_merged[dim].to_numpy(), sulci_isomap_merged[dim_sulci].to_numpy())
        print(f'Correlation between {dim} and {dim_sulci}: {corr}, p-value: {pval}')

Correlation between dim1 and Isomap_central_left_dim1: -0.003193765982213938, p-value: 0.9245406157074055
Correlation between dim1 and Isomap_central_left_dim2: 0.028567184765277216, p-value: 0.39678787682011923
Correlation between dim1 and Isomap_central_left_dim3: 0.024935173210048206, p-value: 0.4595417340841238
Correlation between dim1 and Isomap_central_left_dim4: -0.04890761700747007, p-value: 0.14669889786601012
Correlation between dim1 and Isomap_central_left_dim5: 0.020915320519964486, p-value: 0.5350337732479069
Correlation between dim1 and Isomap_central_left_dim6: -0.002971846943574093, p-value: 0.929769871263736
Correlation between dim2 and Isomap_central_left_dim1: -0.016696701326202282, p-value: 0.6204603397141852
Correlation between dim2 and Isomap_central_left_dim2: 0.004441643617646079, p-value: 0.8952028660027085
Correlation between dim2 and Isomap_central_left_dim3: 0.031386896092242954, p-value: 0.3518273280026465
Correlation between dim2 and Isomap_central_left_di

In [492]:
sulci_isomap

Unnamed: 0,Subject,Isomap_central_left_dim1,Isomap_central_left_dim2,Isomap_central_left_dim3,Isomap_central_left_dim4,Isomap_central_left_dim5,Isomap_central_left_dim6
0,100206,-0.229650,7.389883,0.348397,-1.861678,0.842310,0.582308
1,100307,1.573895,-2.751750,1.045617,-1.803867,0.096300,1.880110
2,100408,-5.200937,-3.329636,-1.687213,-0.164033,-0.129700,0.668525
3,100610,2.131914,0.025349,-0.288740,-1.789072,-1.106825,-0.100832
4,101006,0.411594,0.454800,-2.476906,1.865777,-1.723096,1.408883
...,...,...,...,...,...,...,...
878,992673,2.688382,7.092495,-2.360218,-0.831440,-1.927135,-0.885679
879,992774,3.216161,-1.978102,-3.380473,-0.248682,0.209391,-0.840647
880,993675,1.654016,1.375164,0.233411,-0.785303,-1.868195,-2.888935
881,994273,1.177521,4.096813,-1.256984,3.123394,0.146602,1.708059
