In [1]:
import os
import h5py
import numpy as np
import pandas as pd
from scipy.io import loadmat
from scipy.stats import pearsonr
import nibabel as nb
import seaborn as sns

### Get subject IDs

In [2]:
# Load the HCP S900 subject list: a plain-text file with one subject ID per line.
subjlist_file = '../data/subjectListS900_QC_gr.txt'

# A context manager guarantees the handle is closed even if reading fails.
with open(subjlist_file) as f:
    mylist = f.read().split("\n")

# Drop blank entries instead of blindly discarding the last element: the old
# `mylist[:-1]` lost the final subject whenever the file had no trailing
# newline. Stripping also removes a stray '\r' from CRLF-encoded files.
subjlist = joinedlist = [entry.strip() for entry in mylist if entry.strip()]

# Independent copy of the subject IDs used by the cells below.
mysubjects = list(subjlist)
print(len(mysubjects))


709


# Prepare T1w/T2w ratios for SOLAR

### get the HCP demographics

In [3]:
def _read_hcp_table(csv_path):
    """Read an HCP csv, index it by its first column and cast the index to str."""
    table = pd.read_csv(csv_path, index_col=0)
    # Subject IDs are looked up as strings later on, so store them as strings.
    table.index = table.index.map(str)
    return table


# Unrestricted and restricted HCP S1200 tables.
HCP_u = _read_hcp_table('../data/HCP_solar/HCP_s1200_unrestricted.csv')
HCP_r = _read_hcp_table('../data/HCP_solar/HCP_s1200_restricted.csv')


In [4]:
# Row counts of the unrestricted and restricted tables.
HCP_u.shape[0], HCP_r.shape[0]

(1206, 1206)

In [5]:
# Peek at the restricted table's index (subject IDs, mapped to strings when loaded).
HCP_r.index

Index(['100004', '100206', '100307', '100408', '100610', '101006', '101107',
       '101208', '101309', '101410',
       ...
       '987983', '989987', '990366', '991267', '992673', '992774', '993675',
       '994273', '995174', '996782'],
      dtype='object', name='Subject', length=1206)

# Combine T1w/T2w ratios with HCP demographics

### LSUB

In [11]:
# Fresh copy of the subject IDs for this section.
mysubjects = list(subjlist)
print(len(mysubjects))


tot_node_num_lsub = 1024 ## because subiculum has 1024 points
# Column labels 'node_1' ... 'node_<tot_node_num_lsub>' (1-based).
node_str = ['node_' + str(node_idx) for node_idx in range(1, tot_node_num_lsub + 1)]
print(len(node_str))
node_str[0], '....', node_str[-1]

709
1024


('node_1', '....', 'node_1024')

In [12]:
# Column layout: two demographic columns followed by one column per node.
mycols = ['age', 'sex']
mycols.extend(node_str)

In [8]:
#mycols

In [13]:
# Empty (all-NaN) frame: one row per subject, index named 'id'.
df = pd.DataFrame(columns=mycols,
                  index=pd.Index(mysubjects, name='id'))

In [14]:
#df

In [15]:

# Fill one row per subject: age (restricted table), sex (unrestricted table)
# and the left-subiculum t1t2 node values read from the subject's .h5 file.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Assign through a single .loc call. The previous chained form
    # (df.iloc[i]['age'] = ...) writes to a temporary row object and is not
    # guaranteed to reach df; under pandas Copy-on-Write it never does.
    # A subject missing from any table still raises KeyError.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_sub_left.h5' % (subjID))

    # The context manager closes the HDF5 handle for every subject instead of
    # leaking one open file per iteration; np.array() copies the dataset
    # stored under the subject ID into memory before the file is closed.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])

    
    

In [16]:
#df

In [17]:
# Keep only subjects whose row has no missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
df.shape[0]

709

In [18]:
# Write the left-subiculum table into its solar folder.
out_csv = '../solar/solar_msm50_t1t2_lsub/t1t2_lsub.csv'
df.to_csv(out_csv)

### LCA

In [19]:
# Fresh copy of the subject IDs for this section.
mysubjects = list(subjlist)
print(len(mysubjects))


tot_node_num_lsub = 2048 ## because CA has 2048 points
# Column labels 'node_1' ... 'node_<tot_node_num_lsub>' (1-based).
node_str = ['node_' + str(node_idx) for node_idx in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Two demographic columns followed by one column per node.
mycols = ['age', 'sex'] + node_str

# Empty (all-NaN) frame: one row per subject, index named 'id'.
df = pd.DataFrame(columns=mycols,
                  index=pd.Index(mysubjects, name='id'))

709
2048


In [20]:
# Fill one row per subject: age (restricted table), sex (unrestricted table)
# and the left-CA t1t2 node values read from the subject's .h5 file.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Assign through a single .loc call. The previous chained form
    # (df.iloc[i]['age'] = ...) writes to a temporary row object and is not
    # guaranteed to reach df; under pandas Copy-on-Write it never does.
    # A subject missing from any table still raises KeyError.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_ca_left.h5' % (subjID))

    # The context manager closes the HDF5 handle for every subject instead of
    # leaking one open file per iteration; np.array() copies the dataset
    # stored under the subject ID into memory before the file is closed.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [21]:
# Keep only subjects whose row has no missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
df.shape[0]

709

In [22]:
# Write the left-CA table into its solar folder.
out_csv = '../solar/solar_msm50_t1t2_lca/t1t2_lca.csv'
df.to_csv(out_csv)

### LDG

In [26]:
# Fresh copy of the subject IDs for this section.
mysubjects = list(subjlist)
print(len(mysubjects))


tot_node_num_lsub = 1024 ## number of node columns used for DG in this section
# Column labels 'node_1' ... 'node_<tot_node_num_lsub>' (1-based).
node_str = ['node_' + str(node_idx) for node_idx in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Two demographic columns followed by one column per node.
mycols = ['age', 'sex'] + node_str

# Empty (all-NaN) frame: one row per subject, index named 'id'.
df = pd.DataFrame(columns=mycols,
                  index=pd.Index(mysubjects, name='id'))

709
1024


In [27]:
# Fill one row per subject: age (restricted table), sex (unrestricted table)
# and the left-DG t1t2 node values read from the subject's .h5 file.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Assign through a single .loc call. The previous chained form
    # (df.iloc[i]['age'] = ...) writes to a temporary row object and is not
    # guaranteed to reach df; under pandas Copy-on-Write it never does.
    # A subject missing from any table still raises KeyError.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_dg_left.h5' % (subjID))

    # The context manager closes the HDF5 handle for every subject instead of
    # leaking one open file per iteration; np.array() copies the dataset
    # stored under the subject ID into memory before the file is closed.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [28]:
# Keep only subjects whose row has no missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
df.shape[0]

709

In [29]:
# Write the left-DG table into its solar folder.
out_csv = '../solar/solar_msm50_t1t2_ldg/t1t2_ldg.csv'
df.to_csv(out_csv)

### RSUB

In [30]:
# Fresh copy of the subject IDs for this section.
mysubjects = list(subjlist)
print(len(mysubjects))


tot_node_num_lsub = 1024
# Column labels 'node_1' ... 'node_<tot_node_num_lsub>' (1-based).
node_str = ['node_' + str(node_idx) for node_idx in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Two demographic columns followed by one column per node.
mycols = ['age', 'sex'] + node_str

# Empty (all-NaN) frame: one row per subject, index named 'id'.
df = pd.DataFrame(columns=mycols,
                  index=pd.Index(mysubjects, name='id'))

709
1024


In [32]:
# Fill one row per subject: age (restricted table), sex (unrestricted table)
# and the right-subiculum t1t2 node values read from the subject's .h5 file.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Assign through a single .loc call. The previous chained form
    # (df.iloc[i]['age'] = ...) writes to a temporary row object and is not
    # guaranteed to reach df; under pandas Copy-on-Write it never does.
    # A subject missing from any table still raises KeyError.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_sub_right.h5' % (subjID))

    # The context manager closes the HDF5 handle for every subject instead of
    # leaking one open file per iteration; np.array() copies the dataset
    # stored under the subject ID into memory before the file is closed.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [33]:
# Keep only subjects whose row has no missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
df.shape[0]

709

In [34]:
# Write the right-subiculum table into its solar folder.
out_csv = '../solar/solar_msm50_t1t2_rsub/t1t2_rsub.csv'
df.to_csv(out_csv)

### RCA

In [35]:
# Fresh copy of the subject IDs for this section.
mysubjects = list(subjlist)
print(len(mysubjects))


tot_node_num_lsub = 2048
# Column labels 'node_1' ... 'node_<tot_node_num_lsub>' (1-based).
node_str = ['node_' + str(node_idx) for node_idx in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Two demographic columns followed by one column per node.
mycols = ['age', 'sex'] + node_str

# Empty (all-NaN) frame: one row per subject, index named 'id'.
df = pd.DataFrame(columns=mycols,
                  index=pd.Index(mysubjects, name='id'))

709
2048


In [36]:
# Fill one row per subject: age (restricted table), sex (unrestricted table)
# and the right-CA t1t2 node values read from the subject's .h5 file.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Assign through a single .loc call. The previous chained form
    # (df.iloc[i]['age'] = ...) writes to a temporary row object and is not
    # guaranteed to reach df; under pandas Copy-on-Write it never does.
    # A subject missing from any table still raises KeyError.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_ca_right.h5' % (subjID))

    # The context manager closes the HDF5 handle for every subject instead of
    # leaking one open file per iteration; np.array() copies the dataset
    # stored under the subject ID into memory before the file is closed.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [37]:
# Keep only subjects whose row has no missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
df.shape[0]

709

In [38]:
# Write the right-CA table into its solar folder.
out_csv = '../solar/solar_msm50_t1t2_rca/t1t2_rca.csv'
df.to_csv(out_csv)

### RDG

In [39]:
# Fresh copy of the subject IDs for this section.
mysubjects = list(subjlist)
print(len(mysubjects))


tot_node_num_lsub = 1024
# Column labels 'node_1' ... 'node_<tot_node_num_lsub>' (1-based).
node_str = ['node_' + str(node_idx) for node_idx in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Two demographic columns followed by one column per node.
mycols = ['age', 'sex'] + node_str

# Empty (all-NaN) frame: one row per subject, index named 'id'.
df = pd.DataFrame(columns=mycols,
                  index=pd.Index(mysubjects, name='id'))

709
1024


In [40]:
# Fill one row per subject: age (restricted table), sex (unrestricted table)
# and the right-DG t1t2 node values read from the subject's .h5 file.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Assign through a single .loc call. The previous chained form
    # (df.iloc[i]['age'] = ...) writes to a temporary row object and is not
    # guaranteed to reach df; under pandas Copy-on-Write it never does.
    # A subject missing from any table still raises KeyError.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_dg_right.h5' % (subjID))

    # The context manager closes the HDF5 handle for every subject instead of
    # leaking one open file per iteration; np.array() copies the dataset
    # stored under the subject ID into memory before the file is closed.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [41]:
# Keep only subjects whose row has no missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
df.shape[0]

709

In [42]:
# Write the right-DG table into its solar folder.
out_csv = '../solar/solar_msm50_t1t2_rdg/t1t2_rdg.csv'
df.to_csv(out_csv)