# Seed maps
- Take a dataset and generate seed maps (one map per template region)
- Represent each map as a vector over the voxels inside the mask

In [19]:
import os
import time
import numpy as np
import pandas as pd
import nibabel as nib

In [20]:
# Scale of the MIST template to use; it is inserted into the template file name
# and into the names of the output folder and files defined in the paths cell.
scale = 7

In [21]:
# Paths
# NOTE(review): root_p is an absolute, machine-specific location; adjust it when
# running this notebook on another system.
root_p = '/home/surchs/sim_big/PROJECT/abide_hps/'
mask_p = os.path.join(root_p, 'mask', 'MIST_mask.nii.gz')
temp_p = os.path.join(root_p, 'mask', 'MIST_{}.nii.gz'.format(scale))
pheno_p = os.path.join(root_p, 'pheno', 'abide_1_complete.csv')
# Data: one file per subject, named after the subject ID zero-padded to 7 digits
fc_p = os.path.join(root_p, 'fc')
fc_t = 'fmri_{:07}_session_1_run1.nii.gz'
# Output: one folder per template scale
out_p = os.path.join(root_p, 'seed', 'MIST_{}'.format(scale))
# exist_ok avoids the check-then-create race of "isdir, then makedirs" and is a
# no-op when the folder is already there
os.makedirs(out_p, exist_ok=True)
# File name stem that keeps one open '{}' slot for the subject ID
# (for scale 7 this is 'sub_{}_mist_7')
out_t = 'sub_{{}}_mist_{0}'.format(scale)

In [22]:
def corr2_coeff(A,B):
    """Pearson correlation between every row of A and every row of B.

    A and B are 2-D arrays with the same number of columns. The result has
    shape (A.shape[0], B.shape[0]); entry [i, j] is the correlation between
    row i of A and row j of B.
    """
    # Center every row on its own mean
    centered_a = A - A.mean(axis=1, keepdims=True)
    centered_b = B - B.mean(axis=1, keepdims=True)

    # Row-wise sums of squared deviations
    sq_a = np.square(centered_a).sum(axis=1)
    sq_b = np.square(centered_b).sum(axis=1)

    # Cross products of the centered rows, scaled by the product of the row norms
    cross = centered_a.dot(centered_b.T)
    norm_ab = np.sqrt(np.outer(sq_a, sq_b))
    return cross / norm_ab

In [23]:
# Load data
pheno = pd.read_csv(pheno_p)
mask_i = nib.load(mask_p)
# Image.get_data() is deprecated in nibabel (and removed in recent releases);
# np.asanyarray(img.dataobj) is the documented replacement that returns the same array.
mask = np.asanyarray(mask_i.dataobj).astype(bool)
temp = np.asanyarray(nib.load(temp_p).dataobj)
# The boolean indexing below only makes sense when template and mask share one grid
assert temp.shape == mask.shape, 'template and mask have different shapes'
# Template labels of the voxels inside the mask, as a 1-D vector
temp_m = temp[mask]

In [24]:
# Find the data
# Iterate over the SUB_ID column itself: iterrows() builds a Series for every row
# and can upcast the ID to float, which would break the zero-padded '{:07}' file name.
data_paths = [os.path.join(fc_p, fc_t.format(sub_id)) for sub_id in pheno['SUB_ID']]
# True for every subject whose expected file does not exist
missing_data = [not os.path.isfile(data_p) for data_p in data_paths]

In [25]:
# Number of subjects whose expected file is missing (0 means every file was found)
np.sum(missing_data)

0

In [26]:
# Get the regions (the non-zero template labels found inside the mask)
regions = np.unique(temp_m[temp_m!=0]).astype(int)
# The voxel selection of each region is identical for every subject, so build it once
region_masks = [temp_m==region for region in regions]
# Make the seed maps
start = time.time()
n_sub = pheno.shape[0]
# Iterate over the ID column directly (iterrows() can upcast the ID to float) and
# count positions with enumerate so the progress report does not depend on index labels
for rid, sub_id in enumerate(pheno['SUB_ID']):
    # See if we have already generated this for the subject
    search_p = os.path.join(out_p, '{}.npy'.format(out_t.format(sub_id)))
    if os.path.isfile(search_p):
        continue
    tic = time.time()
    p = os.path.join(fc_p, fc_t.format(sub_id))
    # get_data() is deprecated in nibabel; np.asanyarray(img.dataobj) returns the same array.
    # Indexing with the mask leaves one row per in-mask voxel.
    ts = np.asanyarray(nib.load(p).dataobj)[mask]
    # Get the average in network signal
    avg_net = np.array([np.mean(ts[region_mask, :], 0) for region_mask in region_masks])
    # Seed stack: one row per region, one column per in-mask voxel
    seed_stack = corr2_coeff(avg_net, ts)
    # FisherZ transform
    # NOTE(review): voxels with a constant signal give NaN correlations and
    # correlations of exactly +/-1 give +/-inf here - confirm downstream code copes.
    seed_fz = np.arctanh(seed_stack)
    # Save the seed stack. Write to a temporary file first and rename it afterwards,
    # so an interrupted run never leaves a partial file that the check above would
    # mistake for a finished subject.
    tmp_p = '{}.tmp'.format(search_p)
    with open(tmp_p, 'wb') as tmp_f:
        np.save(tmp_f, seed_fz)
    os.replace(tmp_p, search_p)
    if rid%20==0:
        toc = time.time()
        print('Done with sub {}/{}, this took {:.3f}s ({:.2f})'.format(rid+1, n_sub, toc-tic, toc-start))

Done with sub 1/1107, this took 0.874s (0.88)
Done with sub 21/1107, this took 1.582s (31.21)
Done with sub 41/1107, this took 1.991s (64.30)
Done with sub 61/1107, this took 2.974s (107.41)
Done with sub 81/1107, this took 1.496s (142.45)
Done with sub 101/1107, this took 0.661s (167.60)
Done with sub 121/1107, this took 2.090s (198.52)
Done with sub 141/1107, this took 2.230s (244.88)
Done with sub 161/1107, this took 2.259s (293.97)
Done with sub 181/1107, this took 2.005s (337.30)
Done with sub 201/1107, this took 1.480s (364.02)
Done with sub 221/1107, this took 1.322s (390.87)
Done with sub 241/1107, this took 1.352s (421.66)
Done with sub 261/1107, this took 1.915s (457.23)
Done with sub 281/1107, this took 1.733s (491.66)
Done with sub 301/1107, this took 1.805s (525.06)
Done with sub 321/1107, this took 1.673s (558.75)
Done with sub 341/1107, this took 1.816s (595.73)
Done with sub 361/1107, this took 1.986s (632.58)
Done with sub 381/1107, this took 1.956s (668.25)
Done with 

In [27]:
# Print a final marker once this cell is reached
print('Done')

Done
