# Seed maps
- Take a dataset and generate seed-based connectivity maps, one per region of the template.
- Represent each map as a vector over the voxels that lie inside the mask.

In [1]:
import os
import time
import numpy as np
import pandas as pd
import nibabel as nib

In [2]:
# Input locations, all resolved below a single project root
root_p = '/home/surchs/sim_big/PROJECT/abide_hps/'
mask_dir = os.path.join(root_p, 'mask')
mask_p = os.path.join(mask_dir, 'MIST_mask.nii.gz')
temp_p = os.path.join(mask_dir, 'MIST_20.nii.gz')
pheno_p = os.path.join(root_p, 'pheno', 'abide_1_complete.csv')
# Per-subject input images: directory plus a file name pattern that is
# filled with the subject ID (zero padded to 7 characters)
fc_p = os.path.join(root_p, 'fc')
fc_t = 'fmri_{:07}_session_1_run1.nii.gz'
# Where results go; the directory is created on first use
out_p = os.path.join(root_p, 'seed', 'MIST_20')
if not os.path.isdir(out_p):
    os.makedirs(out_p)
# Output file name pattern (without extension), filled with the subject ID
out_t = 'sub_{}_mist_20'

In [3]:
def corr2_coeff(A,B):
    """Pearson correlation between every row of A and every row of B.

    A and B are 2D arrays with the same number of columns. The result has
    shape (A.shape[0], B.shape[0]); entry [i, j] is the correlation between
    row i of A and row j of B.
    """
    # Centre every row on its own mean
    a_centred = A - A.mean(axis=1, keepdims=True)
    b_centred = B - B.mean(axis=1, keepdims=True)

    # Sum of squared deviations, one value per row
    a_ss = np.square(a_centred).sum(axis=1)
    b_ss = np.square(b_centred).sum(axis=1)

    # Cross products of the centred rows, scaled by the product of their norms
    cross = np.dot(a_centred, b_centred.T)
    scale = np.sqrt(a_ss[:, None] * b_ss[None, :])
    return cross / scale

In [4]:
# Load data
pheno = pd.read_csv(pheno_p)
mask_i = nib.load(mask_p)
# `get_data()` was deprecated in nibabel 3.0 and removed in 5.0;
# `np.asanyarray(img.dataobj)` is the documented equivalent.
mask = np.asanyarray(mask_i.dataobj).astype(bool)
temp = np.asanyarray(nib.load(temp_p).dataobj)
# Keep only the in-mask voxels of the label image, as a flat vector
temp_m = temp[mask]

In [5]:
# Find the data
# Iterate over the SUB_ID column directly: `iterrows()` builds a Series for
# every row, which is slow and does not preserve column dtypes across the row
# (an upcast to float would break the zero padded file name).
data_paths = [os.path.join(fc_p, fc_t.format(sub_id)) for sub_id in pheno['SUB_ID']]
# True for every subject whose expected input file is not on disk
missing_data = [not os.path.isfile(i) for i in data_paths]

In [6]:
# Number of subjects whose input file could not be found
np.asarray(missing_data).sum()

0

In [7]:
# Get the regions (the non-zero labels found inside the mask)
regions = np.unique(temp_m[temp_m!=0]).astype(int)
# Region membership does not depend on the subject, so build the boolean
# voxel selectors once instead of once per subject and region
region_masks = [temp_m==region for region in regions]
# Make the seed maps
start = time.time()
n_sub = pheno.shape[0]
# Iterate over the SUB_ID column directly; `iterrows()` does not preserve
# column dtypes across a row and an upcast would break the file name patterns
for rid, sub_id in pheno['SUB_ID'].items():
    # See if we have already generated this for the subject
    search_p = '{}.npy'.format(os.path.join(out_p, out_t.format(sub_id)))
    if os.path.isfile(search_p):
        continue
    tic = time.time()
    p = os.path.join(fc_p, fc_t.format(sub_id))
    # `get_data()` was removed in nibabel 5.0; `np.asanyarray(img.dataobj)`
    # is the documented equivalent. Indexing with the mask keeps only the
    # in-mask voxels along the first axis.
    ts = np.asanyarray(nib.load(p).dataobj)[mask]
    # Get the average in network signal
    avg_net = np.array([np.mean(ts[region_mask, :], 0) for region_mask in region_masks])
    # Seed stack: correlation of every region average with every voxel
    seed_stack = corr2_coeff(avg_net, ts)
    # FisherZ transform (NOTE: a correlation of exactly +/-1 maps to +/-inf)
    seed_fz = np.arctanh(seed_stack)
    # Save the seed stack; the path already ends in '.npy', so this writes
    # exactly the file that the skip check above looks for
    np.save(search_p, seed_fz)
    if rid%20==0:
        toc = time.time()
        print('Done with sub {}/{}, this took {:.3f}s ({:.2f})'.format(rid+1, n_sub, toc-tic, toc-start))

Done with sub 21/1107, this took 10.228s (24.63)
Done with sub 41/1107, this took 1.637s (61.47)
Done with sub 61/1107, this took 2.475s (96.56)
Done with sub 161/1107, this took 1.776s (220.22)
Done with sub 181/1107, this took 1.734s (256.68)
Done with sub 221/1107, this took 1.238s (290.88)
Done with sub 241/1107, this took 2.563s (313.75)
Done with sub 301/1107, this took 1.641s (340.79)
Done with sub 321/1107, this took 1.685s (360.99)
Done with sub 341/1107, this took 1.556s (432.04)
Done with sub 381/1107, this took 1.587s (443.02)
Done with sub 401/1107, this took 1.446s (457.91)
Done with sub 461/1107, this took 0.630s (480.97)
Done with sub 501/1107, this took 0.574s (519.94)
Done with sub 581/1107, this took 1.664s (564.51)
Done with sub 601/1107, this took 1.664s (595.36)
Done with sub 621/1107, this took 1.622s (627.12)
Done with sub 641/1107, this took 0.656s (659.11)
Done with sub 661/1107, this took 1.800s (690.09)
Done with sub 821/1107, this took 0.930s (772.59)
Done 