# Seed maps
- Take a dataset and generate one seed map per template region for every subject
- Represent each seed map as a vector over the voxels inside the brain mask

In [1]:
import os
import time
import numpy as np
import pandas as pd
import nibabel as nib

In [2]:
# Paths
# NOTE: root_p is an absolute, machine-specific location; adjust it when running elsewhere
root_p = '/home/surchs/sim_big/PROJECT/abide_hps/'
# Brain mask selecting the voxels that enter the analysis
mask_p = os.path.join(root_p, 'mask', 'MIST_mask.nii.gz')
# Template image whose non-zero labels define the seed regions
temp_p = os.path.join(root_p, 'mask', 'MIST_20.nii.gz')
# Phenotype table; its SUB_ID column identifies the subjects
pheno_p = os.path.join(root_p, 'pheno', 'psm_abide1.csv')
# Data
# Directory with the per-subject images and the file name template
# (the subject ID is zero-padded to 7 digits)
fc_p = os.path.join(root_p, 'fc')
fc_t = 'fmri_{:07}_session_1_run1.nii.gz'
# Output
out_p = os.path.join(root_p, 'seed', 'MIST_20')
# exist_ok=True creates the directory only when it is missing, without a separate
# check-then-create step that could race with another process
os.makedirs(out_p, exist_ok=True)
# File name stem of the saved seed maps (np.save appends the '.npy' suffix)
out_t = 'sub_{}_mist_20'

In [3]:
def corr2_coeff(A, B):
    """Pearson correlation between every row of A and every row of B.

    Parameters
    ----------
    A : array_like, shape (n_a, n_obs)
        One variable per row, one observation per column.
    B : array_like, shape (n_b, n_obs)
        Must have the same number of columns as A.

    Returns
    -------
    numpy.ndarray, shape (n_a, n_b)
        Entry [i, j] is the correlation of A[i] with B[j], clipped to [-1, 1].
        A row without variance yields NaN for all of its correlations.
    """
    A = np.asarray(A)
    B = np.asarray(B)

    # Rowwise mean of input arrays & subtract from input arrays themselves
    A_mA = A - A.mean(1)[:, None]
    B_mB = B - B.mean(1)[:, None]

    # Sum of squares across rows
    ssA = (A_mA**2).sum(1)
    ssB = (B_mB**2).sum(1)

    # Finally get corr coeff
    corr = np.dot(A_mA, B_mB.T) / np.sqrt(np.dot(ssA[:, None], ssB[None]))
    # Rounding errors can push perfectly correlated rows marginally outside
    # [-1, 1] (np.corrcoef clips for the same reason); such values would turn
    # into NaN in a subsequent arctanh. NaN entries pass through the clip unchanged.
    return np.clip(corr, -1.0, 1.0)

In [4]:
# Load data
# Phenotype table, one row per subject
pheno = pd.read_csv(pheno_p)
mask_i = nib.load(mask_p)
# get_data() is deprecated since nibabel 3.0 and removed in 5.0;
# np.asanyarray(img.dataobj) is its documented replacement and keeps the on-disk dtype
mask = np.asanyarray(mask_i.dataobj).astype(bool)
temp = np.asanyarray(nib.load(temp_p).dataobj)
# Template labels of the in-mask voxels
# (a 1D vector, assuming template and mask share the same grid)
temp_m = temp[mask]

In [5]:
# Find the data
# Iterate over the SUB_ID column directly: iterrows() builds a Series per row and
# may upcast an integer ID to float, which would corrupt the zero-padded file name
data_paths = [os.path.join(fc_p, fc_t.format(sub_id)) for sub_id in pheno['SUB_ID']]
# True for every subject whose image file does not exist on disk
missing_data = [not os.path.isfile(path) for path in data_paths]

In [6]:
# Number of subjects whose image file was not found
np.asarray(missing_data).sum()

0

In [7]:
# Get the regions
# Non-zero template labels found inside the mask, as sorted unique integers
regions = np.unique(temp_m[temp_m!=0]).astype(int)
# The template is the same for every subject, so build each region's voxel
# selector once instead of once per subject
region_masks = [temp_m == region for region in regions]
# Make the seed maps
start = time.time()
# Iterate over the ID column directly (iterrows() may upcast integer IDs to float);
# enumerate provides the running position used for the progress report, so the
# report does not depend on the DataFrame index
for rid, sub_id in enumerate(pheno['SUB_ID']):
    tic = time.time()
    p = os.path.join(fc_p, fc_t.format(sub_id))
    # np.asanyarray(img.dataobj) replaces get_data(), which was removed in nibabel 5.0.
    # Indexing with the mask keeps the in-mask voxels as rows
    # (assumes a 4D image on the same grid as the mask, giving voxels x time points)
    ts = np.asanyarray(nib.load(p).dataobj)[mask]
    # Get the average in network signal: one row per region
    avg_net = np.array([np.mean(ts[region_mask, :], 0) for region_mask in region_masks])
    # Seed stack: correlation of each region average with every in-mask voxel
    seed_stack = corr2_coeff(avg_net, ts)
    # FisherZ transform
    seed_fz = np.arctanh(seed_stack)
    # Save the seed stack (np.save appends '.npy' to the file name)
    np.save(os.path.join(out_p, out_t.format(sub_id)), seed_fz)
    # Report progress for every 20th subject: time for this subject and total elapsed time
    if rid % 20 == 0:
        toc = time.time()
        print('Done with sub {}, this took {:.3f}s ({:.2f})'.format(rid+1, toc-tic, toc-start))

Done with sub 1, this took 0.876s (0.88)
Done with sub 21, this took 0.347s (14.14)
Done with sub 41, this took 1.493s (37.31)
Done with sub 61, this took 0.816s (62.50)
Done with sub 81, this took 0.802s (83.54)
Done with sub 101, this took 0.778s (104.88)
Done with sub 121, this took 1.310s (126.88)
Done with sub 141, this took 1.105s (149.25)
Done with sub 161, this took 1.014s (172.19)
Done with sub 181, this took 1.182s (194.74)
Done with sub 201, this took 1.372s (217.84)
Done with sub 221, this took 1.047s (237.22)
Done with sub 241, this took 0.780s (255.78)
Done with sub 261, this took 0.861s (270.51)
Done with sub 281, this took 2.068s (286.87)
Done with sub 301, this took 1.539s (319.07)
Done with sub 321, this took 1.628s (347.41)
Done with sub 341, this took 2.205s (385.94)
Done with sub 361, this took 4.047s (434.76)
