# Subtypes
- generate FC subtypes
- generate CT subtypes
- compute weights for them on the sample

In [1]:
# Imports
import os
import numpy as np
import scipy as sp
import pandas as pd
import patsy as pat
import nibabel as nib
from scipy import cluster as scl
from sklearn import linear_model as sln
from sklearn import preprocessing as skp

In [2]:
# Paths: every input and output lives below one project root
root_p = '/home/surchs/sim_big/PROJECT/abide_hps/'
pheno_dir = os.path.join(root_p, 'pheno')
residual_dir = os.path.join(root_p, 'residual')
mask_dir = os.path.join(root_p, 'mask')
subtype_dir = os.path.join(root_p, 'subtypes')

# Inputs: phenotype table, residual arrays (CT and FC) and the mask image
sample_p = os.path.join(pheno_dir, 'strict_abide1_nopsm.csv')
resid_ct_p = os.path.join(residual_dir, 'ct_30mm_residual_nopsm.npy')
resid_fc_p = os.path.join(residual_dir, 'sd_30mm_residual_nopsm.npy')
mask_p = os.path.join(mask_dir, 'MIST_mask.nii.gz')

# Output templates (no file extension; the save call is expected to add one)
subtype_fc_p = os.path.join(subtype_dir, 'subtypes_fc_nopsm')
subtype_ct_p = os.path.join(subtype_dir, 'subtypes_ct_nopsm')

In [3]:
# Load data
# Phenotype table; its row count is used as the number of subjects below
sample = pd.read_csv(sample_p)
n_sub = sample.shape[0]
# Residual arrays for CT and FC (first axis is indexed by subject later on)
ct = np.load(resid_ct_p)
fc = np.load(resid_fc_p)
# Mask image, converted to a boolean array.
# `img.get_data()` was deprecated in nibabel 3.0 and removed in 5.0;
# `np.asanyarray(img.dataobj)` is the documented drop-in replacement and
# also works with older nibabel releases.
mask_i = nib.load(mask_p)
mask = np.asanyarray(mask_i.dataobj).astype(bool)

# Generate CT subtype
- 5 subtypes
- distance-based (Euclidean distance between row-standardized maps)
- hierarchical clustering using Ward's linkage

In [4]:
def corr2_coeff(A, B):
    """Pearson correlation between every row of A and every row of B.

    Parameters
    ----------
    A : 2D array, shape (n_a, n_features)
    B : 2D array, shape (n_b, n_features)

    Returns
    -------
    2D array, shape (n_a, n_b), where entry (i, j) is the correlation
    between row i of A and row j of B. A row with zero variance yields
    NaN in the corresponding entries (division by zero).
    """
    # Remove the row-wise mean from each input
    a_centered = A - A.mean(axis=1, keepdims=True)
    b_centered = B - B.mean(axis=1, keepdims=True)

    # Row-wise sums of squares of the centered data
    a_ss = np.sum(a_centered ** 2, axis=1)
    b_ss = np.sum(b_centered ** 2, axis=1)

    # Cross-products of centered rows, normalized by the product of norms
    cross = a_centered @ b_centered.T
    scale = np.sqrt(np.outer(a_ss, b_ss))
    return cross / scale

In [5]:
def subtype(stack, n_subtypes):
    """Cluster the rows of `stack` hierarchically with Ward linkage.

    Parameters
    ----------
    stack : 2D array, one row per observation
    n_subtypes : int, maximum number of flat clusters to cut the tree into

    Returns
    -------
    order : list of row indices in dendrogram leaf order
    part : 1D array of flat cluster labels, one per row (labels start at 1)
    dist : condensed pairwise distance vector as returned by `pdist`
    """
    hierarchy = scl.hierarchy
    # Standardize every row, then take pairwise (default: Euclidean) distances
    standardized = skp.scale(stack, axis=1)
    condensed = sp.spatial.distance.pdist(standardized)
    # Build the tree once; derive both the flat partition and the leaf order
    tree = hierarchy.linkage(condensed, method='ward')
    labels = hierarchy.fcluster(tree, n_subtypes, criterion='maxclust')
    leaf_order = hierarchy.dendrogram(tree, no_plot=True)['leaves']
    return leaf_order, labels, condensed

In [6]:
# Run the CT subtypes
n_ct_subtypes = 5
order_ct, part_ct, dist_ct = subtype(ct, n_ct_subtypes)
# Expand the condensed distance vector into a square matrix
dmat_ct = sp.spatial.distance.squareform(dist_ct)
# Each subtype is the average of all rows assigned to one cluster label
# (labels run from 1 to n_ct_subtypes)
subtypes_ct = np.array([
    ct[part_ct == label].mean(axis=0)
    for label in range(1, n_ct_subtypes + 1)
])
# Weights: correlation of every row of ct with every subtype
weights_ct = corr2_coeff(ct, subtypes_ct)

In [7]:
# Run the FC subtypes: one independent clustering per slice along the
# last axis of fc
n_fc_maps = 20
n_fc_subtypes = 5

# Pre-allocate the per-slice results
dists_fc = np.zeros((n_sub, n_sub, n_fc_maps))
orders_fc = np.zeros((n_sub, n_fc_maps))
parts_fc = np.zeros((n_sub, n_fc_maps))
subtypes_fc = np.zeros((n_fc_subtypes,) + fc.shape[1:])
weights_fc = np.zeros((n_sub, n_fc_subtypes, n_fc_maps))

for sid in range(n_fc_maps):
    # Cluster the rows of this slice
    order_fc, part_fc, dist_fc = subtype(fc[..., sid], n_fc_subtypes)
    orders_fc[:, sid] = order_fc
    parts_fc[:, sid] = part_fc
    dists_fc[..., sid] = sp.spatial.distance.squareform(dist_fc)
    # Subtype = average over the rows carrying each cluster label (1-based)
    subtypes_fc_tmp = np.array([
        np.mean(fc[part_fc == label, :, sid], 0)
        for label in range(1, n_fc_subtypes + 1)
    ])
    subtypes_fc[..., sid] = subtypes_fc_tmp
    # Weights: correlation of every row with every subtype of this slice
    weights_fc[..., sid] = corr2_coeff(fc[..., sid], subtypes_fc_tmp)

In [8]:
# Save everything: one archive for FC and one for CT, each holding the
# subtypes, weights, leaf order, partition and distance matrix
np.savez(
    subtype_fc_p,
    subtypes=subtypes_fc,
    weights=weights_fc,
    order=orders_fc,
    part=parts_fc,
    dist=dists_fc,
)
np.savez(
    subtype_ct_p,
    subtypes=subtypes_ct,
    weights=weights_ct,
    order=order_ct,
    part=part_ct,
    dist=dmat_ct,
)