# ABIDE with 16p and 22q similarity
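Generates, for each ABIDE subject, a table of region-wise similarity scores between the subject's connectivity and the 16p and 22q deletion-minus-control connectivity patterns.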

In [1]:
import numpy as np
import pandas as pd
import patsy as pat
import nibabel as nib
import pathlib as pal
import seaborn as sbn
from scipy import io as sio
from sklearn import linear_model as sln
from matplotlib import pyplot as plt

In [4]:
# Root of the project data. Alternative root (currently disabled):
#   /home/users/surchs/mnt/cedar_jacrrg/su_cm/paper_16p22q/
root_p = pal.Path('/home/surchs/mnt/stark_cisl/paper_16p22q/')

# Phenotype table and parcel label table
pheno_p = pal.Path(root_p, 'pheno/abide_men.csv')
labels_p = pal.Path('/home/surchs/mnt/stark_cisl/ATLAS/MIST/Parcel_Information/MIST_64.csv')

# Individual ABIDE connectomes: directory plus a file name template that is
# filled with the subject ID
abide_p = pal.Path(root_p, 'glm/abide/cambridge64/individual')
conn_t = 'connectome_s{}_cambridge64.mat'

# Deletion-minus-control GLM result files for the 16p and 22q samples
dc16_p = pal.Path(root_p, 'glm/16p/cambridge64/del_minus_con/glm_del_minus_con_cambridge64.mat')
dc22_p = pal.Path(root_p, 'glm/22q/cambridge64/del_minus_con/glm_del_minus_con_cambridge64.mat')

In [5]:
# Parcel label table (semicolon-separated file)
labels = pd.read_csv(labels_p, sep=';')
# Phenotype table, one row per subject
pheno = pd.read_csv(pheno_p)

In [6]:
def octvec2mat(vec, mask):
    """
    Scatter a vector into the True positions of a mask, walking the
    mask in column-major (Fortran / Octave / Matlab) order.

    vec must hold one value per True element of mask. The result is a
    float array with the shape of mask; positions outside the mask are 0.
    """
    # Column-major view of the mask as a flat boolean selector
    selector = np.reshape(mask, -1, order='F').astype(bool)
    # Fill the selected slots of a flat float buffer in that same order
    flat = np.zeros(selector.shape, dtype=float)
    flat[selector] = vec
    # Fold the buffer back into the mask shape, again column-major
    return flat.reshape(mask.shape, order='F')

In [7]:
def vec2mat(vec, mask):
    """
    Turn a vector of masked values into a full symmetric matrix.

    The vector is first placed into the mask positions with octvec2mat,
    then mirrored by adding the transpose. The diagonal is counted twice
    by that addition and is therefore halved again.
    NOTE(review): this presumes a square mask that covers one triangle
    including the diagonal -- confirm against callers.
    """
    one_sided = octvec2mat(vec, mask)
    mirrored = one_sided + one_sided.T
    # Undo the doubling of the diagonal caused by adding the transpose
    on_diag = np.eye(mask.shape[0]).astype(bool)
    mirrored[on_diag] = mirrored[on_diag] / 2
    return mirrored

In [14]:
# Load the connectome of every subject, in the row order of the pheno table.
# The SUB_ID column is iterated directly instead of using iterrows(): iterrows
# builds a Series per row and does not preserve dtypes, so an integer ID could
# be upcast and formatted incorrectly in the file name.
# The trailing [0][0] unwraps the 'connectome' field of the 'asd_con' struct
# as returned by loadmat.
conn = np.array([
    sio.loadmat(str(abide_p / conn_t.format(sub_id)))['asd_con']['connectome'][0][0]
    for sub_id in pheno['SUB_ID']
]).squeeze()

In [16]:
# Build the design matrix for the regression from the SITE_ID and FD_scrubbed
# columns of the pheno table.
# NOTE(review): the formula has no diagnosis term, so the earlier remark
# "with control as reference" does not seem to apply here -- confirm.
nuisance_formula = 'SITE_ID+FD_scrubbed'
model = pat.dmatrix(nuisance_formula, data=pheno, return_type='dataframe')

In [17]:
# Run the glm: regress the design matrix out of every connection at once.
# `normalize=True` was dropped on purpose: scikit-learn ignores it when
# fit_intercept=False, and the argument was removed in scikit-learn 1.2
# (passing it raises a TypeError there). The fit itself is unchanged.
# NOTE(review): fit_intercept=False presumably relies on the design matrix
# carrying its own intercept column -- confirm.
mod = sln.LinearRegression(fit_intercept=False)
res = mod.fit(model, conn)
# Get the residuals, i.e. the connectomes with the modelled effects removed
resid = conn - res.predict(model)

In [18]:
# Rebuild one symmetric node-by-node matrix per subject from the residual
# vectors (lower triangle incl. diagonal, column-major order).
n_nodes = 64
tril_mask = np.tril(np.ones((n_nodes, n_nodes))).astype(bool)
resid_mat = np.array([vec2mat(sub_resid, tril_mask) for sub_resid in resid])
# vec2mat already mirrors the triangle and corrects the diagonal, so the
# matrices are complete at this point. Adding the transpose a second time and
# halving the diagonal afterwards would leave the off-diagonal entries doubled
# relative to the diagonal and bias the row-wise correlations computed against
# the (singly symmetrised) reference matrices.
abide_mat = resid_mat.copy()

In [19]:
# Get the 16p reference: the 'eff' vector stored in the deletion-minus-control
# GLM result file. The path is passed as a string, consistent with how the
# individual connectome files are loaded.
dc16_pattern = np.array(sio.loadmat(str(dc16_p))['eff']).squeeze()
# Map back to a symmetric matrix. vec2mat fills the lower triangle, mirrors it
# and halves the doubled diagonal, which is exactly what was done by hand here.
dc16_refm = vec2mat(dc16_pattern, np.tril(np.ones((64, 64))).astype(bool))

In [20]:
# Get the 22q reference: the 'eff' vector stored in the deletion-minus-control
# GLM result file. The path is passed as a string, consistent with how the
# individual connectome files are loaded.
dc22_pattern = np.array(sio.loadmat(str(dc22_p))['eff']).squeeze()
# Map back to a symmetric matrix. vec2mat fills the lower triangle, mirrors it
# and halves the doubled diagonal, which is exactly what was done by hand here.
dc22_refm = vec2mat(dc22_pattern, np.tril(np.ones((64, 64))).astype(bool))

In [21]:
# Make weights for each node: for every subject and every node, the Pearson
# correlation between the node's row in the 16p reference matrix and the same
# row in the subject's matrix.
# Sizes are taken from the arrays instead of a hard-coded 64.
n_subjects, n_nodes = abide_mat.shape[0], dc16_refm.shape[0]
w_abide_16p = np.zeros((n_subjects, n_nodes))
for sub_idx, sub_mat in enumerate(abide_mat):
    for node_idx, ref_profile in enumerate(dc16_refm):
        # corrcoef returns the 2x2 correlation matrix; [0, 1] is the cross term
        w_abide_16p[sub_idx, node_idx] = np.corrcoef(ref_profile, sub_mat[node_idx, :])[0, 1]

In [22]:
# Make weights for each node: for every subject and every node, the Pearson
# correlation between the node's row in the 22q reference matrix and the same
# row in the subject's matrix.
# Sizes are taken from the arrays instead of a hard-coded 64.
n_subjects, n_nodes = abide_mat.shape[0], dc22_refm.shape[0]
w_abide_22q = np.zeros((n_subjects, n_nodes))
for sub_idx, sub_mat in enumerate(abide_mat):
    for node_idx, ref_profile in enumerate(dc22_refm):
        # corrcoef returns the 2x2 correlation matrix; [0, 1] is the cross term
        w_abide_22q[sub_idx, node_idx] = np.corrcoef(ref_profile, sub_mat[node_idx, :])[0, 1]

In [23]:
# Wrap both weight arrays in data frames: rows are indexed by the DX_GROUP
# column of the pheno table, columns are named after the parcel labels.
table_abide_16p, table_abide_22q = (
    pd.DataFrame(data=weights, index=pheno['DX_GROUP'], columns=labels['label'])
    for weights in (w_abide_16p, w_abide_22q)
)

In [25]:
# Write both similarity tables to disk as tab-separated files
for table, out_path in (
    (table_abide_16p, '/home/surchs/abide_16p_similarity.tsv'),
    (table_abide_22q, '/home/surchs/abide_22q_similarity.tsv'),
):
    table.to_csv(out_path, sep='\t')