# Compute z-scores using PPI matrices and null models
This script takes the PPI matrices for each subject and converts them to z-scores by comparing them against null-model distributions.

# 1: Set up environment

In [None]:
#importing libraries
%matplotlib inline
from __future__ import print_function, division
import scipy.stats as ss
import os
from os.path import join as opj
import pandas as pd
import numpy as np
from numpy import inf
import matplotlib.pyplot as plt
import seaborn as sns
from nilearn import image, plotting
import nibabel as nb
import bct
from mne.stats import fdr_correction
from nistats import thresholding
from random import shuffle

print('Done Importing Packages')

In [None]:
# Change to directory where you saved the data
home_path = '/Users/steventompson/Git/tompson_netlearn_fmri'

# All input/output locations are built relative to home_path
data_dir = opj(home_path,'data')
template_dir = opj(data_dir,'brain_atlas')  # atlas image and atlas tables
path_InpData = opj(data_dir,'netLearn_ppi')  # observed PPI matrices
path_NullData = opj(data_dir,'netLearn_ppi_null')  # null-model PPI matrices
path_OutpData = opj(data_dir,'ppi_zscores')  # z-score outputs written by this notebook
path_Figures = opj(home_path,'figures','component_figs') # folder to put figures


# Only the output folders are created on demand; the input folders are not checked here
for path in [path_OutpData, path_Figures]:
    if not os.path.exists(path):
        print('Path does not exist, creating {}'.format(path))
        os.makedirs(path)

print('Set data paths')

In [None]:
# Set variables
n_subjs = 26  # number of subjects
n_node = 410  # number of nodes (reassigned from the atlas table once it is loaded)
n_perm = 500  # number of null-model permutations

In [None]:
def conv_cfg_vec_to_adj_matr(conn_vec):
    '''
    Convert a vector of unique connections to a symmetric adjacency matrix

    The vector is read as the upper triangle (diagonal excluded) of the
    matrix in row-major order. The lower triangle is filled by symmetry and
    the diagonal is left at zero.

    Parameters
    ----------
        conn_vec: numpy.ndarray
            Vector with shape (n_conn,) specifying unique connections,
            where n_conn = n_node*(n_node-1)/2

    Returns
    -------
        adj_matr: numpy.ndarray
            Symmetric matrix with shape (n_node, n_node)

    Raises
    ------
        ValueError
            If conn_vec is not 1-dimensional, or if its length is not a
            valid number of unique connections for a square matrix
    '''
    conn_vec = np.asarray(conn_vec)
    if conn_vec.ndim != 1:
        raise ValueError('conn_vec must be 1-dimensional, '
                         'got shape {!r}'.format(conn_vec.shape))

    # Compute number of nodes from n_conn = n_node*(n_node-1)/2
    n_conn = len(conn_vec)
    n_node = int(np.floor(np.sqrt(2*n_conn))+1)

    # Fail early with a readable message instead of a broadcasting error
    if n_node*(n_node-1)//2 != n_conn:
        raise ValueError('conn_vec has length {}, which is not '
                         'n_node*(n_node-1)/2 for any n_node'.format(n_conn))

    # Compute upper triangle indices (by convention)
    triu_ix, triu_iy = np.triu_indices(n_node, k=1)

    # Fill the upper triangle, then mirror it onto the lower triangle
    adj_matr = np.zeros((n_node, n_node))
    adj_matr[triu_ix, triu_iy] = conn_vec
    adj_matr += adj_matr.T

    return adj_matr


def convert_adj_matr_to_cfg_matr(adj_matr):
    '''
    Convert a stack of adjacency matrices to a configuration matrix

    For every window, the upper triangle (diagonal excluded) is read out in
    row-major order. Assumes symmetric connectivity, so the lower triangle
    is ignored.

    Parameters
    ----------
        adj_matr: numpy.ndarray
            Matrix with shape (n_win, n_node, n_node)

    Returns
    -------
        cfg_matr: numpy.ndarray
            Matrix with shape (n_win, n_conn), where
            n_conn = n_node*(n_node-1)/2

    Raises
    ------
        ValueError
            If adj_matr does not have exactly 3 dimensions
    '''
    adj_matr = np.asarray(adj_matr)
    if adj_matr.ndim != 3:
        raise ValueError('adj_matr requires 3-dimensions (n_win, n_node, n_node), '
                         'got shape {!r}'.format(adj_matr.shape))

    # Compute number of nodes
    n_node = adj_matr.shape[1]

    # Compute upper triangle indices (by convention)
    triu_ix, triu_iy = np.triu_indices(n_node, k=1)

    # Convert to configuration matrix
    return adj_matr[:, triu_ix, triu_iy]

In [None]:
def compute_pval_twotail(test_val,null_dist,plot=False,plot_title=''):
    '''
    Compute the p-value and z-score for a test value given a null distribution

    Parameters
    ----------
        test_val: float
            numeric value to compare to null distribution
        null_dist: numpy array
            1D array of numeric values to compare with test_val
        plot: boolean
            boolean indicating whether to plot the distribution

    Returns
    -------
        zval: float
            z-score of the test_val
        pval: float
            p-value of the test_val
    '''
    pval=ss.percentileofscore(null_dist,test_val)/100
    zval=ss.norm.ppf(pval)
    if pval>0.5:
        pval=(1-pval)
    # Set zvals where p=0.00000 to an arbitrary value 
    #in this case p=.0005 and z=3.29 or -3.29, 
    #which should be greater than max value so long as len(null_dist)<2000
    if zval==-inf:
        zval=ss.norm.ppf(.0005)
    elif zval==inf:
        zval=ss.norm.ppf(.9995)
    if plot:
        plt.hist(np.append(test_val,null_dist),bins=50)
        plt.axvline(x=test_val,color='black')
        plt.text(x=np.max(null_dist)*.9,y=10,s='P-val={}'.format(pval))
        plt.title(plot_title)
        plt.show()
        plt.close()
    return zval,pval

In [None]:
# One-off clean-up: rewrite every z-score CSV in place, replacing the
# placeholder values -4 / 4 with the z-scores for p=.0005 / p=.9995.
# NOTE(review): the original used `home_path1`, which is never defined in this
# notebook; `home_path` is assumed to be the intended root -- confirm.
path_zData=opj(home_path,'data','netLearn_ppi_zscores')
filelist=[f for f in os.listdir(path_zData) if f.endswith('.csv')]
for filename in filelist:
    data=pd.read_csv(opj(path_zData,filename))
    data=data.replace(-4,ss.norm.ppf(0.0005))
    data=data.replace(4,ss.norm.ppf(0.9995))
    # Drop the index column left by an earlier to_csv; to_csv below writes it again
    data=data.drop('Unnamed: 0',axis=1)
    data.to_csv(opj(path_zData,filename))

In [None]:
# Display the table left in `data` (the last CSV processed by the clean-up loop)
data

In [None]:
# Same table viewed as a plain numpy array
np.array(data)

### Import data

In [None]:
#load schaefer mask
schaefer_mask=nb.load('{}/schaefer400_harvard_oxford_2mm_mni_17network.nii.gz'.format(template_dir))

#load schaefer atlas info
#schaefer_atlas=pd.read_pickle('{}/s400ho_ns_netLearn_2mm.pickle'.format(template_dir))
schaefer_atlas=pd.read_csv('{}/s400ho_netLearn_2mm.csv'.format(template_dir))
schaefer_atlas_new=pd.read_pickle('{}/s400ho_ns_netLearn_2mm_new.pickle'.format(template_dir))

# Missing values become 'Uncertain'; in the 'System' column those rows are then relabeled 'Subcortical'
schaefer_atlas=schaefer_atlas.fillna('Uncertain')
schaefer_atlas.loc[schaefer_atlas['System']=='Uncertain','System']='Subcortical'

# Rows 402 and 407 are labeled 'Hippocampus' in both system columns
schaefer_atlas.loc[[402,407],'System']='Hippocampus'
schaefer_atlas.loc[[402,407],'System7']='Hippocampus'

# Integer code per node: position of the node's 'System' label in the sorted unique label list
sch_names=np.unique(schaefer_atlas['System'])
sch_nums=[int(np.where(sch_names==label)[0]) for label in schaefer_atlas['System']]


# Same relabeling and integer coding for the 'System7' column
schaefer_atlas.loc[schaefer_atlas['System7']=='Uncertain','System7']='Subcortical'
sch7_names=np.unique(schaefer_atlas['System7'])
sch7_nums=[int(np.where(sch7_names==label)[0]) for label in schaefer_atlas['System7']]

# Node coordinates (x, y, z columns) and a uniform colour per node
net_coords=np.array(schaefer_atlas.loc[:,['x','y','z']])
net_cols=['black']*len(net_coords)

# Node count is taken from the atlas table; n_conn is the number of unique node pairs
n_node = len(sch_nums)
triu_ix, triu_iy = np.triu_indices(n_node, k=1)
n_conn = len(triu_ix)

n_perm = 500

schaefer_atlas.head()

In [None]:
# Rows of the new atlas table whose 'ns_ROI_label' differs from the current atlas table
schaefer_atlas_new.loc[schaefer_atlas['ns_ROI_label']!=schaefer_atlas_new['ns_ROI_label'],['nID','Name','ROI_label','ns_ROI_label']]

In [None]:
# The same differing rows, shown from the current atlas table
schaefer_atlas.loc[schaefer_atlas['ns_ROI_label']!=schaefer_atlas_new['ns_ROI_label'],['nID','Name','ROI_label','ns_ROI_label']]

In [None]:
# Print the old and new 'ns_ROI_label' for three specific rows
for ix in [363,364,371]:
    print(ix,'Old: {}'.format(schaefer_atlas.loc[ix,'ns_ROI_label']),
          ' New: {}'.format(schaefer_atlas_new.loc[ix,'ns_ROI_label']))

In [None]:
# Preview the identifier, name and label columns of the atlas table
schaefer_atlas.loc[:,['nID','Name','System7','Peak AAL Label']].head()

In [None]:
# Two versions of the same hub index list; they differ in a single entry (364 in the new list, 371 in the old)
diffhubs_ns_new=[165, 177, 182, 304, 308, 310, 332, 364, 370, 376, 378]
diffhubs_ns_old=[165, 177, 182, 304, 308, 310, 332, 370, 371, 376, 378]

In [None]:
# Drop the columns listed below and write the slimmed table to the atlas CSV.
# NOTE(review): the target file name is the same one read into schaefer_atlas when the
# atlas is loaded, so re-running this cell after a fresh load will presumably fail because
# the columns are already gone; other cells also select columns dropped here
# (e.g. 'Name', 'Peak AAL Label') -- confirm the intended cell order.
schaefer_atlas=schaefer_atlas.drop(['ID', u'Name', 'netNum','Label', 'lNum', 'col1', 'col2', 'col3','col4',
                     'Peak AAL Label','Mode AAL Label',
                     'mentalizing','memory','workMem','reward',
                     'tpj','mpfc','ifg','vlpfc','dlpfc', 'pcc', 'hippocampus', 'striatum', 
                     'MentvWM', 'MentvMem','ment_bi','workMem_bi','mem_bi'],axis=1)
schaefer_atlas.to_csv(opj(template_dir,'s400ho_netLearn_2mm.csv'))

# 2: Compute edge-wise z-scores for connectivity matrices

In [None]:
# Load subject data
diffData=np.load(opj(path_InpData,'netLearn_diff_transition_PPI_26subjs.npz'))
combData=np.load(opj(path_InpData,'netLearn_comb_transition_PPI_26subjs.npz'))

# Average 'ppiMat' over its first axis and keep a leading axis of length 1
# (presumably the first axis indexes subjects -- TODO confirm)
diffMat1=np.expand_dims(np.mean(diffData['ppiMat'],axis=0),axis=0)
combMat1=np.expand_dims(np.mean(combData['ppiMat'],axis=0),axis=0)

# Load null data
comb_nullMat=np.load(opj(path_NullData,'netLearn_comb_nulldata_transition_PPI_26subjs_groupavg.npz'))['cfgMat']
diff_nullMat=np.load(opj(path_NullData,'netLearn_diff_nulldata_transition_PPI_26subjs_groupavg.npz'))['cfgMat']

In [None]:
def get_edge_zscores(roi_cfg,null_cfg,thresh=False,savename='test'):
    '''
    Compute a z-score and p-value for every edge and save them to disk

    Each edge value in roi_cfg is compared against the matching column of
    null_cfg with compute_pval_twotail. The results are written to
    '<path_OutpData>/<savename>.npz' under the keys 'zMat' and 'pMat'.
    Nothing is returned.

    Parameters
    ----------
        roi_cfg: numpy array
            observed edge values; squeezed to 1D (n_conn,) before use
        null_cfg: numpy array
            null edge values; squeezed to 2D (n_null, n_conn) before use
        thresh: boolean
            if True, apply FDR correction: z-scores of edges that are not
            rejected are set to zero and the saved p-values are the
            FDR-adjusted ones
        savename: str
            file name (without extension) for the saved .npz
    '''
    print('')
    roi_cfg=roi_cfg.squeeze()
    null_cfg=null_cfg.squeeze()
    nconn=roi_cfg.shape[-1]
    zmat=np.zeros(roi_cfg.shape)
    pmat=zmat.copy()

    # Report progress about every 10% of the edges. max() keeps the step at
    # least 1, so fewer than 10 edges no longer triggers a modulo by zero.
    step=max(1,nconn//10)
    for x in range(nconn):
        if x==0:
            print('Working on edge 1',end='')
        elif (x+1)%step==0:
            print('...{}'.format(x+1),end='')
        elif (x+1)==nconn:
            print('')
        zval,pval=compute_pval_twotail(roi_cfg[x],null_cfg[:,x])
        zmat[x]=zval
        pmat[x]=pval

    print('...')
    print('Computed zMat and pMat for {} edges'.format(len(pmat)))
    if thresh:
        reject_fdr,fdr_pvals=fdr_correction(pmat)
        n_sig=np.sum(reject_fdr)
        print('# of significant edge: {}'.format(n_sig))
        zmat=np.multiply(zmat,reject_fdr)
        pmat=fdr_pvals
    savefile='{}/{}'.format(path_OutpData,savename)
    np.savez(savefile,zMat=zmat,pMat=pmat)
    print('Saving file to {}.npz'.format(savefile))
    print('...')

# Edge-wise z-scores for the 'diff' and 'comb' group-average matrices, each against its own null data
get_edge_zscores(roi_cfg=diffMat1,null_cfg=diff_nullMat,savename='netLearn_diffData_zscores_groupavg_410nodes_connmat')
get_edge_zscores(roi_cfg=combMat1,null_cfg=comb_nullMat,savename='netLearn_combData_zscores_groupavg_410nodes_connmat')


# 3: Compute node-strength z-scores

In [None]:
# Contrast names; each is substituted into the input and null file names
conlist=['diff','comb','soc','nonSoc']

In [None]:
def get_nodestr_means(cfg_mat):
    '''
    Compute the mean connectivity of every node

    Parameters
    ----------
        cfg_mat: numpy array
            1D vector of unique connections, as accepted by
            conv_cfg_vec_to_adj_matr

    Returns
    -------
        rowMeans: numpy array
            1D array holding the mean of each row of the adjacency matrix
            (the zero diagonal is included in the mean)
    '''
    return np.mean(conv_cfg_vec_to_adj_matr(cfg_mat),axis=-1)

def get_null_nodestr_means(cfg_mat):
    '''
    Compute the mean connectivity of every node for each null model

    The number of null models and the number of nodes are taken from
    cfg_mat itself rather than from the module-level n_perm / n_node, so the
    function works for any input size.

    Parameters
    ----------
        cfg_mat: numpy array
            2D (n_null, n_conn) array; each row is a vector of unique
            connections as accepted by conv_cfg_vec_to_adj_matr

    Returns
    -------
        rowMeans: numpy array
            2D (n_null, n_node) array of adjacency-row means
    '''
    adj_mat=np.array([conv_cfg_vec_to_adj_matr(null_vec) for null_vec in cfg_mat])
    return np.mean(adj_mat,axis=-1)


def get_nodestr_zscores(task,thresh=False,savename='test'):
    '''
    Compute node-strength z-scores for one contrast and save them to disk

    Loads the contrast's PPI data and its null data, averages the observed
    data over the first axis, converts both to per-node mean connectivity and
    compares each node against its null distribution with
    compute_pval_twotail. Results are written to
    '<path_OutpData>/<savename>.npz' under the keys 'zMat' and 'pMat'.
    Nothing is returned.

    Parameters
    ----------
        task: str
            contrast name substituted into the input and null file names
        thresh: boolean
            if True, apply FDR correction: z-scores of nodes that are not
            rejected are set to zero and the saved p-values are the
            FDR-adjusted ones
        savename: str
            file name (without extension) for the saved .npz
    '''
    # Load subject data
    cfg_data=np.load(opj(path_InpData,'netLearn_{}_transition_PPI_26subjs.npz'.format(task)))

    # Compute subject average
    roi_cfg=np.mean(cfg_data['ppiMat'],axis=0)

    # Load null data (already averaged across subjects)
    null_cfg=np.load(opj(path_NullData,'netLearn_{}_nulldata_transition_PPI_26subjs_groupavg.npz'.format(task)))['cfgMat']
    print('')
    print('Getting node strength vals')
    node_vec=get_nodestr_means(roi_cfg)
    print('Getting nullmat node strength vals')
    null_mat=get_null_nodestr_means(null_cfg)
    zMat=np.zeros(node_vec.shape)
    pMat=zMat.copy()

    # Iterate over the nodes actually present in the data rather than the
    # module-level n_node; max() keeps the progress step from being zero
    n_nodes=len(node_vec)
    step=max(1,n_nodes//10)
    for x in range(n_nodes):
        if x==0:
            print('Working on node 1',end='')
        elif (x+1)%step==0:
            print('...{}'.format(x+1),end='')
        elif (x+1)==n_nodes:
            print('')
        zval,pval=compute_pval_twotail(node_vec[x],null_mat[:,x])
        zMat[x]=zval
        pMat[x]=pval
    print('...')
    print('Computed zMat and pMat for {} nodes'.format(len(pMat)))
    if thresh:
        reject_fdr,fdr_pvals=fdr_correction(pMat)
        n_sig=np.sum(reject_fdr)
        print('# of significant nodes is: {}'.format(n_sig))
        zMat=np.multiply(zMat,reject_fdr)
        pMat=fdr_pvals

    savefile='{}/{}'.format(path_OutpData,savename)
    np.savez(savefile,zMat=zMat,pMat=pMat)
    print('Saving file to {}.npz'.format(savefile))
    print('...')


# Unthresholded node-strength z-scores for every contrast in conlist
# (note: `task` remains defined at module level after this loop)
for task in conlist:
    print('Working on contrast {}'.format(task))
    get_nodestr_zscores(task,thresh=False,savename='netLearn_{}Data_zscores_26subjs_nodestr'.format(task))

# 4: Get list of hubs

All Tasks- Global Connectivity
10 significant positive ROIs in a priori regions

Social versus Non-Social- Global Connectivity
6 significant positive ROIs in a priori regions

Social versus Non-Social- Global Connectivity
11 significant positive ROIs in a priori regions

Column ids for dmPFC_L= [165, 174]
Column ids for vmPFC_L= [160]
Column ids for TPJ_R= [244]
Column ids for Hippocampus_R= [407]
Column ids for combhubs= [44, 160, 165, 174, 244, 370, 376, 378, 402, 407]
Column ids for TPJ_L= [44]
Column ids for Hippocampus_L= [402]
Column ids for Hippocampus= [402, 407]
Column ids for dmPFC_R= [370, 376, 378]

Column ids for TPJ_R= [262, 394, 396, 397]
Column ids for diffhubs= [85, 172, 262, 394, 396, 397]
Column ids for TPJ_L= [85, 172]

Column ids for lPFC_L= [182]
Column ids for dmPFC_L= [165, 177]
Column ids for Frontal_Inf_Orb_R= [308]
Column ids for diffhubs_ns= [165, 177, 182, 304, 308, 310, 332, 370, 371, 376, 378]
Column ids for Frontal_Inf_Oper_R= [332]
Column ids for Frontal_Inf_Orb_L= [182]
Column ids for dmPFC_R= [310, 370, 371, 376, 378]
Column ids for lPFC_R= [308, 332, 304]
Column ids for Frontal_Inf_Tri_R= [304]

In [None]:

# list1 / list2 hold the name fragments used to build the z-score file name in
# identify_hubs1; the *_labels lists hold the matching text for the printed title
list1=['combData','diffData','nonSocData','SocData']
list1_labels=['All Tasks','Social versus Non-Social','Non-Social Task','Social Task']
list2=['nodestr']
list2_labels=['Global']


# ROI labels (matched against the atlas 'ns_ROI_label' column) considered a priori regions
apriori_names=['dmPFC_L','vmPFC_L','PCC_L','Hippocampus_L','TPJ_L',
               'dmPFC_R','vmPFC_R','PCC_R','Hippocampus_R','TPJ_R',
               'Frontal_Inf_Oper_L','Frontal_Inf_Orb_L','Frontal_Inf_Tri_L',
               'Frontal_Inf_Oper_R','Frontal_Inf_Orb_R','Frontal_Inf_Tri_R',
               'Amygdala_L','Amygdala_R',
               'Ventral_Striatum_L','Caudate_L',
               'Ventral_Striatum_R','Caudate_R']



def identify_hubs1(ix,iy,thresh=True,apriori=True,flip=False,alpha=0.05):
    '''
    Identify nodes with a significant positive z-score

    Loads '<path_zData>/netLearn_<list1[iy]>_zscores_26subjs_<list2[ix]>.npz'
    and returns the nodes whose (optionally sign-flipped, optionally
    FDR-thresholded) z-score is greater than zero.

    Parameters
    ----------
        ix: int
            index into list2 / list2_labels
        iy: int
            index into list1 / list1_labels
        thresh: boolean
            if True, zero out z-scores not rejected by FDR correction
        apriori: boolean
            if True, keep only nodes whose 'ns_ROI_label' is in apriori_names
            and group them by label in sig_dic
        flip: boolean
            if True, multiply the z-scores by -1 before thresholding
        alpha: float
            error rate passed to fdr_correction

    Returns
    -------
        sig_ix: list or numpy array
            indices of the selected nodes (list when apriori is True)
        sig_vec: numpy array
            array shaped like the z-scores with 1 at the selected nodes
        sig_dic: dict
            a priori label -> list of selected node indices; labels with no
            selected node are omitted (empty when apriori is False)
    '''
    xx=list2[ix]
    yy=list1[iy]
    data=np.load('{}/netLearn_{}_zscores_26subjs_{}.npz'.format(path_zData,yy,xx))
    zMat=data['zMat']
    pMat=data['pMat']

    if flip:
        zMat=np.multiply(zMat,-1)

    # Set nonsignificant values to zero using FDR correction
    if thresh:
        reject_fdr,pval_fdr=fdr_correction(pMat,alpha)
        zMat=np.multiply(zMat,reject_fdr)

    title='{}- {} Connectivity'.format(list1_labels[iy],list2_labels[ix])
    sig_ix=np.where(zMat>0)[0]
    sig_dic={}
    sig_vec=np.zeros(zMat.shape)
    if apriori:
        sig_ix=[node for node in sig_ix if schaefer_atlas.loc[node,'ns_ROI_label'] in apriori_names]
        # Group the surviving nodes by label, keeping only non-empty groups.
        # Distinct loop names avoid clobbering the `ix` argument, and plain
        # assignment replaces the Python-2-only dict.iteritems() filter.
        for roi_name in apriori_names:
            members=[node for node in sig_ix if schaefer_atlas.loc[node,'ns_ROI_label']==roi_name]
            if members:
                sig_dic[roi_name]=members

    sig_vec[sig_ix]=1

    print('')
    print(title)
    print('{} significant positive ROIs in a priori regions'.format(len(sig_ix)))
    return sig_ix,sig_vec,sig_dic
    
# Hubs for list1[0] ('combData')
sigvals_comb,sigvec_comb,sigdic_comb=identify_hubs1(0,0,alpha=0.025)
signames_comb=schaefer_atlas.loc[sigvals_comb,'ns_ROI_label']

# Hubs for list1[1] ('diffData'), positive z-scores
sigvals_diff,sigvec_diff,sigdic_diff=identify_hubs1(0,1,alpha=0.025)
signames_diff=schaefer_atlas.loc[sigvals_diff,'ns_ROI_label']


# Same data set with the z-scores sign-flipped, i.e. nodes that were negative before the flip
sigvals_diff_ns,sigvec_diff_ns,sigdic_diff_ns=identify_hubs1(0,1,flip=True,alpha=0.025)
signames_diff_ns=schaefer_atlas.loc[sigvals_diff_ns,'ns_ROI_label']

# Add aggregate entries: a fixed bilateral hippocampus pair and the full hub list of each result
sigdic_comb['Hippocampus']=[402,407]
sigdic_comb['combhubs']=sigvals_comb
sigdic_diff['diffhubs']=sigvals_diff
sigdic_diff_ns['diffhubs_ns']=sigvals_diff_ns

# Merge every 'Frontal...' entry into one lPFC entry per hemisphere (flattening the per-label lists)
lpfc_l_vals=[sigdic_diff_ns[key] for key in sigdic_diff_ns.keys() if 'Frontal' in key and '_L' in key]
sigdic_diff_ns['lPFC_L']=[item for sublist in lpfc_l_vals for item in sublist]
lpfc_r_vals=[sigdic_diff_ns[key] for key in sigdic_diff_ns.keys() if 'Frontal' in key and '_R' in key]
sigdic_diff_ns['lPFC_R']=[item for sublist in lpfc_r_vals for item in sublist]


# Print the node indices stored under every key of the three dictionaries
for x in sigdic_comb.keys():
    print('Column ids for {}= {}'.format(x,sigdic_comb[x]))
    
print('')
for x in sigdic_diff.keys():
    print('Column ids for {}= {}'.format(x,sigdic_diff[x]))
    
print('')
for x in sigdic_diff_ns.keys():
    print('Column ids for {}= {}'.format(x,sigdic_diff_ns[x]))

In [None]:
# Inspect identifiers, coordinates and labels of three specific atlas rows
schaefer_atlas.loc[[363,364,371],['nID','x','y','z','ns_ROI_label','ROI_label']]

In [None]:

# list1 / list2 hold the name fragments used to build the z-score file name in
# identify_hubs1; the *_labels lists hold the matching text for the printed title.
# Here list2 selects the 'all' files.
list1=['combData','diffData','nonSocData','SocData']
list1_labels=['All Tasks','Social versus Non-Social','Non-Social Task','Social Task']
list2=['all']
list2_labels=['Global']


# ROI labels (matched against the atlas 'ns_ROI_label' column) considered a priori regions
apriori_names=['dmPFC_L','vmPFC_L','PCC_L','Hippocampus_L','TPJ_L',
               'dmPFC_R','vmPFC_R','PCC_R','Hippocampus_R','TPJ_R',
               'Frontal_Inf_Oper_L','Frontal_Inf_Orb_L','Frontal_Inf_Tri_L',
               'Frontal_Inf_Oper_R','Frontal_Inf_Orb_R','Frontal_Inf_Tri_R',
               'Amygdala_L','Amygdala_R',
               'Ventral_Striatum_L','Caudate_L',
               'Ventral_Striatum_R','Caudate_R']



def identify_hubs1(ix,iy,thresh=True,apriori=True,flip=False,alpha=0.05):
    '''
    Identify nodes with a significant positive z-score

    Loads '<path_zData>/netLearn_<list1[iy]>_zscores_410nodes_<list2[ix]>.npz'
    and returns the nodes whose (optionally sign-flipped, optionally
    FDR-thresholded) z-score is greater than zero.

    Parameters
    ----------
        ix: int
            index into list2 / list2_labels
        iy: int
            index into list1 / list1_labels
        thresh: boolean
            if True, zero out z-scores not rejected by FDR correction
        apriori: boolean
            if True, keep only nodes whose 'ns_ROI_label' is in apriori_names
            and group them by label in sig_dic
        flip: boolean
            if True, multiply the z-scores by -1 before thresholding
        alpha: float
            error rate passed to fdr_correction

    Returns
    -------
        sig_ix: list or numpy array
            indices of the selected nodes (list when apriori is True)
        sig_vec: numpy array
            array shaped like the z-scores with 1 at the selected nodes
        sig_dic: dict
            a priori label -> list of selected node indices; labels with no
            selected node are omitted (empty when apriori is False)
    '''
    xx=list2[ix]
    yy=list1[iy]
    data=np.load('{}/netLearn_{}_zscores_410nodes_{}.npz'.format(path_zData,yy,xx))
    zMat=data['zMat']
    pMat=data['pMat']

    # NOTE(review): the original statement here, `zMat[zMat=4]`, was a syntax
    # error. It is assumed to be an unfinished version of the +/-4 placeholder
    # replacement applied to the z-score CSVs elsewhere in this notebook
    # (-4 -> z for p=.0005, 4 -> z for p=.9995) -- confirm.
    zMat[zMat==4]=ss.norm.ppf(0.9995)
    zMat[zMat==-4]=ss.norm.ppf(0.0005)

    if flip:
        zMat=np.multiply(zMat,-1)

    # Set nonsignificant values to zero using FDR correction
    if thresh:
        reject_fdr,pval_fdr=fdr_correction(pMat,alpha)
        zMat=np.multiply(zMat,reject_fdr)

    title='{}- {} Connectivity'.format(list1_labels[iy],list2_labels[ix])
    sig_ix=np.where(zMat>0)[0]
    sig_dic={}
    sig_vec=np.zeros(zMat.shape)
    if apriori:
        sig_ix=[node for node in sig_ix if schaefer_atlas.loc[node,'ns_ROI_label'] in apriori_names]
        # Group the surviving nodes by label, keeping only non-empty groups.
        # Distinct loop names avoid clobbering the `ix` argument, and plain
        # assignment replaces the Python-2-only dict.iteritems() filter.
        for roi_name in apriori_names:
            members=[node for node in sig_ix if schaefer_atlas.loc[node,'ns_ROI_label']==roi_name]
            if members:
                sig_dic[roi_name]=members

    sig_vec[sig_ix]=1

    print('')
    print(title)
    print('{} significant positive ROIs in a priori regions'.format(len(sig_ix)))
    return sig_ix,sig_vec,sig_dic
    
# Hubs for list1[0] ('combData')
sigvals_comb,sigvec_comb,sigdic_comb=identify_hubs1(0,0,alpha=0.025)
signames_comb=schaefer_atlas.loc[sigvals_comb,'ns_ROI_label']

# Hubs for list1[1] ('diffData'), positive z-scores
sigvals_diff,sigvec_diff,sigdic_diff=identify_hubs1(0,1,alpha=0.025)
signames_diff=schaefer_atlas.loc[sigvals_diff,'ns_ROI_label']


# Same data set with the z-scores sign-flipped, i.e. nodes that were negative before the flip
sigvals_diff_ns,sigvec_diff_ns,sigdic_diff_ns=identify_hubs1(0,1,flip=True,alpha=0.025)
signames_diff_ns=schaefer_atlas.loc[sigvals_diff_ns,'ns_ROI_label']

# Add aggregate entries: a fixed bilateral hippocampus pair and the full hub list of each result
sigdic_comb['Hippocampus']=[402,407]
sigdic_comb['combhubs']=sigvals_comb
sigdic_diff['diffhubs']=sigvals_diff
sigdic_diff_ns['diffhubs_ns']=sigvals_diff_ns

# Merge every 'Frontal...' entry into one lPFC entry per hemisphere (flattening the per-label lists)
lpfc_l_vals=[sigdic_diff_ns[key] for key in sigdic_diff_ns.keys() if 'Frontal' in key and '_L' in key]
sigdic_diff_ns['lPFC_L']=[item for sublist in lpfc_l_vals for item in sublist]
lpfc_r_vals=[sigdic_diff_ns[key] for key in sigdic_diff_ns.keys() if 'Frontal' in key and '_R' in key]
sigdic_diff_ns['lPFC_R']=[item for sublist in lpfc_r_vals for item in sublist]


# Print the node indices stored under every key of the three dictionaries
for x in sigdic_comb.keys():
    print('Column ids for {}= {}'.format(x,sigdic_comb[x]))
    
print('')
for x in sigdic_diff.keys():
    print('Column ids for {}= {}'.format(x,sigdic_diff[x]))
    
print('')
for x in sigdic_diff_ns.keys():
    print('Column ids for {}= {}'.format(x,sigdic_diff_ns[x]))

In [None]:
# ROI keys to keep when the hub dictionaries are filtered
roi_names=['dmPFC_L','vmPFC_L','PCC_L','Hippocampus_L','TPJ_L',
           'dmPFC_R','vmPFC_R','PCC_R','Hippocampus_R','TPJ_R',
           'lPFC_L','Amygdala_L','Ventral_Striatum_L','Caudate_L',
           'lPFC_R','Amygdala_R','Ventral_Striatum_R','Caudate_R']
    
# Case-insensitive alphabetical order
roi_names=sorted(roi_names, key=lambda s: s.lower())

In [None]:
def sub_dict_remove1(mydict, somekeys,default=None):
    '''
    Build a new dict restricted to the requested keys

    Values are popped from a copy of mydict, so mydict itself is left
    untouched. A key that is missing, or that has already been taken by an
    earlier duplicate in somekeys, maps to `default`.

    Parameters
    ----------
        mydict: dict
            source dictionary
        somekeys: iterable
            keys to extract
        default: object
            value used for keys that are not available

    Returns
    -------
        dict mapping every key in somekeys to its value or to `default`
    '''
    remaining=mydict.copy()
    picked={}
    for key in somekeys:
        picked[key]=remaining.pop(key,default)
    return picked

def sub_dict_remove2(mydict, somekeys,default=None):
    '''
    Build a new dict restricted to the requested keys, dropping falsy values

    Works like sub_dict_remove1 (values are popped from a copy, mydict is not
    modified), then removes every entry whose value is falsy -- e.g. missing
    keys that received a None default, or empty lists.

    Parameters
    ----------
        mydict: dict
            source dictionary
        somekeys: iterable
            keys to extract
        default: object
            value used for keys that are not available

    Returns
    -------
        dict with the requested keys whose values are truthy
    '''
    somedict=mydict.copy()
    tdict=dict([ (k, somedict.pop(k, default)) for k in somekeys ])
    # .items() works on both Python 2 and 3 (the original .iteritems() is Python 2 only)
    return dict((k, v) for k, v in tdict.items() if v)

# One copy of each hub dictionary, keyed by a short prefix
sigdic_rois={'ns_hub':sigdic_diff_ns.copy(),
             'soc_hub':sigdic_diff.copy(),
             'comb_hub':sigdic_comb.copy()}

# Keep only the roi_names entries that exist (and are non-empty) in each dictionary
for key in sigdic_rois:
    sigdic_rois[key]=sub_dict_remove2(sigdic_rois[key],roi_names)
    
# Flatten to a single dict with keys of the form '<prefix>_<roi name>'
syslist_rois={}
for k1 in sigdic_rois.keys():
    for k2 in sigdic_rois[k1].keys():
        syslist_rois['{}_{}'.format(k1,k2)]=sigdic_rois[k1][k2]
                
print('{} hubs in syslist_rois'.format(len(syslist_rois)))

# Select the keys containing 'soc', 'ns' or 'Hippocampus_'
keys1=[key for key in syslist_rois.keys() if 'soc' in key or 'ns' in key or 'Hippocampus_' in key]

# sub_dict_remove2 keeps (rather than removes) the listed keys, so syslist_rois2 holds only keys1
syslist_rois2=sub_dict_remove2(syslist_rois,keys1)

syslist_rois2

# 5: Get z-scored hub connectivity for each subject

In [None]:
# In[ ]:
def get_means(adj_mat,row_ix,col_ix):
    '''
    Average a rows-by-columns block of every matrix in a 3D stack

    Parameters
    ----------
        adj_mat: numpy array
            3D array; the first axis is kept and the last two axes are the
            matrix rows and columns
        row_ix: list
            row indices to include in the means to extract
        col_ix: list
            column indices to include in the means to extract

    Returns
    -------
        rowMeans: numpy array
            1D array with one block mean per entry of the first axis
    '''
    # Select rows first, then columns, so the result is the full
    # row_ix x col_ix block rather than paired (row, col) elements
    block=adj_mat[:,row_ix,:][:,:,col_ix]
    return np.mean(block,axis=(-1,-2))

def get_null_means(adj_mat,row_ix,col_ix):
    '''
    Average a rows-by-columns block of every matrix in a 4D stack

    Parameters
    ----------
        adj_mat: numpy array
            4D array; the first two axes are kept and the last two axes are
            the matrix rows and columns
        row_ix: list
            row indices to include in the means to extract
        col_ix: list
            column indices to include in the means to extract

    Returns
    -------
        rowMeans: numpy array
            2D array with one block mean per entry of the first two axes
    '''
    # Select rows first, then columns, so the result is the full
    # row_ix x col_ix block rather than paired (row, col) elements
    block=adj_mat[:,:,row_ix,:][:,:,:,col_ix]
    return np.mean(block,axis=(-1,-2))

def get_subhub_zscores(ppi_mat,null_mat,row_id,col_id,row_dict=None,col_dict=None):
    '''
    Compute per-subject z-scores for the mean connectivity between two node sets

    Parameters
    ----------
        ppi_mat: numpy array
            3D n_subjs x n_node x n_node array of observed connectivity
        null_mat: numpy array
            4D n_subjs x n_perm x n_node x n_node array of null connectivity
        row_id: str
            key in row_dict, or 'all' to use every row (global connectivity)
        col_id: str
            key in col_dict, or 'all' to use every column
        row_dict: dict
            maps hub names to lists of row indices (needed unless row_id is 'all')
        col_dict: dict
            maps hub names to lists of column indices (needed unless col_id is 'all')

    Returns
    -------
        zMat: numpy array
            1D array with one z-score per subject
        pMat: numpy array
            1D array with one p-value per subject
    '''
    print('')
    # 'all' spans every node in the data instead of a hard-coded 410
    if row_id=='all':
        row_ix=range(ppi_mat.shape[-2])
    else:
        row_ix=row_dict[row_id]

    if col_id=='all':
        col_ix=range(ppi_mat.shape[-1])
    else:
        col_ix=col_dict[col_id]

    print('Getting subject averages for {} x {}'.format(row_id,col_id))
    node_vec=get_means(ppi_mat,row_ix,col_ix)

    print('Getting nullmat averages for {} x {}'.format(row_id,col_id))
    node_null_mat=get_null_means(null_mat,row_ix,col_ix)

    #Create array of zeros to fill with z-scores and p-values
    zMat=np.zeros(node_vec.shape)
    pMat=zMat.copy()
    # One comparison per subject present in the data (not the module-level n_subjs)
    for x in range(len(node_vec)):
        zval,pval=compute_pval_twotail(node_vec[x],node_null_mat[x,:])
        zMat[x]=zval
        pMat[x]=pval

    print('...')
    print('Computed zMat and pMat for {} subjects'.format(len(pMat)))
    return zMat,pMat

def load_mat(filename):
    '''
    Load observed PPI data and convert it to adjacency matrices

    Parameters
    ----------
        filename: str
            name of an .npz file inside path_InpData holding a 'ppiMat'
            array with one connection vector per row

    Returns
    -------
        ppi_mat: numpy array
            3D array with one symmetric adjacency matrix per row of 'ppiMat'
    '''
    # Load subject data. The original ignored `filename` and built the path
    # from the module-level `task`, so it could load a different contrast
    # than the one printed and requested.
    data_file=opj(path_InpData,filename)
    print('Loading {}'.format(data_file))
    cfg_mat=np.load(data_file)['ppiMat']
    print('Loaded cfgmat with shape {}'.format(cfg_mat.shape))
    # Convert every row present in the file (not a fixed n_subjs)
    ppi_mat=np.array([conv_cfg_vec_to_adj_matr(cfg_vec) for cfg_vec in cfg_mat])
    print('Converted ppimat to shape {}'.format(ppi_mat.shape))
    return ppi_mat
    
def load_null_mat(null_file):
    '''
    Load null-model data and convert it to adjacency matrices

    Parameters
    ----------
        null_file: str
            name of an .npz file inside path_NullData holding a 3D 'cfgMat'
            array whose last axis is a vector of unique connections
            (presumably n_subjs x n_perm x n_conn -- TODO confirm)

    Returns
    -------
        null_mat: numpy array
            4D array with the first two axes of 'cfgMat' preserved and each
            connection vector expanded to a symmetric (n_node, n_node) matrix
    '''
    null_path=opj(path_NullData,null_file)
    print('Loading {}'.format(null_path))
    null_cfg=np.load(null_path)['cfgMat']

    n_outer,n_inner,n_conn=null_cfg.shape
    print('Loaded cfgmat with shape {} x {} x {}'.format(n_outer,n_inner,n_conn))

    # Number of nodes implied by the number of unique connections
    side=int(np.floor(np.sqrt(2*(n_conn)))+1)
    null_mat=np.zeros((n_outer,n_inner,side,side))

    # Expand every connection vector in turn
    for i,j in np.ndindex(n_outer,n_inner):
        null_mat[i,j,:,:]=conv_cfg_vec_to_adj_matr(null_cfg[i,j,:])

    print('Converted nullmat to shape {}'.format(null_mat.shape))
    return null_mat

def mult_subj_zscores1(task):
    '''
    Compute per-subject global-connectivity z-scores for every hub set

    Loads the observed and null matrices for `task`, computes the z-scored
    mean connectivity between each hub set and all nodes, and writes the
    table to
    '<path_OutpData>/netLearn_<task>Data_zscores_26subjs_sighubs_nodestr.csv'.

    Parameters
    ----------
        task: str
            contrast name substituted into the input, null and output file names

    Returns
    -------
        df: pandas DataFrame
            one column '<hub>_global' per hub set, one row per subject
    '''
    df=pd.DataFrame()

    # load connectivity matrices
    print('')
    ppi_mat=load_mat('netLearn_{}_transition_PPI_26subjs.npz'.format(task))
    print('')
    null_mat=load_null_mat('netLearn_{}_nulldata_transition_PPI_26subjs.npz'.format(task))

    # Hub names grouped by the dictionary that holds their node indices
    hub_groups=[
        (['combhubs','Hippocampus','Hippocampus_L','Hippocampus_R'],sigdic_comb),
        (['diffhubs','TPJ_L','TPJ_R'],sigdic_diff),
        (['diffhubs_ns','dmPFC_L','dmPFC_R','lPFC_L','lPFC_R'],sigdic_diff_ns),
    ]
    for hub_names,hub_dict in hub_groups:
        for hub in hub_names:
            hub_z,hub_p=get_subhub_zscores(ppi_mat,null_mat,hub,'all',hub_dict,hub_dict)
            df['{}_global'.format(hub)]=hub_z
            print('')

    df.to_csv('{}/netLearn_{}Data_zscores_26subjs_sighubs_nodestr.csv'.format(path_OutpData,task))
    return df



In [None]:
# Scratch check: load the 'soc' null and observed matrices
test_null=load_null_mat('netLearn_soc_nulldata_transition_PPI_26subjs.npz')
test=load_mat('netLearn_soc_transition_PPI_26subjs.npz')

# get_subhub_zscores returns a (z-scores, p-values) tuple, so zmeans holds both
zmeans=get_subhub_zscores(test,test_null,'Hippocampus_L','all',sigdic_comb,sigdic_comb)


In [None]:
# Scratch check: mean connectivity of row 402, computed directly and via the helpers
hl=np.mean(test[:,402,:],axis=(-1,-2))
hl_null=np.mean(test_null[:,:,402,:],axis=(-1,-2))
hl1=get_means(adj_mat=test,col_ix=range(410),row_ix=[402])
hl1_null=get_null_means(adj_mat=test_null,col_ix=range(410),row_ix=[402])

for n in range(n_subjs):
    # NOTE(review): the loop body was missing in the original, which is a
    # syntax error; `pass` keeps the cell valid until the intended
    # per-subject check is written
    pass

In [None]:
# Scratch check: reproduce get_means by hand for row 402 against all 410 columns
hl=test[:,[402],:]
hl=hl[:,:,range(410)]
hl2=np.mean(hl,axis=(-1,-2))


In [None]:
# Display soc_df (NOTE(review): it is only assigned in a later cell, so this depends on execution order)
soc_df

In [None]:
# Global-connectivity hub z-scores for the 'soc' contrast
soc_df=mult_subj_zscores1('soc')

In [None]:
def mult_subj_zscores2(task):
    '''
    Compute per-subject hub-to-hub connectivity z-scores

    Loads the observed and null matrices for `task`, computes the z-scored
    mean connectivity between pairs of hub sets, and writes the table to
    '<path_OutpData>/netLearn_<task>Data_zscores_26subjs_sighubs_to_sighubs.csv'.

    Parameters
    ----------
        task: str
            contrast name substituted into the input, null and output file names

    Returns
    -------
        df: pandas DataFrame
            one column '<row hub>_X_<column hub>' per pair, one row per subject
    '''
    df=pd.DataFrame()

    # Load connectivity matrices the same way mult_subj_zscores1 does; the
    # original body used ppi_mat / null_mat without ever defining them
    print('')
    ppi_mat=load_mat('netLearn_{}_transition_PPI_26subjs.npz'.format(task))
    print('')
    null_mat=load_null_mat('netLearn_{}_nulldata_transition_PPI_26subjs.npz'.format(task))

    def _add_pair(x,y,row_dict,col_dict):
        # z-score one hub pair and store it as a new column
        colname='{}_X_{}'.format(x,y)
        sub_z,sub_p=get_subhub_zscores(ppi_mat,null_mat,x,y,row_dict,col_dict)
        df[colname]=sub_z
        print('')

    pfc_hubs=['dmPFC_L','dmPFC_R','lPFC_L','lPFC_R']
    tpj_hubs=['TPJ_L','TPJ_R']

    # Hippocampus hubs against the prefrontal and TPJ hubs
    for x in ['Hippocampus','Hippocampus_L','Hippocampus_R']:
        for y in pfc_hubs:
            _add_pair(x,y,sigdic_comb,sigdic_diff_ns)
        for y in tpj_hubs:
            _add_pair(x,y,sigdic_comb,sigdic_diff)

    # TPJ hubs against the prefrontal hubs and against each other
    for x in tpj_hubs:
        for y in pfc_hubs:
            _add_pair(x,y,sigdic_diff,sigdic_diff_ns)
        for y in tpj_hubs:
            if x!=y:
                _add_pair(x,y,sigdic_diff,sigdic_diff)

    df.to_csv('{}/netLearn_{}Data_zscores_26subjs_sighubs_to_sighubs.csv'.format(path_OutpData,task))
    return df



In [None]:
# Scratch check of the helper output shapes on a 10 x 10 block
# (only the last expression's value is displayed by the notebook)
nullmeans=get_null_means(test_null,range(10),range(10))
nullmeans.shape
tmeans=get_means(test,range(10),range(10))
tmeans.shape

In [None]:
# NOTE(review): the original calls passed three arguments (data, null data,
# name), most of them undefined, to functions that take a single `task`
# string. The task names below are assumed from conlist -- confirm the mapping.

# Hub-to-global z-scores for every contrast
soc_df=mult_subj_zscores1('soc')
ns_df=mult_subj_zscores1('nonSoc')

comb_df=mult_subj_zscores1('comb')
diff_df=mult_subj_zscores1('diff')

# Hub-to-hub z-scores for every contrast (these reuse, and so overwrite, the
# variable names above, as in the original)
comb_df=mult_subj_zscores2('comb')
diff_df=mult_subj_zscores2('diff')

ns_df=mult_subj_zscores2('nonSoc')
soc_df=mult_subj_zscores2('soc')

# 6: Compute system z-scores

In [None]:
def get_means2(cfg_mat,row_ix,col_ix):
    '''
    Mean connectivity per subject within a rows-by-columns block of nodes

    Parameters
    ----------
        cfg_mat: numpy array
            either a 3D n_subjs x n_node x n_node stack of adjacency
            matrices, or a 2D n_subjs x n_conn array of connection vectors
            (converted to adjacency matrices first)
        row_ix: list
            row indices to include in the means to extract
        col_ix: list
            col indices to include in the means to extract

    Returns
    -------
        rowMeans: numpy array
            1D n_subjs np array
    '''
    if len(cfg_mat.shape)!=3:
        # Connection vectors: expand each subject's vector to a matrix
        n_rows=cfg_mat.shape[0]
        adj_mat=np.array([conv_cfg_vec_to_adj_matr(cfg_mat[subj,:]) for subj in range(n_rows)])
    else:
        adj_mat=cfg_mat.copy()

    # Rows first, then columns, to obtain the full block.
    # Note: any diagonal entries inside the block are included in the mean.
    block=adj_mat[:,row_ix,:][:,:,col_ix]
    return np.mean(block,axis=(-1,-2))

def get_null_means2(cfg_mat,row_ix,col_ix):
    '''
    Mean connectivity per subject and null model within a block of nodes

    Parameters
    ----------
        cfg_mat: numpy array
            3D n_subjs x n_perm x n_conn np array
        row_ix: list
            row indices to include in the means to extract
        col_ix: list
            col indices to include in the means to extract

    Returns
    -------
        rowMeans: numpy array
            2D n_subjs x n_perm np array
    '''
    subj_count,perm_count=cfg_mat.shape[0],cfg_mat.shape[1]
    # Node count is read off one converted matrix
    node_count=conv_cfg_vec_to_adj_matr(cfg_mat[0,0,:]).shape[0]

    # Expand every connection vector to a full adjacency matrix
    adj_mat=np.zeros((subj_count,perm_count,node_count,node_count))
    for subj,perm in np.ndindex(subj_count,perm_count):
        adj_mat[subj,perm,:,:]=conv_cfg_vec_to_adj_matr(cfg_mat[subj,perm,:])

    # Rows first, then columns, to obtain the full block
    block=adj_mat[:,:,row_ix,:][:,:,:,col_ix]
    return np.mean(block,axis=(-1,-2))

def get_sys_zscores(conn_mat,null_mat,node_df,sys_names,colindex,
                    plot=None,print_vals=False,thresh=True,alpha=0.025):
    '''
    Compute z-scored mean connectivity for every pair of systems

    Parameters
    ----------
        conn_mat: numpy array
            observed connectivity in a form accepted by get_means2; only the
            first entry of the resulting mean is used (presumably a single
            group-average matrix -- TODO confirm)
        null_mat: numpy array
            2D n_perm x n_conn null connection vectors
        node_df: pandas DataFrame
            node table; node_df[colindex] gives each node's system label
        sys_names: list
            system labels to compare, used as row and column names
        colindex: str
            column of node_df holding the system labels
        plot: str or None
            'thresh' plots the FDR-thresholded z-scores (requires thresh=True),
            'unc' plots the unthresholded z-scores, anything else plots nothing
        print_vals: boolean
            if True, print the z-score and p-value of every pair
        thresh: boolean
            if True, also return FDR-thresholded z-scores
        alpha: float
            error rate passed to fdr_correction

    Returns
    -------
        sys_dict: dict
            'sys_names', 'sys_df' (means), 'sys_df_zvals', 'sys_df_pvals',
            plus 'sys_df_zvals_fdr' when thresh is True

    Raises
    ------
        ValueError
            If plot is 'thresh' while thresh is False (the original failed
            with an undefined-variable error after all the computation)
    '''
    if plot=='thresh' and not thresh:
        raise ValueError("plot='thresh' requires thresh=True")

    sys_df=pd.DataFrame(np.zeros((len(sys_names),len(sys_names))))
    sys_df.index=sys_names
    sys_df.columns=sys_names
    sys_df_zvals=sys_df.copy()
    sys_df_pvals=sys_df.copy()

    # Node indices of every system, computed once instead of once per pair
    sys_members=dict((name,[i for i,label in enumerate(node_df[colindex]) if label==name])
                     for name in sys_names)
    # get_null_means2 expects a leading subject axis
    null_4d=np.expand_dims(null_mat,axis=0)

    for sys1 in sys_names:
        sys_ix=sys_members[sys1]
        for sys2 in sys_names:
            sys_iy=sys_members[sys2]
            sys_mean=get_means2(conn_mat,sys_ix,sys_iy)
            sys_null_dist=get_null_means2(null_4d,sys_ix,sys_iy)
            sys_z,sys_p=compute_pval_twotail(sys_mean[0],sys_null_dist[0,:])
            if print_vals:
                print('{} x {} z={}, p={}'.format(sys1,sys2,sys_z,sys_p))
            # Store the scalar that was z-scored, not the length-1 array
            sys_df.loc[sys1,sys2]=sys_mean[0]
            sys_df_zvals.loc[sys1,sys2]=sys_z
            sys_df_pvals.loc[sys1,sys2]=sys_p

    sys_dict={'sys_names':sys_names,
              'sys_df':sys_df,
              'sys_df_zvals':sys_df_zvals,
              'sys_df_pvals':sys_df_pvals}
    if thresh:
        # Zero out the z-scores that do not survive FDR correction
        reject_fdr,pval_fdr=fdr_correction(sys_df_pvals,alpha)
        sys_df_zvals_fdr=np.multiply(sys_df_zvals,reject_fdr)
        sys_dict['sys_df_zvals_fdr']=sys_df_zvals_fdr

    if plot=='thresh':
        ax1=plotting.plot_matrix(sys_df_zvals_fdr,vmin=-4,vmax=4,labels=sys_names,auto_fit=False)
        plt.show()
    elif plot=='unc':
        ax1=plotting.plot_matrix(sys_df_zvals,vmin=-4,vmax=4,labels=sys_names,auto_fit=False)
        plt.show()

    return sys_dict

In [None]:
def get_sys_zscores2(conn_mat,null_mat,syslist,
                     plot=True,thresh='fdr',alpha=0.025,print_vals=True):
    '''
    Compute system-by-system connectivity values, z-scores and p-values
    for systems given as explicit lists of node indices

    Parameters
    ----------
        conn_mat:
            Observed connectivity data, passed unchanged to get_means2
        null_mat:
            Null-model data; a leading axis is added before it is passed
            to get_null_means2
        syslist: dict
            Maps system name -> node indices of that system
        plot: 'thresh', 'all' or anything else
            'thresh' plots the thresholded z-scores (FDR or uncorrected,
            whichever thresh selected), 'all' plots the unmasked z-scores.
            Any other value -- including the default True -- plots nothing.
        thresh: 'fdr', 'unc' or anything else
            'fdr' masks z-scores by FDR correction, 'unc' masks them by
            p < alpha, any other value applies no mask
        alpha: float
            Threshold used by the selected masking method
        print_vals: bool
            Print z and p for every system pair

    Returns
    -------
        sys_dict: dict
            Keys 'sys_names' (sorted list), 'sys_df', 'sys_df_zvals',
            'sys_df_pvals', plus 'sys_df_zvals_fdr' for thresh=='fdr' or
            'sys_df_zvals_unc' for thresh=='unc'

    Raises
    ------
        ValueError
            If plot=='thresh' is requested without a thresholding method
    '''
    # Fail before the expensive loop: there is nothing thresholded to plot
    if plot=='thresh' and thresh not in ('fdr','unc'):
        raise ValueError("plot='thresh' requires thresh='fdr' or thresh='unc'")

    # sorted() works on both Python 2 lists and Python 3 dict views
    sys_names=sorted(syslist)
    n_sys=len(syslist)
    sys_df=pd.DataFrame(np.zeros((n_sys,n_sys)))
    sys_df.index=sys_names
    sys_df.columns=sys_names
    sys_df_zvals=sys_df.copy()
    sys_df_pvals=sys_df.copy()

    # Loop-invariant: get_null_means2 is given the null data with a leading axis
    null_mat_exp=np.expand_dims(null_mat,axis=0)

    for sys1 in sys_names:
        sys_ix=syslist[sys1]
        for sys2 in sys_names:
            sys_iy=syslist[sys2]
            sys_mean=get_means2(conn_mat,sys_ix,sys_iy)
            sys_null_dist=get_null_means2(null_mat_exp,sys_ix,sys_iy)
            sys_z,sys_p=compute_pval_twotail(sys_mean[0],sys_null_dist[0,:])
            if print_vals:
                print('{} x {} z={}, p={}'.format(sys1,sys2,sys_z,sys_p))
            sys_df.loc[sys1,sys2]=sys_mean
            sys_df_zvals.loc[sys1,sys2]=sys_z
            sys_df_pvals.loc[sys1,sys2]=sys_p

    sys_dict={'sys_names':sys_names,
              'sys_df':sys_df,
              'sys_df_zvals':sys_df_zvals,
              'sys_df_pvals':sys_df_pvals}

    sys_df_zvals_thr=None
    if thresh=='fdr':
        # Zero out z-scores whose p-value does not survive FDR correction
        reject_fdr,_=fdr_correction(sys_df_pvals,alpha)
        sys_df_zvals_thr=np.multiply(sys_df_zvals,reject_fdr)
        sys_dict['sys_df_zvals_fdr']=sys_df_zvals_thr
    elif thresh=='unc':
        # Zero out z-scores with uncorrected p >= alpha
        thmat=np.where(sys_df_pvals<alpha,1,0)
        sys_df_zvals_thr=np.multiply(sys_df_zvals,thmat)
        sys_dict['sys_df_zvals_unc']=sys_df_zvals_thr

    if plot=='thresh':
        plotting.plot_matrix(sys_df_zvals_thr,vmin=-4,vmax=4,labels=sys_names,auto_fit=False)
        plt.show()
    elif plot=='all':
        plotting.plot_matrix(sys_df_zvals,vmin=-4,vmax=4,labels=sys_names,auto_fit=False)
        plt.show()
    return sys_dict

In [None]:
# z-scores between the node groups listed in syslist_rois2, FDR-thresholded.
# NOTE: plot=True matches neither 'thresh' nor 'all' inside get_sys_zscores2,
# so this call prints the values but draws no figure.
diff_roihub_dict=get_sys_zscores2(conn_mat=diffMat1,
                                  null_mat=diff_nullMat,
                                  syslist=syslist_rois2,
                                  plot=True,
                                  thresh='fdr',
                                  print_vals=True)

In [None]:
# z-scores between the systems in sch_names, with nodes assigned by the
# 'System' column of schaefer_atlas; plots the unthresholded z-score matrix.
diff_cog_sys_dict=get_sys_zscores(diffMat1,diff_nullMat,schaefer_atlas,sch_names,'System',
                                  plot='unc',print_vals=True,thresh=True,alpha=0.025)

In [None]:
# Same system-level analysis as for diffMat1, applied to combMat1 and its
# null data; plots the unthresholded z-score matrix.
comb_cog_sys_dict=get_sys_zscores(combMat1,comb_nullMat,schaefer_atlas,sch_names,'System',
                                  plot='unc',print_vals=True,thresh=True,alpha=0.025)

In [None]:
def save_zscores(dict1,savepath,savename):
    '''
    Write system names, p-values and z-scores to a single .npz archive

    Parameters
    ----------
        dict1: dict
            Must contain the keys 'sys_names', 'sys_df_pvals' and
            'sys_df_zvals'; other keys (e.g. 'sys_df') are not saved
        savepath: str
            Directory of the output file
        savename: str
            File name of the output file

    Returns
    -------
        None
            The archive holds the arrays 'sys_names', 'pMat' and 'zMat'
    '''
    out_file=opj(savepath,savename)
    names=dict1['sys_names']
    p_vals=dict1['sys_df_pvals']
    z_vals=dict1['sys_df_zvals']
    np.savez(out_file,sys_names=names,pMat=p_vals,zMat=z_vals)
    


In [None]:
# A second copy of the z-score files is written to data/netLearn_ppi_zscores.
# The folder is derived from home_path so it follows the user's setting
# instead of a hard-coded absolute path, and it is created if missing
# (np.savez does not create directories).
git_path=opj(home_path,'data','netLearn_ppi_zscores')
if not os.path.exists(git_path):
    print('Path does not exist, creating {}'.format(git_path))
    os.makedirs(git_path)

zscore_outputs=[
    (diff_roihub_dict,'netLearn_diffData_zscores_groupavg_roihub_connmat.npz'),
    (diff_cog_sys_dict,'netLearn_diffData_zscores_groupavg_cogsys_connmat.npz'),
    (comb_cog_sys_dict,'netLearn_combData_zscores_groupavg_cogsys_connmat.npz'),
]

# Same write order as before: all files to path_OutpData, then all to git_path
for zscore_dir in [path_OutpData,git_path]:
    for zscore_dict,zscore_file in zscore_outputs:
        save_zscores(zscore_dict,zscore_dir,zscore_file)


Steven Tompson | 2019