GLM Z-Score Analysis
=====================================

This script computes z-scores and p-values for the Schaefer atlas nodes by:

1.  loading the second-level GLM z-maps for each contrast,
2.  calculating the average value for each node in the Schaefer atlas,
3.  computing the z-score and p-value for each node in the Schaefer atlas, and
4.  saving the z-scores and p-values into a new npz file.

# Get hub activation for social and non-social tasks

1. Load subject info (cond order list) and atlas info
2. Get dict of hubs with ROI indices
3. Create mask for each set of hubs that you want to extract average activation from
4. Load transition versus non-transition nii file for each task
5. Extract average activation in mask for each set of hubs
6. Loop over each subject & task and fill empty pandas dataframe
7. Save pandas df as CSV

In [41]:
#importing libraries
%matplotlib inline
from __future__ import print_function, division
import scipy.stats as ss
import os
from os.path import join as opj
import pandas as pd
import numpy as np
from numpy import inf
import matplotlib.pyplot as plt
import seaborn as sns
from nilearn import image, plotting, input_data
import nibabel as nb
from mne.stats import fdr_correction
from nistats import thresholding
from random import shuffle
import glob

print('Done Importing Packages')

Done Importing Packages


In [14]:
# Root folders: edit these two to point at your local copies of the repo and the data
home_path1 = '/Users/steventompson/Git/tompson_netlearn_fmri'
home_path = '/Users/steventompson/Dropbox/Research_Projects/Bassett_Statistical_Learning/fMRI_Study'

# Inputs
data_dir = opj(home_path, 'data')
template_dir = '/Users/steventompson/Git/tompson_netlearn_fmri/data/brain_atlas'
path_InpData = opj(data_dir, 'Subject_Data', 'netLearn_glm', 'firstLevel')
path_zData = opj(home_path1, 'data', 'netLearn_ppi_zscores')

# Outputs
path_OutpData = opj(home_path1, 'data', 'glm_means')
path_Figures = opj(home_path, 'figures', 'component_figs')  # folder to put figures

# Make sure every output folder exists before anything tries to write to it
for path in (path_OutpData, path_Figures):
    if os.path.exists(path):
        continue
    print('Path does not exist, creating {}'.format(path))
    os.makedirs(path)

print('Set data paths')

Set data paths


In [15]:
# Table linking participant IDs, scan IDs and condition order
subj_links=pd.read_csv('{}/data/subj_data/netLearn_IDs_26subjs.csv'.format(home_path1))

# Scan IDs to leave out of the subject list
bad_subjs = ['SNL_001','SNL_004','SNL_028']
subjs = []
for scan_id in subj_links['scanID'].tolist():
    if scan_id in bad_subjs:
        continue
    subjs.append(scan_id)
n_subjs = len(subjs)
print('We have %d subjects' % (n_subjs))

We have 26 subjects


In [39]:
#load schaefer mask
sch_filename='{}/schaefer400_harvard_oxford_2mm_mni_17network.nii.gz'.format(template_dir)
schaefer_mask=nb.load(sch_filename)

#load schaefer atlas info (one row per atlas node)
schaefer_atlas=pd.read_csv('{}/s400ho_netLearn_2mm.csv'.format(template_dir))

# Missing entries become 'Uncertain'; in the 'System' column those are relabelled 'Subcortical'
schaefer_atlas=schaefer_atlas.fillna('Uncertain')
schaefer_atlas.loc[schaefer_atlas['System']=='Uncertain','System']='Subcortical'

# Rows 402 and 407 are given their own 'Hippocampus' system in both labelling schemes
schaefer_atlas.loc[[402,407],'System']='Hippocampus'
schaefer_atlas.loc[[402,407],'System7']='Hippocampus'

# Integer code per node = position of its label in the sorted unique label array.
# A dict lookup is used instead of int(np.where(...)[0]): converting a 1-element
# array to a Python int is deprecated in recent NumPy, and the lookup avoids
# rescanning the label array for every row.
sch_names=np.unique(schaefer_atlas['System'])
sch_lookup=dict((name,code) for code,name in enumerate(sch_names))
sch_nums=[sch_lookup[label] for label in schaefer_atlas['System']]


# Same relabelling and coding for the 'System7' column
schaefer_atlas.loc[schaefer_atlas['System7']=='Uncertain','System7']='Subcortical'
sch7_names=np.unique(schaefer_atlas['System7'])
sch7_lookup=dict((name,code) for code,name in enumerate(sch7_names))
sch7_nums=[sch7_lookup[label] for label in schaefer_atlas['System7']]

# Node coordinates and a default colour per node
net_coords=np.array(schaefer_atlas.loc[:,['x','y','z']])
net_cols=['black']*len(net_coords)

# Node count and the indices of the strict upper triangle (unique node pairs)
n_node = len(sch_nums)
triu_ix, triu_iy = np.triu_indices(n_node, k=1)
n_conn = len(triu_ix)

n_perm = 500

schaefer_atlas.head()

Unnamed: 0.1,Unnamed: 0,nID,nVal,x,y,z,nVox,Hemisphere,System,System7,MedialLateral,AnteriorPosterior,VentralDorsal,ROI_label,ns_ROI_label
0,0,0,1,-35,-62,-17,309,LH,VisCent,Vis,Lateral_LH,Posterior,Ventral,Fusiform_L,Fusiform_L
1,1,1,2,-23,-73,-10,426,LH,VisCent,Vis,Lateral_LH,Posterior,Ventral,Fusiform_L,Fusiform_L
2,2,2,3,-36,-81,-16,357,LH,VisCent,Vis,Lateral_LH,Posterior,Ventral,Fusiform_L,Fusiform_L
3,3,3,4,-17,-86,-15,320,LH,VisCent,Vis,Medial,Posterior,Ventral,Lingual_L,Lingual_L
4,4,4,5,-24,-97,-12,531,LH,VisCent,Vis,Lateral_LH,Posterior,Ventral,Occipital_Inf_L,Occipital_Inf_L


In [36]:
# Start both filename columns with a placeholder value
for new_col in ('nsFile','socFile'):
    subj_links.loc[:,new_col]='NA'

task1_map='{}_task1_Transition_z_map.nii.gz'
task2_map='{}_task2_Transition_z_map.nii.gz'

for subj in subj_links['pID']:
    is_subj=subj_links['pID']==subj
    cond=subj_links.loc[is_subj,'CondNum'].tolist()[0]
    scanID=subj_links.loc[is_subj,'scanID'].tolist()[0]
    # CondNum 1: task1 is the non-social file and task2 the social file; otherwise swapped
    if cond==1:
        ns_tmpl,soc_tmpl=task1_map,task2_map
    else:
        ns_tmpl,soc_tmpl=task2_map,task1_map
    nsFile=ns_tmpl.format(scanID)
    socFile=soc_tmpl.format(scanID)
    subj_links.loc[is_subj,'nsFile']=nsFile
    subj_links.loc[is_subj,'socFile']=socFile

# Plain arrays of file names, in subj_links row order
ns_filenames=subj_links['nsFile'].values
soc_filenames=subj_links['socFile'].values

In [21]:

# list1 / list2 entries are substituted into the npz file name that identify_hubs1 loads;
# the matching *_labels entries are only used to build the printed title.
list1=['combData','diffData','nonSocData','SocData']
list1_labels=['All Tasks','Social versus Non-Social','Non-Social Task','Social Task']
list2=['nodestr']
list2_labels=['Global']


# ROI labels (values of the atlas 'ns_ROI_label' column) treated as a priori regions
apriori_names=['dmPFC_L','vmPFC_L','PCC_L','Hippocampus_L','TPJ_L',
               'dmPFC_R','vmPFC_R','PCC_R','Hippocampus_R','TPJ_R',
               'Frontal_Inf_Oper_L','Frontal_Inf_Orb_L','Frontal_Inf_Tri_L',
               'Frontal_Inf_Oper_R','Frontal_Inf_Orb_R','Frontal_Inf_Tri_R',
               'Amygdala_L','Amygdala_R',
               'Ventral_Striatum_L','Caudate_L',
               'Ventral_Striatum_R','Caudate_R']



def identify_hubs1(ix,iy,thresh=True,apriori=True,flip=False,alpha=0.05):
    """Find nodes with a (significantly) positive z-score in a saved z-score file.

    Loads '<path_zData>/netLearn_<list1[iy]>_zscores_26subjs_<list2[ix]>.npz' and
    reads its 'zMat' and 'pMat' arrays.

    Parameters
    ----------
    ix : index into list2 / list2_labels (connectivity measure).
    iy : index into list1 / list1_labels (contrast).
    thresh : if True, z-scores that do not survive FDR correction at `alpha`
        are set to zero before hubs are selected.
    apriori : if True, keep only nodes whose 'ns_ROI_label' is in
        apriori_names and group them by label in the returned dict.
    flip : if True, multiply the z-scores by -1 first, so that nodes with
        negative z-scores are the ones selected.
    alpha : FDR level passed to fdr_correction.

    Returns
    -------
    sig_ix : indices of the selected nodes (a list when apriori is True,
        otherwise the array from np.where).
    sig_vec : array shaped like zMat with 1 at selected nodes, 0 elsewhere.
    sig_dic : {ROI label: [node indices]} with only non-empty entries;
        empty when apriori is False.
    """
    xx=list2[ix]
    yy=list1[iy]
    data=np.load('{}/netLearn_{}_zscores_26subjs_{}.npz'.format(path_zData,yy,xx))
    zMat=data['zMat']
    pMat=data['pMat']

    if flip:
        zMat=np.multiply(zMat,-1)

    # Set nonsignificant values to zero using FDR correction
    if thresh:
        reject_fdr,_=fdr_correction(pMat,alpha)
        zMat=np.multiply(zMat,reject_fdr)

    title='{}- {} Connectivity'.format(list1_labels[iy],list2_labels[ix])
    # First-axis indices of positive entries (assumes zMat is 1-D, one value per node)
    sig_ix=np.where(zMat>0)[0]
    sig_dic={}
    sig_vec=np.zeros(zMat.shape)
    if apriori:
        # Look each node's label up once, then filter and group from that list.
        # Distinct loop names are used so the `ix` parameter is never shadowed.
        labelled=[(node,schaefer_atlas.loc[node,'ns_ROI_label']) for node in sig_ix]
        sig_ix=[node for node,label in labelled if label in apriori_names]
        for name in apriori_names:
            members=[node for node,label in labelled if label==name]
            # Only regions that actually contain a hub get an entry
            if members:
                sig_dic[name]=members

    sig_vec[sig_ix]=1

    print('')
    print(title)
    print('{} significant positive ROIs in a priori regions'.format(len(sig_ix)))
    return sig_ix,sig_vec,sig_dic
    
# Hubs for: all tasks combined, social vs non-social, and the sign-flipped
# social vs non-social contrast. All three use FDR alpha=0.025.
sigvals_comb,sigvec_comb,sigdic_comb=identify_hubs1(0,0,alpha=0.025)
sigvals_diff,sigvec_diff,sigdic_diff=identify_hubs1(0,1,alpha=0.025)
sigvals_diff_ns,sigvec_diff_ns,sigdic_diff_ns=identify_hubs1(0,1,flip=True,alpha=0.025)

# ROI label of every selected node
signames_comb=schaefer_atlas.loc[sigvals_comb,'ns_ROI_label']
signames_diff=schaefer_atlas.loc[sigvals_diff,'ns_ROI_label']
signames_diff_ns=schaefer_atlas.loc[sigvals_diff_ns,'ns_ROI_label']

# Extra entries: bilateral hippocampus plus one "all hubs" entry per contrast
sigdic_comb['Hippocampus']=[402,407]
sigdic_comb['combhubs']=sigvals_comb
sigdic_diff['diffhubs']=sigvals_diff
sigdic_diff_ns['diffhubs_ns']=sigvals_diff_ns

# Pool every 'Frontal*' region of one hemisphere into a single lPFC entry
for pooled_key,hemi_tag in (('lPFC_L','_L'),('lPFC_R','_R')):
    pooled=[]
    for key in sigdic_diff_ns.keys():
        if 'Frontal' in key and hemi_tag in key:
            pooled.extend(sigdic_diff_ns[key])
    sigdic_diff_ns[pooled_key]=pooled

# Print the node indices behind every hub entry, one dict after another
hub_report=(('DG Hubs: Column ids for {}= {}',sigdic_comb),
            ('Soc Hubs: Column ids for {}= {}',sigdic_diff),
            ('NS Hubs: Column ids for {}= {}',sigdic_diff_ns))
for position,(line_fmt,hub_dict) in enumerate(hub_report):
    if position>0:
        print('')
    for x in hub_dict.keys():
        print(line_fmt.format(x,hub_dict[x]))


All Tasks- Global Connectivity
10 significant positive ROIs in a priori regions

Social versus Non-Social- Global Connectivity
6 significant positive ROIs in a priori regions

Social versus Non-Social- Global Connectivity
11 significant positive ROIs in a priori regions
DG Hubs: Column ids for dmPFC_L= [165, 174]
DG Hubs: Column ids for vmPFC_L= [160]
DG Hubs: Column ids for TPJ_R= [244]
DG Hubs: Column ids for Hippocampus_R= [407]
DG Hubs: Column ids for combhubs= [44, 160, 165, 174, 244, 370, 376, 378, 402, 407]
DG Hubs: Column ids for TPJ_L= [44]
DG Hubs: Column ids for Hippocampus_L= [402]
DG Hubs: Column ids for Hippocampus= [402, 407]
DG Hubs: Column ids for dmPFC_R= [370, 376, 378]

Soc Hubs: Column ids for TPJ_R= [262, 394, 396, 397]
Soc Hubs: Column ids for diffhubs= [85, 172, 262, 394, 396, 397]
Soc Hubs: Column ids for TPJ_L= [85, 172]

NS Hubs: Column ids for lPFC_L= [182]
NS Hubs: Column ids for dmPFC_L= [165, 177]
NS Hubs: Column ids for Frontal_Inf_Orb_R= [308]
NS Hubs:

In [47]:
def get_atlas_means(flist,fpath,atlas_path):
    """Extract one value per atlas label from each image in `flist`.

    flist      : iterable of image file names.
    fpath      : directory that every name in `flist` is joined onto.
    atlas_path : label image handed to nilearn's NiftiLabelsMasker.

    The images are concatenated and passed through the masker with
    detrending, standardisation and filtering all switched off. The result
    is squeezed, so length-1 dimensions are dropped.
    NOTE(review): the per-label summary is whatever NiftiLabelsMasker does by
    default (presumably the mean) -- confirm for the installed nilearn.
    """
    img_paths=[]
    for fname in flist:
        img_paths.append(opj(fpath,fname))
    print('Concatenating {} images'.format(len(img_paths)))
    stacked=image.concat_imgs(img_paths,memory='nilearn_cache',memory_level=1)

    # No signal cleaning: values are read out of the maps as they are
    masker_opts=dict(detrend=False,
                     standardize=False,
                     low_pass=None,high_pass=None,
                     t_r=1,
                     memory='nilearn_cache',memory_level=1)
    label_masker=input_data.NiftiLabelsMasker(atlas_path,**masker_opts)

    print('Extracting values from nii image')
    label_vals=label_masker.fit_transform(stacked).squeeze()

    print('Matrix created with shape {}'.format(label_vals.shape))
    return label_vals

# Per-node values for the non-social task maps, one row per file in ns_filenames
node_means=get_atlas_means(ns_filenames,path_InpData,sch_filename)

Concatenating 26 images
Extracting values from nii image
Matrix created with shape (26, 410)


In [60]:
# In[ ]:
def get_means(roi_mat,row_ix,col_ix):
    """Average the selected columns of `roi_mat` within each selected row.

    roi_mat : array indexed as [row, column].
    row_ix  : indices of the rows to keep.
    col_ix  : indices of the columns to average over.

    Returns the mean over the last axis of the selected sub-array, i.e. one
    value per kept row for a 2-D input.
    """
    # Rows first, then columns, so the two index lists are applied independently
    kept=roi_mat[row_ix,:][:,col_ix]
    return np.mean(kept,axis=-1)

def get_subhub_means(roi_mat,col_dict,col_id):
    """Average a named set of columns of `roi_mat` for every row.

    roi_mat  : 2-D array indexed as [row, column].
    col_dict : dictionary mapping a hub name to the list of column indices
               that belong to that hub.
    col_id   : key of `col_dict` to average over, or the string 'all' to
               average over every column of `roi_mat`.

    Returns the vector produced by get_means, one value per row of `roi_mat`.
    Raises KeyError if `col_id` is neither 'all' nor a key of `col_dict`.
    """
    print('')
    if col_id=='all':
        # Use the real column count rather than a hard-coded 410
        col_ix=range(roi_mat.shape[1])
    else:
        col_ix=col_dict[col_id]

    print('Getting subject averages for {}'.format(col_id))
    subj_vec=get_means(roi_mat,range(roi_mat.shape[0]),col_ix)
    return subj_vec

# Quick check: average the 'diffhubs' columns for every row of node_means and show the shape
test=get_subhub_means(node_means,sigdic_diff,'diffhubs')
test.shape


Getting subject averages for diffhubs


(26,)

In [55]:
# Show which hub names are available in sigdic_diff
sigdic_diff.keys()

['TPJ_R', 'diffhubs', 'TPJ_L']

In [51]:
# Display the first 26 rows of node_means (all columns)
node_means[range(26),:]

array([[ 1.84835539,  0.82312892,  2.1707546 , ...,  0.29755954,
         0.18091637,  0.43706501],
       [ 1.71391192,  0.49677069,  1.62133033, ...,  0.58582973,
         0.92263893,  0.22388916],
       [ 2.36524565,  3.15082657,  2.0522868 , ..., -0.07091223,
         0.26890892,  0.12084748],
       ...,
       [ 1.02474884,  1.28661817,  1.33643999, ..., -0.66685007,
        -1.61041167, -0.41039404],
       [ 1.03882852,  2.09557063,  1.82848311, ..., -1.68425837,
        -1.32958808, -1.10583156],
       [ 0.8475027 ,  1.10025549,  0.95867425, ..., -0.71877035,
        -0.75124414, -1.59969763]])

In [None]:


def get_node_zscores(node_vec,null_vec,savename,thresh=False):
    """Z-score each node's observed value against its null distribution.

    For every node x, the percentile of node_vec[x] within null_vec[:,x] is
    converted to a z-score with the inverse normal CDF. Infinite z-scores
    (percentile of exactly 0 or 1) are replaced by -4 / 4. The p-value stored
    for a node is the smaller of the two tail probabilities (one-tailed).

    node_vec : observed values, indexed by node (assumed 1-D -- confirm with caller).
    null_vec : null values indexed as [sample, node].
    savename : label used in the output file name and in the printed summary.
    thresh   : if True, z-scores that do not survive FDR correction at
               alpha=0.025 are zeroed and pMat holds the corrected p-values.

    Saves zMat and pMat to
    '<path_OutpData>/netLearn_<savename>_zscores_<n_node>nodes.npz'
    and returns (zMat, pMat).
    """
    zMat=np.zeros(node_vec.shape)
    pMat=zMat.copy()
    n_node=node_vec.shape[0]
    # Report progress about every 10% of nodes; max() keeps the step at least 1
    # so that fewer than 10 nodes no longer triggers a modulo-by-zero.
    step=max(1,n_node//10)
    for x in range(n_node):
        if x==0:
            print('Working on node 1',end='')
        elif (x+1)%step==0:
            print('...{}'.format(x+1),end='')
        pval=ss.percentileofscore(null_vec[:,x],node_vec[x])/100
        zMat[x]=ss.norm.ppf(pval)
        # Keep the smaller tail
        pMat[x]=pval if pval<0.5 else 1-pval
    # Finish the progress line
    print('')

    # Replace infinite z-scores once, after the loop, instead of on every pass
    zMat[np.isneginf(zMat)]=-4
    zMat[np.isposinf(zMat)]=4

    print('...')
    print('Computed zMat and pMat for {} nodes'.format(n_node))
    if thresh:
        reject_fdr,fdr_pvals=fdr_correction(pMat,alpha=0.025)
        n_sig=np.sum(reject_fdr)
        # The original printed an undefined `col_id` here; `savename` is the label available
        print('# of significant nodes for {} is: {}'.format(savename,n_sig))
        zMat=np.multiply(zMat,reject_fdr)
        pMat=fdr_pvals

    savefile='{}/netLearn_{}_zscores_{}nodes'.format(path_OutpData,savename,n_node)
    np.savez(savefile,zMat=zMat,pMat=pMat)
    print('Saving file to {}.npz'.format(savefile))
    print('...')
    return zMat,pMat

def comb_func(con_name,atlas_path):
    # Runs extraction and z-scoring for one contrast name. Nothing is returned;
    # the only lasting result is the npz file written by get_node_zscores.
    # NOTE(review): get_atlas_means in this file is defined as
    # (flist, fpath, atlas_path), but the calls below pass a directory first and
    # the contrast name second -- looks like a leftover from an older signature;
    # confirm the intended arguments before relying on this function.
    # NOTE(review): path_NullData is not defined in the visible part of this
    # file -- confirm it exists before calling.
    print('Starting extraction of z-scores for {}'.format(con_name))
    print('...')
    t1=get_atlas_means(path_InpData,con_name,atlas_path)
    t2=get_atlas_means(path_NullData,con_name,atlas_path)
    zMat,pMat=get_node_zscores(t1,t2,con_name)    

In [None]:
#Define function to extract a single ROI
def get_roi_mean(fname,atlas,roinums):
    """Return the mean image value over the voxels belonging to the given ROI(s).

    fname   : path of an image file that nibabel can load.
    atlas   : array of ROI labels on the same grid as the image data.
    roinums : a single ROI label or a sequence of labels; voxels whose atlas
              value matches any of them are averaged together.

    Returns a float, or nan when no voxel in `atlas` matches `roinums`.
    """
    # np.asanyarray(img.dataobj) is nibabel's documented replacement for the
    # removed img.get_data(); it returns the data in its on-disk dtype.
    tdata=np.asanyarray(nb.load(fname).dataobj)
    # The original compared against an undefined name `roinum`; np.isin accepts
    # both a scalar and a list for the `roinums` parameter.
    mask=np.isin(atlas,roinums)
    n_vox=np.sum(mask)
    if n_vox==0:
        return np.nan
    # np.where (rather than mask*tdata) keeps NaNs outside the ROI out of the sum
    roi_mean=np.sum(np.where(mask,tdata,0))/float(n_vox)
    return roi_mean

template_dir=opj('/data/jux/stompson/tools/BrainAtlases')

#load schaefer mask
schaefer_file='{}/Schaefer2018/schaefer400_harvard_oxford_2mm_mni_17network.nii.gz'.format(template_dir)
schaefer_mask=nb.load(schaefer_file)

roi_file='{}/Schaefer2018/s400ho_91roi_atlas_2mm_mni_17network.nii.gz'.format(template_dir)
roi_mask=nb.load(roi_file)

# np.asanyarray(img.dataobj) is nibabel's documented replacement for the
# removed img.get_data(); it keeps the on-disk dtype.
schaefer_data=np.asanyarray(schaefer_mask.dataobj)
roi_data=np.asanyarray(roi_mask.dataobj)

#load schaefer atlas info
# NOTE: read_pickle executes whatever the pickle contains -- only load atlas
# files that come from a trusted source.
schaefer_atlas=pd.read_pickle('{}/s400ho_ns_netLearn_2mm.pickle'.format(template_dir))
roi_atlas=pd.read_pickle('{}/s400ho_91_neurosynth_roi_netLearn_2mm.pickle'.format(template_dir))


# Integer code per row = position of its label in the sorted unique label array.
# A dict lookup replaces int(np.where(...)[0]), which relies on converting a
# 1-element array to a Python int (deprecated in recent NumPy).
schaefer_atlas=schaefer_atlas.fillna('Uncertain')
sch_names=np.unique(schaefer_atlas['System'])
sch_lookup=dict((name,code) for code,name in enumerate(sch_names))
sch_nums=[sch_lookup[label] for label in schaefer_atlas['System']]

roi_names=np.unique(roi_atlas['ROI'])
roi_lookup=dict((name,code) for code,name in enumerate(roi_names))
roi_nums=[roi_lookup[label] for label in roi_atlas['ROI']]

# Node count and the indices of the strict upper triangle (unique node pairs)
n_node = len(sch_nums)
triu_ix, triu_iy = np.triu_indices(n_node, k=1)
n_conn = len(triu_ix)


In [None]:




    
def load_subj_map(subj,task):
    """Placeholder for loading one subject's map for one task.

    The original definition had no body, which made the file unparseable.
    Until the loader is written, calling it fails loudly instead.

    Raises NotImplementedError on every call.
    """
    raise NotImplementedError('load_subj_map has not been implemented yet')

def subj_activation(task):
    """Build and save a table of hub z-scores for one task.

    task : string substituted into the input and output file names.

    Loads the task's data matrix and its null data, computes one column per
    hub via get_subhub_zscores (each hub against 'all'), writes the table to
    '<path_OutpData>/netLearn_<task>Data_zscores_26subjs_sighubs_nodestr.csv'
    and returns it as a DataFrame with columns named '<hub>_global'.

    NOTE(review): load_mat, load_null_mat and get_subhub_zscores are not
    defined in the visible part of this file -- confirm they are available
    before calling.
    """
    df=pd.DataFrame()

    # load activation maps
    print('')
    glm_map=load_mat('netLearn_{}_transition_PPI_26subjs.npz'.format(task))
    print('')
    null_mat=load_null_mat('netLearn_{}_nulldata_transition_PPI_26subjs.npz'.format(task))

    # Each group of hub names is looked up in the dictionary it was derived from
    hub_sets=[
        (['combhubs','Hippocampus','Hippocampus_L','Hippocampus_R'],sigdic_comb),
        (['diffhubs','TPJ_L','TPJ_R'],sigdic_diff),
        (['diffhubs_ns','dmPFC_L','dmPFC_R','lPFC_L','lPFC_R'],sigdic_diff_ns),
    ]
    for hub_names,hub_dict in hub_sets:
        for a in hub_names:
            colname='{}_global'.format(a)
            # Pass the matrix loaded above; the original referenced an
            # undefined `ppi_mat` and left `glm_map` unused.
            sub_z,sub_p=get_subhub_zscores(glm_map,null_mat,a,'all',hub_dict,hub_dict)
            df[colname]=sub_z
            print('')

    df.to_csv('{}/netLearn_{}Data_zscores_26subjs_sighubs_nodestr.csv'.format(path_OutpData,task))
    return df

