# Run Whole-Brain PPI Analysis
This script takes the preprocessed brain activity from the network learning task, extracts timeseries of activation for each node in the Schaefer+Harvard-Oxford combined atlas, computes PPI scores for each node pair, and then repeats the PPI analysis 500 times after shuffling the trial order each time to generate null models.

# 1: Set up environment

In [1]:
#importing libraries
from __future__ import print_function, division

import copy
import math
import os, sys
import subprocess
from os.path import join as opj
from random import shuffle

import nilearn, nipy, nistats, scipy, statsmodels

import numpy as np
import scipy.stats as ss
import pandas as pd
import statsmodels.api as sm

from nilearn import image, plotting, input_data
from nilearn.connectome import ConnectivityMeasure

from nipy.modalities.fmri import hrf
from nipy.modalities.fmri import hemodynamic_models

from nistats.design_matrix import make_first_level_design_matrix

print('Done Importing Packages')

# Report the version of every analysis package so a run can be reproduced
for pkg_label, pkg in [('nilearn', nilearn), ('nipy', nipy), ('nistats', nistats),
                       ('numpy', np), ('pandas', pd), ('scipy', scipy),
                       ('statsmodels', statsmodels)]:
    print('{}:'.format(pkg_label), pkg.__version__)

  exec(code_obj, self.user_global_ns, self.user_ns)
  from . import (_representation, _kalman_filter, _kalman_smoother,
  from . import (_representation, _kalman_filter, _kalman_smoother,
  from . import kalman_loglike
  from . import kalman_loglike
  from statsmodels.tsa.regime_switching._hamilton_filter import (
  from statsmodels.tsa.regime_switching._hamilton_filter import (
  from .linbin import fast_linbin
  from .linbin import fast_linbin
  from ._smoothers_lowess import lowess as _lowess
  from ._smoothers_lowess import lowess as _lowess
  from ..utils.seq_dataset import ArrayDataset, CSRDataset
  from ..utils.seq_dataset import ArrayDataset, CSRDataset
  from ..utils import arrayfuncs, as_float_array, check_X_y, deprecated
  from ..utils import arrayfuncs, as_float_array, check_X_y, deprecated
  from . import cd_fast
  from . import cd_fast
  from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
  from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Hube

Done Importing Packages
nilearn: 0.4.2
nipy: 0.4.2
nistats: 0.0.1b
numpy: 1.16.4
pandas: 0.23.0
scipy: 1.1.0
statsmodels: 0.9.0


In [2]:
# Create miscellaneous functions that you will use throughout script

def convert_cfg_vec_to_adj_matr(conn_vec):
    '''
    Convert a vector of unique connections to a symmetric adjacency matrix

    The vector is interpreted as the upper triangle (diagonal excluded) of
    the matrix, in the order produced by np.triu_indices(n_node, k=1).

    Parameters
    ----------
        conn_vec: numpy.ndarray
            Vector with shape (n_conn,) specifying unique connections,
            where n_conn = n_node*(n_node-1)/2

    Returns
    -------
        adj_matr: numpy.ndarray
            Symmetric matrix with shape (n_node, n_node) and a zero diagonal

    Raises
    ------
        ValueError
            If conn_vec is not 1-dimensional, or its length is not a valid
            number of unique connections for any n_node
    '''
    # Standard param checks
    conn_vec = np.asarray(conn_vec)
    if conn_vec.ndim != 1:
        raise ValueError('conn_vec must be 1-dimensional, got shape %r'
                         % (conn_vec.shape,))

    # Compute number of nodes by inverting n_conn = n_node*(n_node-1)/2
    n_conn = conn_vec.shape[0]
    n_node = int(np.floor(np.sqrt(2*n_conn))+1)
    if n_node*(n_node-1)//2 != n_conn:
        raise ValueError('length %d is not a valid number of unique '
                         'connections (n_node*(n_node-1)/2)' % n_conn)

    # Compute upper triangle indices (by convention)
    triu_ix, triu_iy = np.triu_indices(n_node, k=1)

    # Fill the upper triangle, then mirror it into the lower triangle
    adj_matr = np.zeros((n_node, n_node))
    adj_matr[triu_ix, triu_iy] = conn_vec
    adj_matr += adj_matr.T

    return adj_matr


def convert_adj_matr_to_cfg_matr(adj_matr):
    '''
    Convert a stack of adjacency matrices to a configuration matrix

    Only the upper triangle (diagonal excluded) of each matrix is kept,
    so each matrix is assumed to be symmetric.

    Parameters
    ----------
        adj_matr: numpy.ndarray
            Matrix with shape (n_win, n_node, n_node)

    Returns
    -------
        cfg_matr: numpy.ndarray
            Matrix with shape (n_win, n_conn), one row of unique
            connections per window, ordered as in
            np.triu_indices(n_node, k=1)

    Raises
    ------
        ValueError
            If adj_matr does not have exactly 3 dimensions
    '''
    # Standard param checks
    adj_matr = np.asarray(adj_matr)
    if adj_matr.ndim != 3:
        raise ValueError('adj_matr requires 3-dimensions (n_win, n_node, n_node), '
                         'got shape %r' % (adj_matr.shape,))

    # Compute number of nodes
    n_node = adj_matr.shape[1]

    # Compute upper triangle indices (by convention)
    triu_ix, triu_iy = np.triu_indices(n_node, k=1)

    # Keep the upper-triangle entries of every window
    cfg_matr = adj_matr[:, triu_ix, triu_iy]

    return cfg_matr

### Set paths and subject list
Next, define the input and output data paths and build the list of subjects whose timeseries we want to extract.

In [None]:
# Change to directory where you saved the data
home_path = '/Users/steventompson/Git/tompson_netlearn_fmri'

# Project sub-directories
data_dir = opj(home_path, 'data')
path_CodeDir = opj(home_path, 'scripts')
funcpath = opj(data_dir, 'subjs')

# Preprocessed input data (alternative location kept for reference)
#path_InpData = opj(data_dir,'subjs')
path_InpData = '/Volumes/Tompson_fMRIData2/SNL_Study/data/'

# One output folder per processing stage
path_OutpData1 = opj(data_dir, 'timeSeries', 'netLearn_cfr_schaefer')         # cleaned timeseries
path_OutpData2 = opj(data_dir, 'timeSeries', 'deconv_temp')                   # deconvolution scratch files
path_OutpData3 = opj(data_dir, 'timeSeries', 'netLearn_cfr_schaefer_deconv')  # deconvolved timeseries
path_OutpData4 = opj(data_dir, 'netLearn_ppi')                                # PPI matrices
path_OutpData5 = opj(data_dir, 'netLearn_ppi_null')                           # null-model PPI matrices

# Make sure every folder exists, creating any that are missing
for path in (path_InpData, path_OutpData1, path_OutpData2,
             path_OutpData3, path_OutpData4, path_OutpData5):
    if os.path.exists(path):
        continue
    print('\nPath: {}, does not exist'.format(path))
    os.makedirs(path)

print('Set data paths')

In [None]:
# Subjects are the entries of the input folder whose name contains 'SNL',
# minus the subjects flagged for exclusion
bad_subjs = ['SNL_001','SNL_004','SNL_028']
subjs = sorted(s for s in os.listdir(path_InpData)
               if 'SNL' in s and s not in bad_subjs)
#subjs.reverse()
n_subjs = len(subjs)
print('We have %d subjects' % (n_subjs))

#Create list of run IDs ('run1' ... 'run10')
runs = ['run{}'.format(run_num) for run_num in range(1, 11)]

# One [subject, run] pair per unit of work, ordered by subject and then by run
param_list = [[subj, run] for subj in subjs for run in runs]

# Framewise-displacement threshold used to flag high-motion volumes
fd_thresh = 0.5

# 2: Extract brain activity
Use nilearn to extract the timeseries for each node and clean the data to control for confounds. Most of the cleaning was done during preprocessing, but here we also regress out high-variance confounds (similar to CompCor) and standardize the timeseries.

In [None]:
## Define functions
#Define functions that will extract/create the confound regressors, 
#extract and clean the time-series for each node, 
#and calculate the framewise displacement.

def clean_time_series(subjID, runID, atlas_path, hv_confounds=None, fd_thresh = None):  
    '''
    Extract the cleaned node timeseries and head-motion summary for one run

    Reads the preprocessed BOLD image of the given subject/run from
    path_InpData, regresses out high-variance confounds while averaging
    the signal within each atlas label (nilearn NiftiLabelsMasker with
    standardize=True, no detrending or filtering), and computes the
    framewise displacement from the run's motion-parameter file.

    Parameters
    ----------
        subjID: str
            Subject folder name under path_InpData
        runID: str
            Run label (e.g. 'run1') inserted into the input filenames
        atlas_path: str
            Path of the labels image passed to NiftiLabelsMasker
        hv_confounds: numpy.ndarray or None
            Confound regressors to remove. If None (default) they are
            computed with nilearn.image.high_variance_confounds
        fd_thresh: float or None
            Framewise-displacement threshold. Volumes with FD above it are
            flagged; None or a non-positive value disables flagging

    Returns
    -------
        cleaned_time_series: numpy.ndarray
            Output of the masker's fit_transform (saved downstream as an
            n_trs x n_nodes matrix)
        FD: numpy.ndarray
            Framewise displacement of every volume (see compute_fd)
        bad_fd_vols: numpy.ndarray
            1 for volumes with FD > fd_thresh, 0 otherwise (all zeros when
            flagging is disabled)
    '''
    funcPath=os.path.join(path_InpData,subjID,'func')
    nifti_file = 'norm_rg_bp_dmt_stc_mc_dsp_BOLD_{}.nii.gz'.format(runID)
    conf_filename = opj(funcPath,'bp_dmt_stc_mc_dsp_BOLD_{}.nii.gz_36Params.1D'.format(runID))
    func_filename = opj(funcPath, nifti_file)  #preprocessed data

    # Honor confounds supplied by the caller; otherwise derive them from the data
    if hv_confounds is None:
        print('Extracting high variance confounds')
        hv_confounds = nilearn.image.high_variance_confounds(func_filename)
    
    print('Setting up mask')
    masker = input_data.NiftiLabelsMasker(atlas_path,
                                           detrend=False, 
                                           standardize=True, 
                                           low_pass=None, high_pass=None, 
                                           t_r=1,  
                                           memory='nilearn_cache', memory_level=1)
      
    print('Extracting cleaned data')
    cleaned_time_series = masker.fit_transform(func_filename,confounds=[hv_confounds])  
    
    print('Computing framewise displacement')
    # FD is always part of the return value, so compute it even when no
    # threshold is requested (previously FD was unbound in that case)
    FD = compute_fd(conf_filename)
    if fd_thresh is not None and fd_thresh > 0:
        bad_fd_vols = np.where(FD > fd_thresh, 1, 0)
    else:
        bad_fd_vols = np.zeros(FD.shape, dtype=int)
    
    # NOTE: the printed value is a proportion (0-1), not a percentage
    print('Percent of volumes with excessive head motion: {}'.format(np.mean(bad_fd_vols)))
        
    return cleaned_time_series,  FD, bad_fd_vols

def compute_fd(conf_filename, headradius=50):
    '''
    Load motion parameters file and compute framewise displacement. 
    This function is based on Powers' 2012 as well as Bramila tools and 
    Poldrack's fMRI QA script.

    Important note: the first three columns of the file are treated as
    rotations (in radians) and columns 4-6 as translations. Other tools
    order the six parameters differently, so this cannot be used blindly.
    NOTE(review): an earlier version of this docstring said the input was
    assumed to be an SPM realignment file, and hmlist elsewhere in this
    script labels the same columns translations-first. Confirm the actual
    column order of the *_36Params.1D files.

    Parameters
    ----------
        conf_filename: character string
            String indicating the name of the motion confound file to load.
            Only the first six whitespace-separated columns are used
        headradius: float
            Radius (same unit as the translations, 50 by default) of the
            sphere used to convert rotations to displacements

    Returns
    -------
        FD: numpy nd.array
            1-D array with length of the number of volumes in the task run, 
            representing the framewise displacement for each volume.
            The first volume always has FD = 0.
            
    '''
    # ndmin=2 keeps a single-volume file two-dimensional
    motpars = np.loadtxt(conf_filename, ndmin=2)[:, :6]

    # compute absolute volume-to-volume displacement (first volume stays 0)
    dmotpars = np.zeros(motpars.shape)
    dmotpars[1:, :] = np.abs(np.diff(motpars, axis=0))
    
    # convert rotation to displacement on a sphere of radius `headradius`
    # from Jonathan Power:
    # The conversion is simple - you just want the length of an arc that a rotational
    # displacement causes at some radius. Circumference is pi*diameter. 
    # Multiply that circumference by (degrees/360) or (radians/2*pi) to get the 
    # length of the arc produced by a rotation.  
    disp = dmotpars.copy()
    disp[:, 0:3] = np.pi*headradius*2*(disp[:, 0:3]/(2*np.pi))
    
    # FD is the sum of the six absolute displacements of each volume
    FD = np.sum(disp, 1)
      
    return FD

### Import brain atlas
Next, load the combined Schaefer + Harvard-Oxford atlas: 400 cortical regions (200 per hemisphere) plus the subcortical regions, for 410 nodes in total.

In [None]:
# Folder holding the brain atlases
template_dir='/data/jux/stompson/tools/BrainAtlases'
maskPath='{}/Schaefer2018/schaefer400_harvard_oxford_2mm_mni_17network.nii.gz'.format(template_dir)

# nibabel was never imported as `nb` in this script, so load the labels
# image through the already-imported nilearn image module instead
smask=image.load_img(maskPath)
sdata=smask.get_data()

# Table of per-node atlas information stored alongside the atlas
atlas_file='{}/neurosynth_data/s400ho_ns_netLearn_2mm.pickle'.format(template_dir)
atlas_info=pd.read_pickle(atlas_file)

### Extract data from individual subjects, clean data, and save time_series
Here we extract the data from individual subjects during each run of the network learning task.

In [None]:
runNum = 0

# Walk through every [subject, run] pair generated above
for n, (subjID, runID) in enumerate(param_list):
    print('Working on param # {}: subject {}, {}'.format(n+1, subjID, runID))
    print('...')

    #Set paths for important files/folders
    anatPath = opj(path_InpData, subjID, 'anat')
    funcPath = os.path.join(path_InpData, subjID, 'func')
    conf_filename = opj(
        funcPath,
        'bp_dmt_stc_mc_dsp_BOLD_{}.nii.gz_36Params.1D'.format(runID))  #motion confounds
    saveFile1 = os.path.join(
        path_OutpData1,
        '{}_netLearn_{}_TS_Schaefer400+Subcort'.format(subjID, runID))

    #cleaned time-series (high-variance confounds removed) plus the
    #framewise displacement and the flagged high-motion volumes
    cleaned_time_series, fds, bad_fd_vols = clean_time_series(
        subjID, runID, atlas_path=maskPath, fd_thresh=fd_thresh)

    #positional arrays are stored as arr_0, arr_1, arr_2 in the npz file
    np.savez(saveFile1, cleaned_time_series, fds, bad_fd_vols)
    print('Saved file: {}.npz'.format(saveFile1))
    print('...')

# 3: Deconvolve timeseries

Take the timeseries extracted for each node in the previous step and deconvolve the hemodynamic response function from it (using AFNI's 3dTfitter), so that the PPI terms can be built from the deconvolved signal.

In [None]:
        
def load_sub_data1(subjID,runID):
    '''
    Load the cleaned node timeseries saved for one subject and run

    Parameters
    ----------
        subjID: str
            Subject ID inserted into the filename
        runID: str
            Run ID (e.g. 'run1') inserted into the filename

    Returns
    -------
        timeseries: numpy.ndarray
            First array stored in the npz file: an n_trs x n_nodes matrix
            (2D matrix with stacked timeseries)
    '''
    loadFile1=opj(path_OutpData1, ('{}_netLearn_{}_TS_Schaefer400+Subcort.npz'.format(subjID,runID)))
    # Indexing reads the array into memory, so the archive can be closed
    # right away instead of leaking an open file handle per call
    with np.load(loadFile1) as data:
        timeseries=data['arr_0']
    return timeseries

def deconvolve_subj_data(paramID):
    '''
    Deconvolve every node timeseries of one subject/run with AFNI 3dTfitter

    Each node's BOLD timeseries is written to a .1D text file in
    path_OutpData2, deconvolved with 3dTfitter (-FALTUNG against
    gloverHRF.1D, which is looked up relative to path_OutpData2), and the
    result is read back. The deconvolved timeseries of all nodes are saved
    as one npz file in path_OutpData3.

    Parameters
    ----------
        paramID: int
            Index into param_list selecting the [subject, run] pair to process

    Returns
    -------
        None
            The n_trs x n_nodes deconvolved matrix is written to
            '<path_OutpData3>/<subjID>_netLearn_<runID>_deconvolved_ts.npz'

    Raises
    ------
        subprocess.CalledProcessError
            If 3dTfitter exits with a non-zero status
    '''
    subjID=param_list[paramID][0]
    runID=param_list[paramID][1]
    print('Working on param # {}: subject {}, {}'.format(paramID+1,subjID,runID))
    saveFile='{}/{}_netLearn_{}_deconvolved_ts'.format(path_OutpData3,subjID,runID)

    #Load subject timeseries data
    bold_ts=load_sub_data1(subjID,runID)
    n_trs,n_nodes=bold_ts.shape

    #Float array to fill with deconvolved data (an empty DataFrame would be
    #object-dtype and end up pickled inside the npz file)
    dcv_ts=np.zeros((n_trs,n_nodes))

    #3dTfitter is run from the scratch folder; always restore the original
    #working directory, even if a node fails
    cwd=os.getcwd()
    os.chdir(path_OutpData2)
    try:
        #Loop over nodes and deconvolve each timeseries
        for n,node_ts in enumerate(bold_ts.T):
            if n==0:
                print('Working on node #{}'.format(n),end='')
            elif n%50==0:
                print(n,end='')
            elif n%10==0:
                print('.',end='')

            #Save node timeseries to 1D file
            sFile1='{}/{}_{}_bold_ts_node{}.1D'.format(path_OutpData2,subjID,runID,n)
            pd.DataFrame(node_ts).to_csv(sFile1,index=False,header=False)

            #Deconvolve BOLD signal for node using AFNI's 3dTfitter function.
            #An argument list (no shell) is safe for paths containing spaces,
            #and check_call stops on failure instead of reading a missing or
            #stale output file afterwards
            sFile2='{}/{}_{}_neur_ts_node{}'.format(path_OutpData2,subjID,runID,n)
            cmd=['3dTfitter','-RHS',sFile1,'-FALTUNG','gloverHRF.1D',sFile2,'012','0']
            subprocess.check_call(cmd)

            #3dTfitter saves the file as a .1D text file, so need to reload
            #that and add it to the deconvolved timeseries matrix
            dcv_ts[:,n]=np.loadtxt('{}.1D'.format(sFile2))
    finally:
        os.chdir(cwd)

    #save important data for each subject/run to a new numpy zipped file
    np.savez(saveFile, dcv_ts)
    print('...')
    print('Saved file: {}.npz'.format(saveFile))
    print('...')


# Deconvolve every [subject, run] pair generated above
for n, (subjID, runID) in enumerate(param_list):
    print('Working on param # {}: subject {}, {}'.format(n+1, subjID, runID))
    print('...')
    deconvolve_subj_data(n)

# 4: Get PPI matrices

### Set task variables and trial-level data for each subject and run

In [None]:
#Load master file with subject IDs and trial info as well as list of subject IDs for scanner
masterFile = pd.read_csv('{}/subj_data/netLearn_masterFile_27subjs.csv'.format(data_dir))
subj_links = pd.read_csv('{}/subj_data/netLearn_IDs_26subjs.csv'.format(data_dir))

# # Set task data

#Create run labels: trials 0-199 -> run 1, 200-399 -> run 2, ..., 800-999 -> run 5
#(any other trial number keeps the label 0)
masterFile['Run'] = 0
trial_nums = np.array(masterFile['trialNum'])
for run_label in range(1, 6):
    run_trials = list(range(200*(run_label-1), 200*run_label))
    masterFile.loc[np.isin(trial_nums, run_trials), 'Run'] = run_label

#Recode the transition flag (0 -> 'x', 1 -> 'transition') ...
for old_code, new_label in ((0, 'x'), (1, 'transition')):
    masterFile.loc[masterFile['transition']==old_code, 'transition'] = new_label

#... and keep only the transition trials
masterFile = masterFile.loc[masterFile['transition']=='transition', :]

#Set experiment parameters
tr = 1.0
condlist = ['transition']
hmlist = ['tx','ty','tz','rx','ry','rz']
covarlist = hmlist + ['constant']
n_nodes = 410

### Define functions

In [None]:
def load_sub_data2(subjID,runID):
    '''
    Load the deconvolved timeseries and head-motion parameters for one run

    Parameters
    ----------
        subjID: str
            Subject ID, used for the timeseries filename and to locate the
            subject's func folder under path_InpData
        runID: str
            Run ID (e.g. 'run1') inserted into the filenames

    Returns
    -------
        timeseries: numpy.ndarray
            Deconvolved timeseries matrix with n_trs x n_nodes shape
        motpars: numpy.ndarray
            First six columns of the run's 36-parameter confound file,
            z-scored column by column
    '''
    #load deconvolved timeseries matrix with n_trs x n_nodes shape
    loadFile1='{}/{}_netLearn_{}_deconvolved_ts.npz'.format(path_OutpData3,subjID,runID)
    with np.load(loadFile1) as data:
        timeseries=data['arr_0']
    
    #Load head motion parameters file from THIS subject's func folder.
    #(The folder used to come from a global left over from the extraction
    #loop, i.e. always the last subject processed there.)
    funcPath=opj(path_InpData,subjID,'func')
    conf_filename = opj(funcPath,'bp_dmt_stc_mc_dsp_BOLD_{}.nii.gz_36Params.1D'.format(runID))  #motion confounds
    motpars = pd.read_csv(conf_filename, header=None, sep=r'\s+')
    #.loc slicing is label-based and inclusive: columns 0..5
    motpars = motpars.loc[:,:5].values.astype(float)
    
    #Z-score the head motion parameters (step is probably not necessary but yielded
    #beta weights that were standardized and thus more interpretable)
    motpars = ss.zscore(motpars, axis=0)
    return timeseries,motpars

def create_sub_design(subjID,runID,df,regs,regnames):
    '''
    Build the first-level design matrix for one subject and run

    Parameters
    ----------
        subjID: str
            Scanner subject ID; its last three characters give the numeric
            participant ID matched against df['pID']
        runID: str
            Run ID of the form 'run<N>'
        df: pandas.DataFrame
            Trial-level data with (at least) the columns pID, Cond, Run,
            transition and onset_raw
        regs: array-like
            Extra regressors (one row per scan) added to the design matrix;
            its length sets the number of scans
        regnames: list of str
            Names of the columns in regs

    Returns
    -------
        X1: pandas.DataFrame
            Design matrix from nistats make_first_level_design_matrix with
            unconvolved task regressors (hrf_model=None), the extra
            regressors and a third-order polynomial drift model
    '''
    #Note: ppt in Condition #1 saw non-social task first, whereas ppt in Condition #2 saw social task first
    #Convert subject and run ids to integers
    subj_num = int(subjID[-3:])
    run_num = int(runID.replace('run',''))
    scan_times = np.arange(len(regs)) * tr
    subdata = df.loc[df['pID']==subj_num, :]

    #Choose subject run based on counterbalance condition number
    #Can simplify this code if you didn't counterbalance tasks (or only have one task)
    cond_num = int(subj_links.loc[subj_links['scanID']==subjID, 'CondNum'])
    if cond_num in (1, 2):
        in_first_half = run_num < 6
        ns_came_first = cond_num == 1
        #Runs 1-5 belong to the task seen first, runs 6-10 to the other task
        task_label = 'NS' if ns_came_first == in_first_half else 'Soc'
        task_run = run_num if in_first_half else run_num - 5
        subdata = subdata.loc[subdata['Cond']==task_label, :]
        subdata = subdata.loc[subdata['Run']==task_run, :]

    #Make design matrix using nistats
    trials = subdata['transition'].tolist()
    paradigm = pd.DataFrame({'trial_type': trials,
                             'onset': subdata['onset_raw'].tolist(),
                             'duration': [1.5]*len(trials)})
    return make_first_level_design_matrix(scan_times, paradigm,
                                          drift_model='polynomial',
                                          add_regs=regs, add_reg_names=regnames,
                                          drift_order=3, hrf_model=None)

def create_ppi_design(df,roi,condlist,covarlist):
    '''
    Add the seed timeseries and its PPI terms to a copy of a design matrix

    Parameters
    ----------
        df: pandas.DataFrame
            Design matrix containing one column per condition in condlist
        roi: array-like
            Seed timeseries, stored in a new column named 'ROI'
        condlist: list of str
            Conditions for which a '<cond>_PPI' column (ROI * condition)
            is created
        covarlist: list of str
            Covariate column names appended at the end of the column list

    Returns
    -------
        X2: pandas.DataFrame
            Copy of df with the 'ROI' and '<cond>_PPI' columns added
        cols: list of str
            'ROI', then each condition followed by its PPI term, then the
            covariates
    '''
    design = df.copy()
    design['ROI'] = roi

    #Construct ppi terms
    cols = ['ROI']
    for cond in condlist:
        ppi_name = str(cond + '_PPI')
        design[ppi_name] = design['ROI'] * design[cond]
        cols += [cond, ppi_name]

    return design, cols + list(covarlist)

def get_ppi_beta(df,dv,index):
    '''
    Fit an OLS model and return the beta weight of one design-matrix term

    Parameters
    ----------
        df: pandas.DataFrame
            PPI design matrix (predictors); it is expected to already
            contain a constant column (hasconst=True)
        dv: array-like
            Timeseries of another node, used as the dependent variable
        index: str
            Name of the term (e.g. the PPI term) whose beta weight to return

    Returns
    -------
        bweight: float
            Estimated coefficient of the requested term
    '''
    fitted = sm.OLS(dv, df, hasconst=True).fit()
    return fitted.params[index]

In [None]:
ppivar = 'transition_PPI'

#The HRF kernel only depends on the TR, so build it once for all runs
hkernel = [hemodynamic_models.glover_hrf(tr, oversampling=1)]

for n in range(len(param_list)): 
    #Set the subject and run IDs based on the parameter list generated above
    subjID=param_list[n][0]
    runID=param_list[n][1]
    print('Working on param # {}: subject {}, {}'.format(n+1,subjID,runID))
    print('...')

    #Set path and filename to save the ppi matrix
    saveFile1=os.path.join(path_OutpData4, ('{}_netLearn_{}_{}_410nodes'.format(subjID,runID,ppivar)))

    #Load subject trial and head motion data and create design matrix
    data1,hm1=load_sub_data2(subjID,runID)
    newdf=create_sub_design(subjID,runID,masterFile,hm1,hmlist)
    #n_nodes=data1.shape[1]
    ppiMat=np.zeros((n_nodes,n_nodes))

    #Can set this to be a subset of the conditions by changing condlist above
    newcondlist=[x for x in condlist if x in np.array(newdf.columns)]

    #Re-convolve each deconvolved timeseries with the HRF and z-score it ONCE
    #per run: the same target timeseries is reused for every seed node, so
    #doing this inside the seed loop repeated the work n_nodes times
    n_trs=data1.shape[0]
    conv_data=np.column_stack([ss.zscore(np.convolve(data1[:,jj],hkernel[0])[:n_trs])
                               for jj in range(n_nodes)])

    #Loop over each node to create a design matrix for each node
    for ii in range(n_nodes):
        if ii==0:
            print('Working on node {}'.format(ii),end='')
        elif ii%50==0:
            print(ii,end='')
        elif ii%10==0:
            print('.',end='')

        ppidf,ppicols=create_ppi_design(newdf,data1[:,ii],newcondlist,covarlist)
        #Work on an explicit copy so the assignments below modify this frame
        #(and not a view of it)
        ppidf=ppidf[ppicols].copy()

        #Convolve the seed, task and PPI columns with the HRF and z-score them
        #so the beta weights are standardized (again can skip the z-scoring if
        #you want); the covariates at the end of ppicols are left untouched
        for x in ppicols[:-len(covarlist)]:
            ppidf.loc[:,x]=ss.zscore(np.convolve(ppidf.loc[:,x],hkernel[0])[:len(ppidf)])

        #Compute the ppi beta of this seed for every other node
        for jj in range(n_nodes):
            if jj!=ii:
                betaweight=get_ppi_beta(ppidf,conv_data[:,jj],ppivar)
                ppiMat[ii,jj]=betaweight

    # Convert to cfg_vec to symmetrize the upper and lower triangles 
    # since ppi of y to x is not identical to x to y

    # Compute upper triangle indices (by convention)
    triu_ix, triu_iy = np.triu_indices(n_nodes, k=1)

    # Convert to configuration matrix
    ppivec_upp = ppiMat[triu_ix, triu_iy]
    ppivec_low = ppiMat[triu_iy, triu_ix]

    # Compute mean cfg_vec
    ppivec=np.mean(np.vstack((ppivec_upp,ppivec_low)),axis=0)

    # Convert back to adjacency matrix
    ppiMat2=convert_cfg_vec_to_adj_matr(ppivec)

    #save important data for each subject/run to a new numpy zipped file
    np.savez(saveFile1, ppiMat=ppiMat2)
    print('')
    print('Saved file: {}.npz'.format(saveFile1))
    print('...')

# 5: Create null model PPI matrices
Shuffle the trial order for each run and re-run the PPI analysis 500 times to generate null models

In [None]:
def create_null_sub_design(subjID,runID,df,regs,regnames,trialdur=1.5):
    '''
    Build a null first-level design matrix with randomized trial onsets

    Same as create_sub_design, except that the real trial onsets are
    replaced by onsets drawn at random (without replacement) from a grid
    with a spacing of one trial duration.

    Parameters
    ----------
        subjID: str
            Scanner subject ID; its last three characters give the numeric
            participant ID matched against df['pID']
        runID: str
            Run ID of the form 'run<N>'
        df: pandas.DataFrame
            Trial-level data with (at least) the columns pID, Cond, Run
            and transition
        regs: array-like
            Extra regressors (one row per scan) added to the design matrix;
            its length sets the number of scans
        regnames: list of str
            Names of the columns in regs
        trialdur: float
            Trial duration, also used as the spacing of the candidate
            onsets. Defaults to 1.5, the duration used in create_sub_design

    Returns
    -------
        X1: pandas.DataFrame
            Design matrix from nistats make_first_level_design_matrix with
            unconvolved task regressors (hrf_model=None), the extra
            regressors and a third-order polynomial drift model

    Raises
    ------
        ValueError
            (from np.random.choice) if the run has more trials than
            candidate onsets
    '''
    #Note: ppt in Condition #1 saw non-social task first, whereas ppt in Condition #2 saw social task first
    pID=int(subjID[-3:])
    rID=int(runID.replace('run',''))
    n_scans = len(regs)
    frame_times = np.arange(n_scans) * tr
    subdata=df.loc[df['pID']==pID,:]

    #Choose subject run based on counterbalance condition number:
    #runs 1-5 belong to the task seen first, runs 6-10 to the other task
    cond_num=int(subj_links.loc[subj_links['scanID']==subjID,'CondNum'])
    if cond_num in (1,2):
        first_half = rID<6
        task_label = 'NS' if (cond_num==1)==first_half else 'Soc'
        task_run = rID if first_half else rID-5
        subdata=subdata.loc[subdata['Cond']==task_label,:]
        subdata=subdata.loc[subdata['Run']==task_run,:]
    trials=subdata['transition'].tolist()
    durs=[trialdur]*len(trials)
    
    #randomize trial onsets: candidates are spaced one trial apart and the
    #last 10 scans of the run are never used as an onset
    onset_sample=np.arange(0,(n_scans-10),trialdur)
    onsets=np.sort(np.random.choice(onset_sample,size=len(trials),replace=False)).tolist()
    
    paradigm = pd.DataFrame({'trial_type': trials, 'onset': onsets,
                             'duration': durs})
    #make_first_level_design_matrix is the function imported at the top of
    #the script; the old name make_design_matrix is not defined here
    X1 = make_first_level_design_matrix(frame_times, paradigm, drift_model='polynomial',
                                        add_regs=regs, add_reg_names=regnames, 
                                        drift_order=3, hrf_model=None)
    return X1

In [None]:
ppivar = 'transition_PPI'

#Number of null models (shuffled trial onsets) generated for each run
n_perm = 500

#The HRF kernel only depends on the TR, so build it once for all runs
hkernel = [hemodynamic_models.glover_hrf(tr, oversampling=1)]

for n in range(len(param_list)): 
    #Set the subject and run IDs based on the parameter list generated above
    subjID=param_list[n][0]
    runID=param_list[n][1]
    print('Working on param # {}: subject {}, {}'.format(n+1,subjID,runID))
    print('...')

    #Load the deconvolved timeseries and head motion data
    data1,hm1=load_sub_data2(subjID,runID)

    # Set path and filename to save the null model ppi matrix
    saveFile1=opj(path_OutpData5, ('{}_netLearn_{}_{}_410nodes_nulldist'.format(subjID,runID,ppivar)))

    #Re-convolve each deconvolved timeseries with the HRF and z-score it ONCE
    #per run: it does not depend on the permutation or on the seed node
    n_trs=data1.shape[0]
    conv_data=np.column_stack([ss.zscore(np.convolve(data1[:,jj],hkernel[0])[:n_trs])
                               for jj in range(n_nodes)])

    ppi_null=np.zeros((n_perm,n_nodes,n_nodes))
    for perm_val in range(n_perm):
        print('')
        print('Working on permutation #{}'.format(perm_val+1))
        #Design matrix with randomly re-drawn trial onsets
        newdf=create_null_sub_design(subjID,runID,masterFile,hm1,hmlist)
        #n_nodes=data1.shape[1]
        ppiMat=np.zeros((n_nodes,n_nodes))
        newcondlist=[x for x in condlist if x in np.array(newdf.columns)]
        for ii in range(n_nodes):
            if ii==0:
                print('Working on node {}'.format(ii),end='')
            elif ii%50==0:
                print(ii,end='')
            elif ii%10==0:
                print('.',end='')

            ppidf,ppicols=create_ppi_design(newdf,data1[:,ii],newcondlist,covarlist)
            #Work on an explicit copy so the assignments below modify this frame
            ppidf=ppidf[ppicols].copy()
            for x in ppicols[:-len(covarlist)]:
                ppidf.loc[:,x]=ss.zscore(np.convolve(ppidf.loc[:,x],hkernel[0])[:len(ppidf)])

            #Fill BOTH directions (seed ii -> target jj), exactly as in the
            #real analysis. Filling only jj>ii left the lower triangle at
            #zero, so the symmetrizing average below halved every null value.
            for jj in range(n_nodes):
                if jj!=ii:
                    betaweight=get_ppi_beta(ppidf,conv_data[:,jj],ppivar)
                    ppiMat[ii,jj]=betaweight

        # Convert to cfg_vec to symmetrize the upper and lower triangles

        # Compute upper triangle indices (by convention)
        triu_ix, triu_iy = np.triu_indices(n_nodes, k=1)
        #tril_ix, tril_iy = np.tril_indices(n_nodes, k=-1)

        # Convert to configuration matrix
        ppivec_upp = ppiMat[triu_ix, triu_iy]
        ppivec_low = ppiMat[triu_iy, triu_ix]

        # Compute mean cfg_vec
        ppivec=np.mean(np.vstack((ppivec_upp,ppivec_low)),axis=0)

        # Convert back to adjacency matrix
        ppiMat2=convert_cfg_vec_to_adj_matr(ppivec)

        #Add to ppi_null array
        ppi_null[perm_val,:,:]=ppiMat2

    #save important data for each subject/run to a new numpy zipped file
    np.savez(saveFile1, ppiMat=ppi_null)
    print('')
    print('Saved file: {}.npz'.format(saveFile1))
    print('...')

# 6: Combine run data for each subject

In [None]:
#ppiMat=matrix of ppi values for each node pair

def load_run_data(subjID,runID,condname,filepath):
    '''
    Load the PPI matrix of one subject/run as a vector of unique connections

    Parameters
    ----------
        subjID: str
            Subject ID inserted into the filename
        runID: str
            Run ID (e.g. 'run1') inserted into the filename
        condname: str
            Name of the PPI term inserted into the filename
        filepath: str
            Folder containing the per-run PPI files

    Returns
    -------
        subj_runData: numpy.ndarray
            Upper triangle of the stored 'ppiMat' matrix, with shape
            (1, n_conn)
    '''
    file1=os.path.join(filepath, ('{}_netLearn_{}_{}_410nodes.npz'.format(subjID,runID,condname)))
    # Indexing reads the array into memory, so the archive can be closed
    # right away instead of leaking an open file handle per call
    with np.load(file1) as npz:
        subj_runData=npz['ppiMat']
    # Add a leading window axis because the converter expects a 3D stack
    subj_runData=convert_adj_matr_to_cfg_matr(np.expand_dims(subj_runData,axis=0))
    return subj_runData
    
def merge_task_data(subjlist,condname,filepath):
    '''
    Average the per-run PPI data of every subject and save group arrays

    For each subject the ten runs are loaded, runs 1-5 and runs 6-10 are
    averaged separately and assigned to the non-social (NS) or social (Soc)
    task according to the subject's counterbalancing condition in
    subj_links. Four npz files (combined, NS, Soc, Soc-minus-NS) are
    written to filepath.

    Parameters
    ----------
        subjlist: list of str
            Scanner subject IDs to merge
        condname: str
            Name of the PPI term, used in the input and output filenames
        filepath: str
            Folder holding the per-run files; also receives the output

    Returns
    -------
        combData: numpy.ndarray
            Mean of the two task means, shape (n_subj, n_conn)
        meanDataNS: numpy.ndarray
            Mean over the non-social runs, shape (n_subj, n_conn)
        meanDataSoc: numpy.ndarray
            Mean over the social runs, shape (n_subj, n_conn)
        diffData: numpy.ndarray
            meanDataSoc - meanDataNS, shape (n_subj, n_conn)

    Raises
    ------
        ValueError
            If a subject's CondNum is neither 1 nor 2
    '''
    run_ids=['run{}'.format(r) for r in range(1,11)]
    n_subj=len(subjlist)
    step_num=int(math.ceil(n_subj/10.0))
    step_dot=int(math.ceil(n_subj/50.0))

    #Output arrays are allocated once the number of connections is known
    #from the first subject's data
    combData=meanDataNS=meanDataSoc=diffData=None

    for ii,subj in enumerate(subjlist):
        if ii==0:
            print('Working on subject #{}'.format(ii),end='')
        elif ii%step_num==0:
            print(ii,end='')
        elif ii%step_dot==0:
            print('.',end='')

        #Stack the ten runs: shape (10, n_conn)
        run_data=np.vstack([load_run_data(subj,run_id,condname,filepath)
                            for run_id in run_ids])
        if combData is None:
            n_conn=run_data.shape[1]
            combData=np.zeros((n_subj,n_conn))
            meanDataNS=np.zeros((n_subj,n_conn))
            meanDataSoc=np.zeros((n_subj,n_conn))
            diffData=np.zeros((n_subj,n_conn))
        
        #Average together the run data
        subj_meanData1=run_data[:5].mean(axis=0)    #runs 1-5 (first task)
        subj_meanData2=run_data[5:].mean(axis=0)    #runs 6-10 (second task)
        subj_meanData3=(subj_meanData1+subj_meanData2)/2
        
        #Note: ppt in Condition #1 saw non-social task first, 
        #whereas ppt in Condition #2 saw social task first
        cond_num=int(subj_links.loc[subj_links['scanID']==subj,'CondNum'])
        if cond_num==1:
            meanDataNS[ii,:]=subj_meanData1
            meanDataSoc[ii,:]=subj_meanData2
        elif cond_num==2:  
            meanDataNS[ii,:]=subj_meanData2
            meanDataSoc[ii,:]=subj_meanData1
        else:
            #Leaving the rows at zero would silently corrupt the group data
            raise ValueError('Unexpected CondNum {!r} for subject {}'.format(cond_num,subj))
    
        diffData[ii,:]=meanDataSoc[ii,:]-meanDataNS[ii,:]
        combData[ii,:]=subj_meanData3

    if combData is None:
        #Empty subject list: nothing was loaded, return empty arrays
        combData=np.zeros((0,0))
        meanDataNS=np.zeros((0,0))
        meanDataSoc=np.zeros((0,0))
        diffData=np.zeros((0,0))
        
    saveFile1=os.path.join(filepath,'netLearn_comb_{}_{}subjs'.format(condname,len(subjlist)))
    saveFile2=os.path.join(filepath,'netLearn_nonSoc_{}_{}subjs'.format(condname,len(subjlist)))
    saveFile3=os.path.join(filepath,'netLearn_soc_{}_{}subjs'.format(condname,len(subjlist)))
    saveFile4=os.path.join(filepath,'netLearn_diff_{}_{}subjs'.format(condname,len(subjlist)))
    np.savez(saveFile1, ppiMat=combData, subjlist=subjlist)
    np.savez(saveFile2, ppiMat=meanDataNS, subjlist=subjlist)
    np.savez(saveFile3, ppiMat=meanDataSoc, subjlist=subjlist)
    np.savez(saveFile4, ppiMat=diffData, subjlist=subjlist)
    return combData,meanDataNS,meanDataSoc,diffData

# Merge the per-run 'transition_PPI' data of every subject in subjs and save
# the combined, non-social, social and difference arrays to path_OutpData4
combData,meanDataNS,meanDataSoc,diffData=merge_task_data(subjs,'transition_PPI',path_OutpData4)

# 7: Combine null models into single file

In [None]:
#ppiMat=matrix of ppi values for each node pair

def load_null_data(subjID,runID,condname,filepath):
    '''
    Load the null-model PPI matrices of one subject/run as connection vectors

    Parameters
    ----------
        subjID: str
            Subject ID inserted into the filename
        runID: str
            Run ID (e.g. 'run1') inserted into the filename
        condname: str
            Name of the PPI term inserted into the filename
        filepath: str
            Folder containing the per-run null-model files

    Returns
    -------
        subj_runData: numpy.ndarray
            Upper triangle of every matrix in the stored 'ppiMat' stack
            (saved with shape (n_perm, n_node, n_node)), i.e. an array
            with shape (n_perm, n_conn)
    '''
    file1=os.path.join(filepath, ('{}_netLearn_{}_{}_410nodes_nulldist.npz'.format(subjID,runID,condname)))
    # Indexing reads the array into memory, so the archive can be closed
    # right away instead of leaking an open file handle per call
    with np.load(file1) as npz:
        subj_runData=npz['ppiMat']
    subj_runData=convert_adj_matr_to_cfg_matr(subj_runData)
    return subj_runData

#load_null_data(subjs[0],'run1','transition_PPI',path_NullData).shape


# In[6]:


def merge_null_data(subjlist,condname,filepath):
    '''
    Combine the per-run null-model PPI data of every subject and save them

    For every subject and permutation the order of the runs (module-level
    list `runs`) is shuffled at random; the first five shuffled runs are
    averaged as the null "non-social" data and the next five as the null
    "social" data. Four npz files (combined, NS, Soc, Soc-minus-NS) are
    written to filepath.

    Parameters
    ----------
        subjlist: list of str
            Subject IDs to merge
        condname: str
            Name of the PPI term, used in the input and output filenames
        filepath: str
            Folder holding the per-run null files; also receives the output

    Returns
    -------
        combData: numpy.ndarray
            Mean over all runs, shape (n_subj, n_perm, n_conn)
        meanDataNS: numpy.ndarray
            Mean over the first five shuffled runs, same shape
        meanDataSoc: numpy.ndarray
            Mean over the next five shuffled runs, same shape
        diffData: numpy.ndarray
            meanDataSoc - meanDataNS (same sign convention as
            merge_task_data), same shape
    '''
    n_runs=len(runs)
    n_subj=len(subjlist)
    step_num=int(math.ceil(n_subj/10.0))
    step_dot=int(math.ceil(n_subj/50.0))

    #Output arrays are allocated once the number of permutations and
    #connections is known from the first subject's data
    combData=meanDataNS=meanDataSoc=diffData=None

    for ii,subj in enumerate(subjlist):
        if ii==0:
            print('Working on subject #{}'.format(ii),end='')
        elif ii%step_num==0:
            print(ii,end='')
        elif ii%step_dot==0:
            print('.',end='')
        
        #Stack the runs: shape (n_runs, n_perm, n_conn)
        subjData=np.array([load_null_data(subj,run,condname,filepath) for run in runs])
        if combData is None:
            n_perm,n_conn=subjData.shape[1:]
            combData=np.zeros((n_subj,n_perm,n_conn))
            meanDataNS=np.zeros((n_subj,n_perm,n_conn))
            meanDataSoc=np.zeros((n_subj,n_perm,n_conn))
            diffData=np.zeros((n_subj,n_perm,n_conn))
        
        for pp in range(n_perm):
            #Random run order for this permutation; indexing with it avoids
            #keeping a second, shuffled copy of the whole subject array
            run_order=np.random.permutation(n_runs)
            shuffled=subjData[run_order,pp,:]
            
            #Average together the run data
            subj_meanData1=shuffled[:5].mean(axis=0)
            subj_meanData2=shuffled[5:10].mean(axis=0)
            
            meanDataNS[ii,pp,:]=subj_meanData1
            meanDataSoc[ii,pp,:]=subj_meanData2
            
            #Soc - NS, matching the real data (this used to be NS - Soc)
            diffData[ii,pp,:]=subj_meanData2-subj_meanData1
            #The mean over all runs does not depend on the run order
            combData[ii,pp,:]=subjData[:,pp,:].mean(axis=0)

    if combData is None:
        #Empty subject list: nothing was loaded, return empty arrays
        combData=np.zeros((0,0,0))
        meanDataNS=np.zeros((0,0,0))
        meanDataSoc=np.zeros((0,0,0))
        diffData=np.zeros((0,0,0))
    
    saveFile1=os.path.join(filepath,'netLearn_comb_nulldata_{}_{}subjs'.format(condname,len(subjlist)))
    saveFile2=os.path.join(filepath,'netLearn_nonSoc_nulldata_{}_{}subjs'.format(condname,len(subjlist)))
    saveFile3=os.path.join(filepath,'netLearn_soc_nulldata_{}_{}subjs'.format(condname,len(subjlist)))
    saveFile4=os.path.join(filepath,'netLearn_diff_nulldata_{}_{}subjs'.format(condname,len(subjlist)))
    np.savez(saveFile1, cfgMat=combData, subjlist=subjlist)
    np.savez(saveFile2, cfgMat=meanDataNS, subjlist=subjlist)
    np.savez(saveFile3, cfgMat=meanDataSoc, subjlist=subjlist)
    np.savez(saveFile4, cfgMat=diffData, subjlist=subjlist)
    print('')
    print('Saved file {}'.format(saveFile1))
    print('Saved file {}'.format(saveFile2))
    print('Saved file {}'.format(saveFile3))
    print('Saved file {}'.format(saveFile4))

    return combData,meanDataNS,meanDataSoc,diffData

# Merge the per-run 'transition_PPI' null models of every subject in subjs and
# save the combined, non-social, social and difference arrays to path_OutpData5
null_combData,null_meanDataNS,null_meanDataSoc,null_diffData=merge_null_data(subjs,'transition_PPI',path_OutpData5)

Steven Tompson | 2019