### TO DO: make class vars instead of nested functions

In [1]:
from nibabel import freesurfer as fs
import pandas as pd
import numpy as np
import os,glob
import re
import cPickle as pkl
from sklearn.feature_selection import variance_threshold
from sklearn.utils import resample
from joblib import Parallel,delayed

In [6]:
#subject directory and subject list
# Remember the current working directory so it can be restored after the
# directory listing below.
gitpath=os.getcwd()
# Root of the FreeSurfer outputs; later cells build their input and output
# paths from this string, so the trailing slash matters.
SUBJECTS_DIR=(
    '/Volumes/Users/mbkranz/projects/'
    'ACT_Freesurfer_NewProc/'
)
# Switch into SUBJECTS_DIR so the relative pattern below is resolved there.
os.chdir(SUBJECTS_DIR)
# IPython shell escape (not plain Python): captures the output lines of
# `ls -d ACT*_1`, i.e. the entries matching that pattern.
sublist=!ls -d ACT*_1
# Restore the original working directory.
os.chdir(gitpath)
# Shorthand for pandas multi-level slicing.
idx=pd.IndexSlice

### Import behavioral data (or data pre-processed in R)

In [None]:
# Subject-level table used by the bootstrap functions below, which look up
# columns of this module-level frame by name. The literal string 'NA' is
# parsed as missing and the "subs" column becomes the row index.
npdatafilt=pd.read_csv(
    '../data/np_filter_wb_gendernum.csv',
    na_values='NA',
    index_col="subs"
)

In [None]:
def linreg(X,Y):
    """Ordinary least-squares fit of Y on X with an intercept.

    A column of ones is prepended to X, so the first returned
    coefficient is the intercept and the remaining ones follow the
    column order of X.

    Parameters
    ----------
    X : array-like, shape (n,) or (n, p)
        Predictor(s), one row per observation.
    Y : array-like, shape (n,) or (n, k)
        Response(s). With k columns, every column is fit independently
        and the result has one column per response.

    Returns
    -------
    numpy.ndarray
        Coefficients of shape (p + 1,) or (p + 1, k).

    Raises
    ------
    numpy.linalg.LinAlgError
        If X'X is exactly singular (e.g. a constant or duplicated
        predictor column).
    """
    n=len(X)
    design=np.column_stack([np.ones(n),np.asarray(X)])
    # Solve the normal equations (X'X) b = X'Y rather than forming
    # (X'X)^(-1) explicitly: same estimate, but cheaper and numerically
    # better behaved. A singular X'X still raises LinAlgError, which
    # callers use to detect degenerate fits.
    return np.linalg.solve(
        np.dot(design.T,design),
        np.dot(design.T,Y)
    )

In [None]:
def bootstrap_regression(fs_np,y_np):
    """Draw one bootstrap sample and return the per-column slopes.

    The rows of fs_np and y_np are resampled together in a single
    resample call, then the resampled fs_np is regressed on the
    resampled y_np (y_np is the predictor, fs_np the response).

    Returns row 1 of the linreg result: the slope on y_np for each
    column of fs_np (row 0 holds the intercepts).
    """
    boot_fs,boot_y=resample(fs_np,y_np)
    coefs=linreg(boot_y,boot_fs)
    # row 0 = intercept, row 1 = slope
    return coefs[1]

In [None]:
#TO DO:
#make loop _bootstrap an object
#make run bootstrap as a fxn
def _mediation_cprime(x_np,c_np,m_np,y_np):
    """Coefficient on x in Y = i + c'X + C + bM for one mediator column.

    x_np, c_np, y_np are the (resampled) predictor, covariate and
    outcome; m_np is a single mediator column.
    Returns NaN when the design is singular, so one degenerate
    column does not abort the whole bootstrap draw.
    """
    design=np.column_stack([x_np,c_np,m_np])
    try:
        # index 0 is the intercept, index 1 the coefficient on x
        return linreg(design,y_np)[1]
    except np.linalg.LinAlgError:
        return np.nan


def bootstrap_mediation(fs_np,x_np,c_np,y_np):
    """Draw one bootstrap sample and return the per-column indirect effect.

    Mediation based on Preacher and Hayes 2004. All four inputs are
    resampled together (one resample call), then:

      1. direct model     Y = i + cX + C        -> c
      2. mediation model  Y = i + c'X + C + bM  -> c' (once per column
         M of the resampled fs_np)

    The indirect effect is returned as c - c', which is equivalent
    to a*b.

    Parameters
    ----------
    fs_np : 2-D array
        Mediator matrix; assumed (subjects, vertices), one regression
        is run per column -- TODO confirm against caller.
    x_np, c_np, y_np : 1-D arrays
        Predictor, covariate and outcome values, one per row of fs_np.

    Returns
    -------
    numpy.ndarray
        One indirect-effect value per column of fs_np; NaN where the
        mediation model was singular.
    """
    ifs_np,ix_np,ic_np,iy_np=resample(
        fs_np,x_np,c_np,y_np
    )
    ###1 direct Y=i+cX
    beta_c=linreg(
        np.column_stack([ix_np,ic_np]),
        iy_np
    )[1]
    ###2 mediation (for each vertex) Y=i+c'X+bM
    # Iterate the transpose to get one column at a time regardless of
    # the memory layout of the resampled array, and hand each column
    # to the helper explicitly instead of through a closure.
    beta_cprime=np.array(
        [_mediation_cprime(ix_np,ic_np,vertex_col,iy_np)
         for vertex_col in np.asarray(ifs_np).T]
    )
    #indirect effect=c minus c prime -->
    #equivalent to a*b
    return beta_c-beta_cprime

In [16]:
#TO DO:
#make loop _bootstrap an object
#make run bootstrap as a fxn
def _running_mean_se(draw_sample,numsamples):
    """Fold bootstrap draws into a running mean and standard error.

    One-pass update applied element-wise for each incoming draw x:
        prev_mean = m
        n = n + 1
        m = m + (x - m)/n
        S = S + (x - m)*(x - prev_mean)
    so individual draws never have to be stored.

    draw_sample : zero-argument callable returning one 1-D array
    numsamples  : number of draws to take (must be >= 1)

    Returns (mean, se) with se = sqrt(S/(numsamples-1)). With a single
    draw the denominator is zero and se is NaN.
    """
    if numsamples<1:
        raise ValueError(
            'numsamples must be at least 1, got {}'
            .format(numsamples)
        )
    # first draw initialises the accumulators
    running_mean=np.asarray(draw_sample(),dtype=float)
    sum_sq=np.zeros(running_mean.shape)
    for num_boot in range(2,numsamples+1):
        sample=draw_sample()
        prev_mean=running_mean
        running_mean=prev_mean+(sample-prev_mean)/num_boot
        sum_sq=sum_sq+(sample-running_mean)*(sample-prev_mean)
    #final standard error estimate
    boot_se=np.sqrt(np.divide(sum_sq,(numsamples-1)))
    return running_mean,boot_se


def loop_bootstrap(
    np_name,
    meas,
    numsamples,
    mediation=False,
    x_name=None,
    c_name=None,
    y_name=None,
    basedir=(SUBJECTS_DIR+
            'ML_files/'),
    fwhm=10
):
    """Bootstrap a whole-brain regression or mediation and pickle the summary.

    Loads the morphometry frame for `meas`/`fwhm` from
    SUBJECTS_DIR/networks_ML/, runs `numsamples` bootstrap draws against
    columns of the module-level `npdatafilt`, and writes a DataFrame with
    columns hemi_vertex, bootmean, bootse and bootscore
    (= bootmean / bootse) to `basedir`.

    Parameters
    ----------
    np_name : str
        Column of npdatafilt used as the regression variable; also part
        of the output file name in both modes.
    meas : str
        Morphometry measure; selects the input pickle.
    numsamples : int
        Number of bootstrap draws (>= 1; a single draw gives NaN
        bootse).
    mediation : bool
        If true, run bootstrap_mediation with x_name/c_name/y_name;
        otherwise run bootstrap_regression with np_name.
    x_name, c_name, y_name : str or None
        npdatafilt columns for predictor, covariate and outcome
        (mediation mode only).
    basedir : str
        Output directory prefix (joined by plain concatenation).
    fwhm : int
        Smoothing value embedded in the input file name.

    Returns
    -------
    str
        Path of the pickle that was written.

    Raises
    ------
    ValueError
        If numsamples is smaller than 1.
    """
    netpath=(
        '{}networks_ML/'
        '{}_fwhm{}_wholebrainfsaverage_df.pkl'
        .format(SUBJECTS_DIR,meas,str(fwhm))
    )
    fsdatafilt=pd.read_pickle(netpath)
    # Extract the arrays once; they do not change between draws.
    fs_values=fsdatafilt.values
    if mediation:
        print(
            '  2.) Starting mediation bootstrap '
            'for {} with x={},c={},y={}'
            .format(meas,x_name,c_name,y_name)
        )
        pklfile=(
            basedir+
            'wholebrain_bootstrap_'
            'vertex_mediation_'
            '{}_{}_x{}_c{}_y{}_pd.pkl'
            .format(np_name,
                    meas,
                    x_name,
                    c_name,
                    y_name)
        )
        x_values=npdatafilt[x_name].values
        c_values=npdatafilt[c_name].values
        y_values=npdatafilt[y_name].values
        def draw_sample():
            return bootstrap_mediation(
                fs_values,x_values,c_values,y_values
            )
    else:
        # .format() must be applied to the string, not written inside it
        print(
            '  2.) Starting regression bootstrap '
            'for {} on {}'
            .format(meas,np_name)
        )
        pklfile=(
            basedir+
            'wholebrain_bootstrap_'
            'vertex_regression_'
            '{}_{}_pd.pkl'
            .format(np_name,meas)
        )
        np_values=npdatafilt[np_name].values
        def draw_sample():
            return bootstrap_regression(fs_values,np_values)
    bootobj_mean,bootobj_se=_running_mean_se(
        draw_sample,numsamples
    )
    print(
        '  3.)FINISHED bootstrap'
        ' for x={},c={},y={}'
        .format(x_name,c_name,y_name)
    )
    bootstrap=pd.DataFrame({
        'hemi_vertex':fsdatafilt.columns,
        'bootse':bootobj_se,
        'bootmean':bootobj_mean
    })
    bootstrap['bootscore']=(
        bootstrap['bootmean']/
        bootstrap['bootse']
    )
    bootstrap.to_pickle(pklfile)
    print(pklfile+'......SAVED!')
    return pklfile

In [None]:
# Morphometry measures and npdatafilt columns the analyses loop over.
measure_list=['area','thickness']
np_name_list=['Memory','ExFunction']
# Hemispheres and bootstrap summary columns written out as overlays.
hemilist=['rh','lh']
bootlist=['bootscore','bootmean']
# Integer codes for measures and hemispheres. Plain ints are used because
# np.int was only an alias of the builtin int and has been removed from
# recent NumPy releases.
meas_key={'thickness':0,'area':1}
hemi_key={'lh':0,'rh':1}
# Number of bootstrap samples per analysis.
n=1000

In [None]:
# Mediation bootstrap (x='Age', covariate='Gender', y=cognitive score) for
# every measure / cognitive-score pair, spread over 4 joblib workers.
# loop_bootstrap returns the path of the pickle it wrote, so despite its
# name boot_df_mediation holds file paths, not DataFrames.
boot_df_mediation=Parallel(n_jobs=4)(
    delayed(loop_bootstrap)(
        np_name=np_name,
        numsamples=n,
        meas=meas,
        x_name='Age',
        c_name='Gender',
        y_name=np_name,
        mediation=True
    ) 
    for meas in measure_list 
    for np_name in np_name_list
)

In [None]:
#np measure ~ morphometry regression
# Same fan-out as the mediation run but in plain regression mode
# (mediation defaults to False). As loop_bootstrap returns the path of the
# pickle it wrote, boot_df holds file paths, not DataFrames.
boot_df=Parallel(n_jobs=4)(
    delayed(loop_bootstrap)(
        np_name=np_name,
        numsamples=n,
        meas=meas) 
    for meas in measure_list 
    for np_name in np_name_list
)

In [None]:
def getannot(annotname):
    """Load an fsaverage annotation for every hemisphere in `hemilist`.

    Reads <hemi>.<annotname>.annot from the fsaverage label directory
    and stacks the per-vertex labels of all hemispheres into one frame.

    Returns a DataFrame indexed by (hemi, vertex_index) with columns
    annot_label and annot_name; hemi is the integer code from
    `hemi_key` and vertex_index is the position of the vertex in the
    label array.
    """
    #may want to make concatenation/join
    #(instead of append)
    #so can have one column
    #per annotation/set of labels
    def load_hemi(hemi):
        # one frame per hemisphere, one row per vertex
        annot_path=(
            '/Applications/freesurfer/'
            'subjects/fsaverage/label/' +
            hemi + '.' + annotname + '.annot'
        )
        # first element of the read_annot result is the label array
        labels=fs.read_annot(annot_path)[0]
        return pd.DataFrame({
            "annot_label" : labels,
            "annot_name": annotname,
            "vertex_index" : range(len(labels)),
            "hemi": hemi_key[hemi]
        })
    frames=[load_hemi(hemi) for hemi in hemilist]
    stacked=pd.concat(frames)
    return stacked.set_index(['hemi','vertex_index'])
def makesurf(np_name,meas,
             mediation=False,
             x_name=None,
             c_name=None,
             y_name=None,
             basedir=(
                 '../data/'
                 'wholebrain_bootstrap/'
             ),
             curvdir='../curvoverlays/'):
    """Write bootstrap summaries as FreeSurfer curv overlays.

    Loads the bootstrap pickle named after the given arguments (same
    naming scheme loop_bootstrap uses) from `basedir`, aligns it to the
    fsaverage vertices through the Yeo2011_7Networks_N1000 annotation,
    and writes one .curv file per entry of `bootlist` and hemisphere
    of `hemilist` into `curvdir`.

    Parameters
    ----------
    np_name, meas : str
        Cognitive score and morphometry measure; select the pickle.
    mediation : bool
        If true, read the mediation result for x_name/c_name/y_name,
        otherwise the regression result.
    x_name, c_name, y_name : str or None
        Only used to build the mediation file name.
    basedir : str
        Directory prefix of the bootstrap pickles.
    curvdir : str
        Directory prefix for the written .curv files.

    Returns
    -------
    None
    """
    if mediation:
        pklfile=(
            'wholebrain_bootstrap_'
            'vertex_mediation_'
            '{}_{}_x{}_c{}_y{}_pd.pkl'
            .format(np_name,
                    meas,
                    x_name,
                    c_name,
                    y_name)
        )
    else:
        pklfile=(
            'wholebrain_bootstrap_'
            'vertex_regression_'
            '{}_{}_pd.pkl'
            .format(np_name,meas)
        )
    df=pd.read_pickle(basedir+pklfile)
    # hemi_vertex holds 3-element tuples; the first element is dropped
    # and the remaining two become the (hemi, vertex_index) index.
    multi_i=pd.MultiIndex.from_tuples(
        df['hemi_vertex'],
        names=[0,'hemi','vertex_index']
    ).droplevel([0])
    del df['hemi_vertex']
    df.index=multi_i
    #merge annotation labels
    #(to allow re-sorting by hemi and then vertex index)
    #get annotations from both hemis
    annots=getannot('Yeo2011_7Networks_N1000')
    # Right join: keep every annotation vertex, with NaN where the
    # bootstrap frame has no row for it.
    finaldf=df.join(annots,how='right').reset_index()
    #write curv file for overlay
    #for each hemi and bootstrap measure
    for bootmeas in bootlist:
        for hemi in hemilist:
            hemi_index=hemi_key[hemi]
            curv_vals=(
                finaldf
                .query('hemi==@hemi_index')
                [bootmeas]
                .values
            )
            # e.g. ..._pd.pkl -> ..._bootscore_rh.curv
            file_path=pklfile.replace(
                'pd.pkl',
                '_'.join([bootmeas,hemi])+'.curv'
            )
            fs.write_morph_data(
                file_like=curvdir+file_path,
                values=curv_vals
            )

In [None]:
# Write the curv overlays for every cognitive score / measure mediation
# result (x='Age', covariate='Gender', y=the cognitive score itself).
for np_name in np_name_list:
    for measure in measure_list:
        makesurf(
            np_name,
            measure,
            mediation=True,
            x_name='Age',
            c_name='Gender',
            y_name=np_name
        )