__run permutation tests from `regression_mediation_pervertex_wholebrain` script__

In [1]:
#general packages used for data import etc
from nibabel import freesurfer as fs
import pandas as pd
import numpy as np
import os,glob
import re
import cPickle as pkl #using csv now for file formats (except cv indices currently)
from sklearn.feature_selection import variance_threshold
from sklearn.utils import resample
from joblib import Parallel,delayed
import cPickle as pkl
idx=pd.IndexSlice

In [2]:
#names used to build file names and to loop over analyses in later cells
#(measure_list and bootlist are not referenced in the cells shown here)
measure_list=['area','thickness']
np_name_list=['Memory','ExFunction']
hemilist=['rh','lh']
bootlist=['bootscore','bootmean']
#integer codes stored in the DataFrames in place of the string labels.
#plain ints are used instead of np.int(...): np.int was only an alias of
#the builtin int, and the alias no longer exists in current NumPy releases
meas_key={'thickness':0,'area':1}
hemi_key={'lh':0,'rh':1}

In [3]:
def read_assign_pkl_df(hemi,meas,np_name,anal):
    '''load one pickled boot-score table and tag it with its identifiers

    the file read is
    ../data/wholebrain_bootstrap/<hemi>_<meas>_<np_name>_<anal>_boot_score.pkl

    hemi and meas are stored as their integer codes (looked up in
    hemi_key / meas_key); np_name and anal are stored unchanged.
    returns the loaded object with the four columns added via assign.
    NOTE(review): pickle files can execute code on load - only read
    files produced by this project.'''
    boot_dir='../data/wholebrain_bootstrap/'
    fname='{}_{}_{}_{}_boot_score.pkl'.format(hemi,meas,np_name,anal)
    scores=pd.read_pickle(boot_dir+fname)
    #assign returns a new object, the loaded one is not modified
    tagged=scores.assign(hemi=hemi_key[hemi],
                         meas=meas_key[meas],
                         np_name=np_name,
                         anal=anal)
    return tagged

In [4]:
def getannot(annotname,
             label_dir='/Applications/freesurfer/subjects/fsaverage/label/'):
    '''read a FreeSurfer annotation for every hemisphere in hemilist

    annotname: middle part of the annotation file name; the file read
        for each hemisphere is <label_dir><hemi>.<annotname>.annot
    label_dir: prefix prepended to the file name as-is, so it must end
        with a path separator. defaults to the fsaverage label folder
        of a local FreeSurfer install (the previously hard-coded
        location); pass another folder to run on a different machine.

    returns a DataFrame indexed by (hemi, vertex_index) with columns
    annot_label and annot_name. hemi holds the integer code from
    hemi_key; vertex_index counts from 0 within each hemisphere.'''
    #TODO: may want to make concatenation/join (instead of append)
    #so can have one column per annotation/set of labels
    hemi_frames=[]
    for hemi in hemilist:
        annot_data=fs.read_annot(
            label_dir + hemi + '.' + annotname + '.annot')
        #only the first element returned by read_annot is used here
        #(per nibabel's documentation: the per-vertex label ids)
        vertex_labels=annot_data[0]
        annot_hemi=pd.DataFrame(
            {"annot_label" : vertex_labels,
             "annot_name": annotname,
             "vertex_index" : range(len(vertex_labels)),
             "hemi": hemi_key[hemi]})
        hemi_frames.append(annot_hemi)
    annots=pd.concat(hemi_frames).set_index(
        ['hemi','vertex_index'])
    return annots

In [5]:
def get_network_summary(df,threshList=[1.96,2.58,3.3,3.9],
                        iteration='computed'):
    '''count, per annotation label, the vertices whose boot score lies
    beyond each threshold

    df: DataFrame with columns annot_label and boot_score (not modified)
    threshList: thresholds; a vertex is counted for a threshold when
        its boot_score is > thresh or < -thresh
    iteration: 'computed' uses the labels as given; any other value
        randomly reorders the annot_label column first (one
        permutation-test draw). the value itself is not used as a seed.

    returns the groupby/apply result of the per-label counts, keyed by
    annot_label and thresh, with the counts named total_sigvertices'''
    def count_beyond_thresholds(scores):
        '''applied to the boot scores of one label: one count per
        threshold of scores above +thresh plus scores below -thresh'''
        counts=[]
        for cutoff in threshList:
            n_above=(scores>cutoff).sum()
            n_below=(scores<(-cutoff)).sum()
            counts.append(n_above+n_below)
        return pd.Series(counts,
                         name='total_sigvertices',
                         index=pd.Index(threshList,name='thresh'))
    if iteration!='computed':
        #permutation draw: same label values, random order. assign
        #works on a copy, so the caller's df keeps its labels
        shuffled_labels=df['annot_label'].sample(frac=1).values
        labelled=df.assign(annot_label=shuffled_labels)
    else:
        labelled=df
    scores_by_label=labelled.set_index(['annot_label'])['boot_score']
    per_label=scores_by_label.groupby(level=['annot_label'])
    return per_label.apply(count_beyond_thresholds)

In [6]:
def _chunk_permutation_test(bootdf,permList,
                     threshList=[1.96,2.58,3.3,3.9],
                     seed=None):
    '''run one chunk of label-permutation draws (one joblib task)

    bootdf: DataFrame with columns anal, meas, np_name, annot_label and
        boot_score; rows with annot_label 0 are excluded
    permList: iterable of permutation identifiers, one draw per element
    threshList: thresholds forwarded to get_network_summary
    seed: value handed to np.random.seed at the start of the chunk.
        None (default) reseeds from fresh entropy.

    returns (perm_greaterthan, perm_totalsigvertices), both indexed
    like the observed summary: the number of draws in this chunk whose
    permuted count was >= the observed count, and the sum of the
    permuted counts over the draws in this chunk'''
    #the label shuffle draws from the global numpy RNG. worker
    #processes created by fork start with identical copies of the
    #parent's RNG state and would all replay the same permutations,
    #so the state is reset here. note this also resets the global RNG
    #when the function is called directly in the main process.
    np.random.seed(seed)
    #loop invariant: filter out label 0 once instead of on every draw
    labelled=bootdf.query('annot_label!=0')
    def summarize(iteration):
        '''per (anal, meas, np_name) threshold counts as one long Series'''
        return (labelled
                .groupby(['anal','meas','np_name'])
                .apply(get_network_summary,
                       threshList=threshList,
                       iteration=iteration)
                .stack(['annot_label','thresh']))
    empirical_vals=summarize('computed')
    perm_greaterthan=empirical_vals.transform(lambda x: 0)
    perm_totalsigvertices=empirical_vals.transform(lambda x: 0)
    for i in permList:
        perm_vals=summarize(i)
        perm_greaterthan+=perm_vals>=empirical_vals
        perm_totalsigvertices+=perm_vals
    return (perm_greaterthan,perm_totalsigvertices)

In [7]:
#stack the boot-score tables of every hemisphere x measure x
#neuropsych score x analysis combination into one long DataFrame.
#the dict is iterated directly (same keys, same order as .iterkeys())
#because .iterkeys() only exists on Python 2
boot_df=pd.concat(read_assign_pkl_df(hemi,meas,np_name,anal)
               for hemi in hemilist
               for meas in meas_key
               for np_name in np_name_list
               for anal in ['mediate','pcorr_cog'])

In [8]:
#n_jobs: number of joblib workers, also the number of chunks the
#permutations are divided into; num_perm: total number of permutations
n_jobs,num_perm=10,100000

In [None]:
#observed (unpermuted) threshold counts per analysis/measure/score,
#annotation label and threshold
empirical_vals=(boot_df
            .query('annot_label!=0')
            .groupby(['anal','meas','np_name'])
            .apply(get_network_summary)
            .stack(['annot_label','thresh']))
permList=np.arange(0,num_perm)
#array_split gives the same equal chunks as split when num_perm is a
#multiple of n_jobs, and near-equal chunks (instead of raising) when
#it is not
permChunks=np.array_split(permList,n_jobs)
#one task per chunk; each returns
#(count of draws >= observed, sum of permuted counts)
permResultList=Parallel(n_jobs)(
    delayed(_chunk_permutation_test)
    (boot_df,perms) for perms in permChunks)

In [None]:
#two independent all-zero accumulators indexed like the observed summary
perm_greaterthan,perm_totalsigvertices=(
    empirical_vals.transform(lambda x: 0) for _ in range(2))
#add up what each parallel chunk returned
for chunk_greaterthan,chunk_total in permResultList:
    perm_totalsigvertices+=chunk_total
    perm_greaterthan+=chunk_greaterthan
#fraction of permutations with a count >= the observed count
perm_pvals=perm_greaterthan/num_perm
#average permuted count
perm_meansigvertices=perm_totalsigvertices/num_perm

In [None]:
def rename_series(series_name):
    '''return the notebook-level Series stored under the variable name
    series_name, renamed to that same string

    the name is looked up in the module namespace rather than passed
    to eval(), so only an existing variable can be fetched and no
    expression is ever executed. raises KeyError for an unknown name.'''
    return globals()[series_name].rename(series_name)
#one column per result, in this order; each column is named after the
#variable it came from
perm_name_List=['empirical_vals','perm_greaterthan',
                'perm_meansigvertices','perm_pvals']
summary_perms=pd.DataFrame([rename_series(x) for x in perm_name_List]).T

In [None]:
#save the permutation summary table. the file name is assembled from
#adjacent string literals (the middle one was missing its closing
#quote, which made the cell a syntax error)
(summary_perms.to_csv(
     '../data/wholebrain_bootstrap/'
     'wholebrain_bootstrap_'
     'bootscore_permtests_4_17_18.csv'))