In [10]:
import os
import numpy as np
import pandas as pd
from custom import utils
from collections import Counter
import inspect
import scipy.io

## Analyses overview. 
This notebook outlines the steps for a Bayesian model that attempts to find genomic information (SNPs) that is relevant to a disease (in this case schizophrenia). It does so by finding SNPs that explain the variance in a set of structural MRI (sMRI) thickness measurements well. It then finds the sMRI features that predict the diagnosis well. Together, these two steps use the chain G (SNPs) -> I (sMRI) -> Y (diagnosis) to propose that the set of SNPs (G) that explain the variance well in the sMRI features (I) that are important for predicting diagnosis are important genomic locations for the disease.

### Function for preprocessing
The <b>most_handed</b> function performs categorical imputation. This decision is based on the fact that ~90 out of 1000+ handedness values are missing. That small percentage is imputed with the most frequent handedness score, "Right". There is also a very small percentage of values that are neither right nor left: a mix of "Both", "Mixed", "Either", and "Ambidextrous". These are all recoded to just "Both". The other function, <b>mean_center_n_scale</b>, mean-centers the data and scales each column by its standard deviation.

In [11]:
def most_handed(data):
    """Impute missing handedness labels and collapse ambidextrous variants.

    Parameters
    ----------
    data : pd.Series
        Handedness labels (e.g. 'Right', 'Left', 'Mixed'); missing entries
        are NaN/None.

    Returns
    -------
    pd.Series
        A new object-dtype Series (the input is left untouched) in which
        missing entries are replaced by the most frequent observed label and
        the labels 'Both', 'Mixed', 'Either' and 'Ambidextrous' are all
        recoded to 'Both'.
    """
    observed = data.dropna()
    # Most frequent observed label. Fall back to 'Right' when nothing at all
    # is observed, so an all-missing column still yields a usable label.
    most = observed.value_counts().idxmax() if len(observed) else 'Right'
    # astype returns a copy, so the caller's Series is never mutated; object
    # dtype lets string labels be written even into an all-NaN float column.
    cleaned = data.astype(object)
    # Fill only genuinely missing entries (no numeric sentinel involved).
    cleaned[cleaned.isnull()] = most
    both = ['Both', 'Mixed', 'Either', 'Ambidextrous']
    cleaned[cleaned.isin(both)] = 'Both'
    return cleaned

def mean_center_n_scale(data):
    """Standardize ``data`` column-wise.

    Subtracts the mean taken along axis 0 and divides by the standard
    deviation (also along axis 0) of the centered values. Works on anything
    exposing ``.mean(0)`` / ``.std(0)`` and arithmetic operators, e.g. a
    numpy array. A column with zero spread is divided by zero and is not
    special-cased here.
    """
    centered = data - data.mean(0)
    spread = centered.std(0)
    return centered / spread

## Preprocessing function
Inputs are paths to data, sMRI features and SNPs respectively. "bcvar" is a list of covariates: ['SEX', 'AGE_MRI', 'EstimatedTotalIntraCranialVol', 'STUDY']. "brain_cols" is a numpy array of feature names to subset the sMRI data. 
First, I get a boolean array marking the rows of the brain data whose Group feature is either Control or Schizophrenia. Both datasets are subsetted row-wise by this boolean array. It's important to note that the loaded datasets are already in the same row order. Then I create three sets of covariates and concatenate them into one matrix. The first set contains AGE, SEX, and ICV (EstimatedTotalIntraCranialVol), the second is a one-hot encoded matrix of handedness, and the third is a one-hot encoded matrix of site (i.e., study). The first step of the analysis does not use the response variable, but I save it so that StratifiedKFold cross-validation can be applied. Two dictionaries are returned. The first maps keys to numpy arrays; the names of its keys are required inputs for the first part of the analysis. The second contains the column headers; it is not required but is saved for later use. This function also applies mean centering and scaling.

In [59]:
def preprocess(brain_path, snp_path, bcvar, brain_cols):
    """Load the brain and SNP tables and assemble the model inputs.

    Parameters
    ----------
    brain_path : str
        Path to the HDF file holding the brain (sMRI) table. The table must
        provide the columns GROUP, AGE_MRI, EstimatedTotalIntraCranialVol,
        STUDY and HANDED, plus every name in ``brain_cols``.
    snp_path : str
        Path to the HDF file holding the SNP table. It must provide a SEX
        column and is assumed to be row-aligned with the brain table (the
        same boolean row mask is applied to both).
    bcvar : list
        Covariate names. Currently unused; kept so existing callers keep
        working.
    brain_cols : list or np.ndarray
        Names of the sMRI feature columns to extract.

    Returns
    -------
    (dict, dict)
        First dict: 'Z' (standardized covariates), 'I' (standardized sMRI
        features), 'G' (standardized SNP matrix), 'colnames' (SNP column
        names) and 'y' (0 for 'Control', 1 otherwise).
        Second dict: column names under 'Z_cols', 'I_cols' and 'G_cols'.
    """
    # load data
    brain_data = pd.read_hdf(brain_path)
    snp_data = pd.read_hdf(snp_path)
    # keep only the rows labelled Control or Schizophrenia, in both tables
    gr = brain_data.GROUP.values
    cnt_scz = np.logical_or(gr == 'Control', gr == 'Schizophrenia')
    brain_data = brain_data.iloc[cnt_scz, :]
    snp_data = snp_data.iloc[cnt_scz, :]
    # first covariate set: sex, age at MRI, intracranial volume
    icv = 'EstimatedTotalIntraCranialVol'
    cov_set1 = pd.DataFrame(
        data=np.hstack((snp_data.SEX.values[:, None],
                        brain_data.AGE_MRI.values[:, None],
                        brain_data[icv].values[:, None])),
        columns=['SEX', 'AGE', icv])
    cov_set1 = cov_set1.fillna(0)
    # After the zero fill, AGE == 0 marks a missing age. Impute ONLY the AGE
    # column (not the whole row), using the mean of the observed ages so the
    # zero placeholders do not drag the estimate down.
    age_missing = (cov_set1.AGE == 0).values
    if age_missing.any() and not age_missing.all():
        cov_set1.loc[age_missing, 'AGE'] = cov_set1.AGE[~age_missing].mean()
    # site and handedness encodings come from project helpers in custom.utils
    # NOTE(review): per the notebook text these are one-hot encodings, with
    # make_non_singular presumably dropping redundant columns -- confirm.
    cov_site = utils.make_non_singular(utils.encoder(brain_data.STUDY.values))
    cov_site_cols = ['site{}'.format(i) for i in range(cov_site.shape[1])]
    cov_site = pd.DataFrame(data=cov_site, columns=cov_site_cols)
    cov_hand = utils.encoder(most_handed(brain_data.HANDED))
    cov_hand_cols = ['handed{}'.format(i) for i in range(cov_hand.shape[1])]
    cov_hand = pd.DataFrame(data=cov_hand, columns=cov_hand_cols)
    cvars = pd.concat([cov_set1, cov_site, cov_hand], axis=1)
    cvars_val = utils.make_non_singular(mean_center_n_scale(cvars.values))
    y = np.array([0 if i == 'Control' else 1 for i in brain_data.GROUP.values])
    # Slice the SNP block once instead of three times.
    # NOTE(review): assumes the first column and the last five columns of the
    # SNP table are not SNPs -- confirm against the data file.
    snps = snp_data.iloc[:, 1:-5]
    snp_names = snps.columns.values
    return {'Z': cvars_val,
            'I': mean_center_n_scale(brain_data[brain_cols].values),
            'G': mean_center_n_scale(snps.values),
            'colnames': snp_names,
            'y': y}, {'Z_cols': cvars.columns.values,
                      'I_cols': brain_cols,
                      'G_cols': snp_names}

# Helper functions
These two functions assist in the analysis. save_preprocessed saves to disk the data that are output by the preprocess function above, and returns the paths where the files were written. This is because I'll be using nipype: to make life easier, data isn't passed when interfacing with nipype nodes, just the path to where the data lives. I then load the data using those paths. The cv_maker function creates k-fold stratified cross-validation indices and saves them. These indices are used in the MATLAB script to load the correct subsets of data.

In [60]:
def save_preprocessed(preproc_data_dict, preproc_data_dict_col, save_path, dn, cn):
    """Write the two dictionaries returned by ``preprocess`` to disk.

    preproc_data_dict: dict - first value returned by ``preprocess``
        (the data arrays); written with ``scipy.io.savemat``
    preproc_data_dict_col: dict - second value returned by ``preprocess``
        (the column headers); written with ``utils.save_pickle``
    save_path: string - directory both files are written into
    dn: string - file name for the data dictionary
    cn: string - file name for the column header dictionary

    Returns the tuple (data file path, column header file path).
    """
    data_file, cols_file = [os.path.join(save_path, fname) for fname in (dn, cn)]
    scipy.io.savemat(data_file, mdict=preproc_data_dict)
    utils.save_pickle(cols_file, preproc_data_dict_col)
    return data_file, cols_file

def cv_maker(data_path, save_path):
    """Create 5-fold stratified cross-validation indices and save them.

    data_path: string - path to a .mat file containing the feature matrix
        under 'I' and the class labels under 'y' (as written by
        ``save_preprocessed``)
    save_path: string - path of the .mat file to write the indices to

    The saved file holds two structs, "train" and "test", with fields
    'train_1'..'train_5' and 'test_1'..'test_5'. Indices are shifted by one
    so they are 1-based for use from MATLAB.

    Returns ``save_path``.
    """
    # Imports stay inside the function: it is also wrapped in a nipype
    # Function node, which needs the function body to be self-contained.
    import scipy.io
    from sklearn.model_selection import StratifiedKFold
    # Read the file once instead of once per variable.
    mat = scipy.io.loadmat(data_path)
    X = mat['I']
    # loadmat returns 2-D arrays; the labels are the first row.
    y = mat['y'][0]
    # No shuffling is requested, so the folds are deterministic. The former
    # random_state=1 had no effect without shuffle=True and is rejected with
    # a ValueError by scikit-learn >= 0.24; dropping it keeps the same folds.
    cv = StratifiedKFold(n_splits=5)
    train_idx, test_idx = {}, {}
    for idx, (train, test) in enumerate(cv.split(X, y)):
        train_idx['train_{}'.format(idx + 1)] = train + 1
        test_idx['test_{}'.format(idx + 1)] = test + 1
    scipy.io.savemat(save_path, mdict={"train": train_idx, "test": test_idx})
    return save_path

# Create the input data, save the CV indices
The step below runs the functions I've made above

In [61]:
# Directory holding the column-header files; "XB" lists the sMRI feature names.
headers_dir = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers"
brain_cols = np.genfromtxt(os.path.join(headers_dir, "XB"), dtype=str)
# Brain and SNP tables (HDF files read by preprocess via pd.read_hdf).
brain_path = "/storage/gablab001/data/genus/GIT/genus/bayes/data_sets/brain_N1547_P5927_matched.hdf5py"
snp_path = "/storage/gablab001/data/genus/GIT/genus/bayes/data_sets/genomic_N1547_P100006_matched.hdf5py"
# Covariate names handed to preprocess as `bcvar`.
bcv = ['SEX', 'AGE_MRI', 'EstimatedTotalIntraCranialVol', 'STUDY']
# all_data: arrays for the analysis; all_cols: matching column headers.
all_data, all_cols = preprocess(brain_path, snp_path, bcv, brain_cols)
path_for_save = "/storage/gablab001/data/genus/GIT/genus/bayes/data_sets"
# Only the path of the saved data file is needed below; the header path is discarded.
for_cv, _ = save_preprocessed(all_data, all_cols, path_for_save, "brain_gene.mat","brain_gene_cols.pkl")
# Build the cross-validation indices from the saved data and store them in cv_idx.mat.
cv_path = cv_maker(for_cv, os.path.join(path_for_save, "cv_idx.mat"))

In [15]:
from nipype import Function, Node, Workflow, IdentityInterface

# Bayesian analysis - "step 1" - this is not meant for cross validation.. yet we try all the same
Below I create the workflow that I use with nipype, create the nipype wrapper nodes to wrap the functions that will go into the nipype graph, and then submit the jobs. Due to the nature of the analysis, we are parallelizing over the feature space in the sMRI data, that is, one job per feature. On top of that, we are parallelizing the cross-validation step. In total this means there are (170*5 = 850) jobs that need to be submitted. In my experience that's too many jobs for a single user on the Openmind cluster, so I limit the number of jobs that can be submitted at a time.

In [63]:
# nipype node wrapping cv_maker. In this cell it is only defined: its inputs
# are left commented out below and it is not connected to the workflow.
CV_maker = Node(interface=Function(
    input_names = ['data_path', 'save_path'],
    output_names = ['save_path'],
    function = cv_maker
), name = 'CV_maker')

#CV_maker.inputs.data_path = "/storage/gablab001/data/genus/GIT/genus/bayes/data_sets/brain_gene.mat"
#CV_maker.inputs.save_path = "/storage/gablab001/data/genus/GIT/genus/bayes/data_sets/cv_idx.mat"

# Workflow that will hold the Bayesian-regression jobs; base_dir is nipype's working directory.
wf = Workflow(name='brain_bcv')
wf.base_dir = "/om/scratch/Tue/ysa"

# Iterates over every (sMRI column, CV fold) pair: 170 columns x 5 folds.
# Both index ranges start at 1 (hence the "+ 1").
Iternode = Node(IdentityInterface(fields=['col_idx', 'cv_idx']), name = 'Iternode')
Iternode.iterables = [('col_idx', np.arange(170) + 1), ('cv_idx', np.arange(5) + 1)]

def run_bayes(in_file, cv_file, cv_idx, col_idx, out_file):
    """Run the MATLAB Bayesian regression for one sMRI column and one CV fold.

    in_file: string - path to the .mat file with the preprocessed data
    cv_file: string - path to the .mat file with the cross-validation indices
    cv_idx: int - 1-based index of the cross-validation fold
    col_idx: int - 1-based index of the sMRI column (into the "XB" header file)
    out_file: string - directory the result .mat file is written into
        (despite the name, this is a directory, not a file)

    The MATLAB script bayes_reg.m is read, collapsed onto a single line and
    filled in with (in_file, cv_file, cv_idx, col_idx, result path) via
    ``str.format`` before being run through nipype's MatlabCommand.

    Returns the path of the result file,
    ``<out_file>/<in_file stem>_<column name>_<cv_idx>_<col_idx>_BF.mat``.
    """
    # Imports are local because nipype's Function interface runs this
    # function in isolation, without the notebook's namespace.
    import os
    import numpy as np
    import nipype.interfaces.matlab as Matlab

    headers_dir = "/storage/gablab001/data/genus/fs_cog/pred_diag/column_headers"
    col_names = np.genfromtxt(os.path.join(headers_dir, "XB"), dtype=str)
    col_save_name = col_names[col_idx - 1] + "_{}_{}_BF".format(cv_idx, col_idx)
    with open("/storage/gablab001/data/genus/GIT/genus/bayes/matlab/bayes_reg.m", "r") as src:
        script = src.read().replace("\n", "")
    # Use only the file stem of in_file. Joining out_file with the full
    # (absolute) in_file path made os.path.join discard out_file, so results
    # were written next to the input data instead of into out_file.
    stem = os.path.splitext(os.path.basename(in_file))[0]
    mat_file = os.path.join(out_file, '{}_{}.mat'.format(stem, col_save_name))
    # Make sure the output directory exists; tolerate a parallel job
    # creating it between the check and the makedirs call.
    if not os.path.isdir(out_file):
        try:
            os.makedirs(out_file)
        except OSError:
            if not os.path.isdir(out_file):
                raise
    matlab = Matlab.MatlabCommand()
    matlab.inputs.script = script.format(in_file, cv_file, cv_idx, col_idx, mat_file)
    matlab.run()
    return mat_file

# nipype node wrapping run_bayes; one instance runs per (col_idx, cv_idx) pair.
Run_bayes = Node(interface=Function(
    input_names = ['in_file', 'cv_file','cv_idx',
                   'col_idx','out_file'],
    output_names = ['mat_file'],
    function = run_bayes
), name='Run_bayes')

# Fixed inputs shared by every job: the preprocessed data, the CV indices,
# and the directory passed to run_bayes as `out_file`.
Run_bayes.inputs.in_file = "/storage/gablab001/data/genus/GIT/genus/bayes/data_sets/brain_gene.mat"
Run_bayes.inputs.cv_file = "/storage/gablab001/data/genus/GIT/genus/bayes/data_sets/cv_idx.mat"
Run_bayes.inputs.out_file = "/storage/gablab001/data/genus/GIT/genus/bayes/results/bayes_factor"

# Feed the iterated fold and column indices into the job node.
wf.connect(Iternode, 'cv_idx', Run_bayes, 'cv_idx')
wf.connect(Iternode, 'col_idx', Run_bayes, 'col_idx')
# Submission is left commented out so re-running the cell does not resubmit
# the jobs; 'max_jobs' is the cap on simultaneously submitted jobs.
#wf.run(plugin='SLURM', plugin_args={'sbatch_args':'--mem=4G -t 23:00:00', 'max_jobs': 170})

170726-10:52:25,606 workflow INFO:
	 Workflow brain_bcv settings: ['check', 'execution', 'logging']
170726-10:52:27,783 workflow INFO:
	 Running in parallel.
170726-10:52:27,809 workflow INFO:
	 Pending[0] Submitting[170] jobs Slots[170]
170726-10:52:27,811 workflow INFO:
	 Submitting: Run_bayes.a185 ID: 0
170726-10:52:27,932 workflow INFO:
	 Finished submitting: Run_bayes.a185 ID: 0
170726-10:52:27,935 workflow INFO:
	 Submitting: Run_bayes.a529 ID: 1
170726-10:52:28,51 workflow INFO:
	 Finished submitting: Run_bayes.a529 ID: 1
170726-10:52:28,52 workflow INFO:
	 Submitting: Run_bayes.a302 ID: 2
170726-10:52:28,166 workflow INFO:
	 Finished submitting: Run_bayes.a302 ID: 2
170726-10:52:28,168 workflow INFO:
	 Submitting: Run_bayes.a591 ID: 3
170726-10:52:28,278 workflow INFO:
	 Finished submitting: Run_bayes.a591 ID: 3
170726-10:52:28,280 workflow INFO:
	 Submitting: Run_bayes.a071 ID: 4
170726-10:52:28,392 workflow INFO:
	 Finished submitting: Run_bayes.a071 ID: 4
170726-10:52:28,394

170726-10:52:33,767 workflow INFO:
	 Submitting: Run_bayes.a735 ID: 53
170726-10:52:33,916 workflow INFO:
	 Finished submitting: Run_bayes.a735 ID: 53
170726-10:52:33,918 workflow INFO:
	 Submitting: Run_bayes.a267 ID: 54
170726-10:52:34,22 workflow INFO:
	 Finished submitting: Run_bayes.a267 ID: 54
170726-10:52:34,24 workflow INFO:
	 Submitting: Run_bayes.a323 ID: 55
170726-10:52:34,130 workflow INFO:
	 Finished submitting: Run_bayes.a323 ID: 55
170726-10:52:34,131 workflow INFO:
	 Submitting: Run_bayes.a734 ID: 56
170726-10:52:34,237 workflow INFO:
	 Finished submitting: Run_bayes.a734 ID: 56
170726-10:52:34,239 workflow INFO:
	 Submitting: Run_bayes.a101 ID: 57
170726-10:52:34,347 workflow INFO:
	 Finished submitting: Run_bayes.a101 ID: 57
170726-10:52:34,349 workflow INFO:
	 Submitting: Run_bayes.a757 ID: 58
170726-10:52:34,459 workflow INFO:
	 Finished submitting: Run_bayes.a757 ID: 58
170726-10:52:34,461 workflow INFO:
	 Submitting: Run_bayes.a733 ID: 59
170726-10:52:34,570 workf

170726-10:52:39,902 workflow INFO:
	 Finished submitting: Run_bayes.a431 ID: 107
170726-10:52:39,904 workflow INFO:
	 Submitting: Run_bayes.a645 ID: 108
170726-10:52:40,13 workflow INFO:
	 Finished submitting: Run_bayes.a645 ID: 108
170726-10:52:40,15 workflow INFO:
	 Submitting: Run_bayes.a279 ID: 109
170726-10:52:40,123 workflow INFO:
	 Finished submitting: Run_bayes.a279 ID: 109
170726-10:52:40,125 workflow INFO:
	 Submitting: Run_bayes.a328 ID: 110
170726-10:52:40,233 workflow INFO:
	 Finished submitting: Run_bayes.a328 ID: 110
170726-10:52:40,235 workflow INFO:
	 Submitting: Run_bayes.a430 ID: 111
170726-10:52:40,345 workflow INFO:
	 Finished submitting: Run_bayes.a430 ID: 111
170726-10:52:40,347 workflow INFO:
	 Submitting: Run_bayes.a278 ID: 112
170726-10:52:40,455 workflow INFO:
	 Finished submitting: Run_bayes.a278 ID: 112
170726-10:52:40,457 workflow INFO:
	 Submitting: Run_bayes.a843 ID: 113
170726-10:52:40,566 workflow INFO:
	 Finished submitting: Run_bayes.a843 ID: 113
170

170726-10:52:45,883 workflow INFO:
	 Finished submitting: Run_bayes.a072 ID: 161
170726-10:52:45,885 workflow INFO:
	 Submitting: Run_bayes.a324 ID: 162
170726-10:52:45,995 workflow INFO:
	 Finished submitting: Run_bayes.a324 ID: 162
170726-10:52:45,996 workflow INFO:
	 Submitting: Run_bayes.a563 ID: 163
170726-10:52:46,105 workflow INFO:
	 Finished submitting: Run_bayes.a563 ID: 163
170726-10:52:46,107 workflow INFO:
	 Submitting: Run_bayes.a336 ID: 164
170726-10:52:46,214 workflow INFO:
	 Finished submitting: Run_bayes.a336 ID: 164
170726-10:52:46,215 workflow INFO:
	 Submitting: Run_bayes.a581 ID: 165
170726-10:52:46,323 workflow INFO:
	 Finished submitting: Run_bayes.a581 ID: 165
170726-10:52:46,325 workflow INFO:
	 Submitting: Run_bayes.a184 ID: 166
170726-10:52:46,432 workflow INFO:
	 Finished submitting: Run_bayes.a184 ID: 166
170726-10:52:46,434 workflow INFO:
	 Submitting: Run_bayes.a335 ID: 167
170726-10:52:46,540 workflow INFO:
	 Finished submitting: Run_bayes.a335 ID: 167
1

170726-11:00:44,948 workflow INFO:
	 [Job finished] jobname: Run_bayes.a844 jobid: 51
170726-11:00:45,60 workflow INFO:
	 [Job finished] jobname: Run_bayes.a589 jobid: 52
170726-11:00:46,158 workflow INFO:
	 [Job finished] jobname: Run_bayes.a319 jobid: 65
170726-11:00:46,509 workflow INFO:
	 [Job finished] jobname: Run_bayes.a692 jobid: 70
170726-11:00:47,17 workflow INFO:
	 [Job finished] jobname: Run_bayes.a289 jobid: 78
170726-11:00:47,609 workflow INFO:
	 [Job finished] jobname: Run_bayes.a287 jobid: 85
170726-11:00:47,960 workflow INFO:
	 [Job finished] jobname: Run_bayes.a285 jobid: 89
170726-11:00:56,536 workflow INFO:
	 Pending[162] Submitting[8] jobs Slots[8]
170726-11:00:56,538 workflow INFO:
	 Submitting: Run_bayes.a714 ID: 197
170726-11:00:56,645 workflow INFO:
	 Finished submitting: Run_bayes.a714 ID: 197
170726-11:00:56,647 workflow INFO:
	 Submitting: Run_bayes.a096 ID: 198
170726-11:00:56,753 workflow INFO:
	 Finished submitting: Run_bayes.a096 ID: 198
170726-11:00:56,

170726-11:02:38,552 workflow INFO:
	 Finished submitting: Run_bayes.a126 ID: 223
170726-11:02:38,554 workflow INFO:
	 Submitting: Run_bayes.a138 ID: 224
170726-11:02:38,661 workflow INFO:
	 Finished submitting: Run_bayes.a138 ID: 224
170726-11:02:38,663 workflow INFO:
	 Submitting: Run_bayes.a352 ID: 225
170726-11:02:38,774 workflow INFO:
	 Finished submitting: Run_bayes.a352 ID: 225
170726-11:02:38,776 workflow INFO:
	 Submitting: Run_bayes.a250 ID: 226
170726-11:02:38,881 workflow INFO:
	 Finished submitting: Run_bayes.a250 ID: 226
170726-11:02:38,883 workflow INFO:
	 Submitting: Run_bayes.a137 ID: 227
170726-11:02:38,993 workflow INFO:
	 Finished submitting: Run_bayes.a137 ID: 227
170726-11:02:38,995 workflow INFO:
	 Submitting: Run_bayes.a124 ID: 228
170726-11:02:39,103 workflow INFO:
	 Finished submitting: Run_bayes.a124 ID: 228
170726-11:02:39,105 workflow INFO:
	 Submitting: Run_bayes.a136 ID: 229
170726-11:02:39,212 workflow INFO:
	 Finished submitting: Run_bayes.a136 ID: 229
1

170726-11:03:35,819 workflow INFO:
	 [Job finished] jobname: Run_bayes.a212 jobid: 83
170726-11:03:35,927 workflow INFO:
	 [Job finished] jobname: Run_bayes.a434 jobid: 98
170726-11:03:36,773 workflow INFO:
	 [Job finished] jobname: Run_bayes.a430 jobid: 111
170726-11:03:37,303 workflow INFO:
	 [Job finished] jobname: Run_bayes.a163 jobid: 120
170726-11:03:38,227 workflow INFO:
	 [Job finished] jobname: Run_bayes.a499 jobid: 131
170726-11:03:48,552 workflow INFO:
	 Pending[163] Submitting[7] jobs Slots[7]
170726-11:03:48,553 workflow INFO:
	 Submitting: Run_bayes.a627 ID: 263
170726-11:03:48,661 workflow INFO:
	 Finished submitting: Run_bayes.a627 ID: 263
170726-11:03:48,663 workflow INFO:
	 Submitting: Run_bayes.a778 ID: 264
170726-11:03:48,771 workflow INFO:
	 Finished submitting: Run_bayes.a778 ID: 264
170726-11:03:48,773 workflow INFO:
	 Submitting: Run_bayes.a626 ID: 265
170726-11:03:48,879 workflow INFO:
	 Finished submitting: Run_bayes.a626 ID: 265
170726-11:03:48,881 workflow I

170726-11:05:17,669 workflow INFO:
	 [Job finished] jobname: Run_bayes.a335 jobid: 167
170726-11:05:28,139 workflow INFO:
	 Pending[161] Submitting[9] jobs Slots[9]
170726-11:05:28,141 workflow INFO:
	 Submitting: Run_bayes.a475 ID: 294
170726-11:05:28,252 workflow INFO:
	 Finished submitting: Run_bayes.a475 ID: 294
170726-11:05:28,254 workflow INFO:
	 Submitting: Run_bayes.a474 ID: 295
170726-11:05:28,362 workflow INFO:
	 Finished submitting: Run_bayes.a474 ID: 295
170726-11:05:28,364 workflow INFO:
	 Submitting: Run_bayes.a245 ID: 296
170726-11:05:28,472 workflow INFO:
	 Finished submitting: Run_bayes.a245 ID: 296
170726-11:05:28,474 workflow INFO:
	 Submitting: Run_bayes.a473 ID: 297
170726-11:05:28,579 workflow INFO:
	 Finished submitting: Run_bayes.a473 ID: 297
170726-11:05:28,580 workflow INFO:
	 Submitting: Run_bayes.a472 ID: 298
170726-11:05:28,687 workflow INFO:
	 Finished submitting: Run_bayes.a472 ID: 298
170726-11:05:28,688 workflow INFO:
	 Submitting: Run_bayes.a471 ID: 29

170726-11:07:42,550 workflow INFO:
	 Pending[164] Submitting[6] jobs Slots[6]
170726-11:07:42,553 workflow INFO:
	 Submitting: Run_bayes.a193 ID: 327
170726-11:07:42,697 workflow INFO:
	 Finished submitting: Run_bayes.a193 ID: 327
170726-11:07:42,699 workflow INFO:
	 Submitting: Run_bayes.a533 ID: 328
170726-11:07:42,814 workflow INFO:
	 Finished submitting: Run_bayes.a533 ID: 328
170726-11:07:42,816 workflow INFO:
	 Submitting: Run_bayes.a142 ID: 329
170726-11:07:42,931 workflow INFO:
	 Finished submitting: Run_bayes.a142 ID: 329
170726-11:07:42,933 workflow INFO:
	 Submitting: Run_bayes.a532 ID: 330
170726-11:07:43,45 workflow INFO:
	 Finished submitting: Run_bayes.a532 ID: 330
170726-11:07:43,47 workflow INFO:
	 Submitting: Run_bayes.a191 ID: 331
170726-11:07:43,156 workflow INFO:
	 Finished submitting: Run_bayes.a191 ID: 331
170726-11:07:43,158 workflow INFO:
	 Submitting: Run_bayes.a531 ID: 332
170726-11:07:43,268 workflow INFO:
	 Finished submitting: Run_bayes.a531 ID: 332
170726

170726-11:08:54,666 workflow INFO:
	 [Job finished] jobname: Run_bayes.a526 jobid: 9
170726-11:08:54,886 workflow INFO:
	 [Job finished] jobname: Run_bayes.a322 jobid: 121
170726-11:08:55,5 workflow INFO:
	 [Job finished] jobname: Run_bayes.a500 jobid: 129
170726-11:08:57,53 workflow INFO:
	 [Job finished] jobname: Run_bayes.a709 jobid: 211
170726-11:08:58,172 workflow INFO:
	 [Job finished] jobname: Run_bayes.a122 jobid: 231
170726-11:08:58,362 workflow INFO:
	 [Job finished] jobname: Run_bayes.a134 jobid: 233
170726-11:08:58,891 workflow INFO:
	 [Job finished] jobname: Run_bayes.a119 jobid: 239
170726-11:08:58,991 workflow INFO:
	 [Job finished] jobname: Run_bayes.a249 jobid: 241
170726-11:09:09,120 workflow INFO:
	 Pending[162] Submitting[8] jobs Slots[8]
170726-11:09:09,122 workflow INFO:
	 Submitting: Run_bayes.a078 ID: 362
170726-11:09:09,234 workflow INFO:
	 Finished submitting: Run_bayes.a078 ID: 362
170726-11:09:09,236 workflow INFO:
	 Submitting: Run_bayes.a135 ID: 363
170726

170726-11:10:49,738 workflow INFO:
	 Submitting: Run_bayes.a448 ID: 391
170726-11:10:49,845 workflow INFO:
	 Finished submitting: Run_bayes.a448 ID: 391
170726-11:10:49,847 workflow INFO:
	 Submitting: Run_bayes.a599 ID: 392
170726-11:10:49,955 workflow INFO:
	 Finished submitting: Run_bayes.a599 ID: 392
170726-11:10:49,957 workflow INFO:
	 Submitting: Run_bayes.a759 ID: 393
170726-11:10:50,66 workflow INFO:
	 Finished submitting: Run_bayes.a759 ID: 393
170726-11:10:50,68 workflow INFO:
	 Submitting: Run_bayes.a447 ID: 394
170726-11:10:50,177 workflow INFO:
	 Finished submitting: Run_bayes.a447 ID: 394
170726-11:10:50,179 workflow INFO:
	 Submitting: Run_bayes.a598 ID: 395
170726-11:10:50,290 workflow INFO:
	 Finished submitting: Run_bayes.a598 ID: 395
170726-11:10:50,292 workflow INFO:
	 Submitting: Run_bayes.a266 ID: 396
170726-11:10:50,405 workflow INFO:
	 Finished submitting: Run_bayes.a266 ID: 396
170726-11:10:50,407 workflow INFO:
	 Submitting: Run_bayes.a446 ID: 397
170726-11:10

170726-11:13:10,931 workflow INFO:
	 [Job finished] jobname: Run_bayes.a155 jobid: 142
170726-11:13:11,134 workflow INFO:
	 [Job finished] jobname: Run_bayes.a067 jobid: 175
170726-11:13:11,247 workflow INFO:
	 [Job finished] jobname: Run_bayes.a842 jobid: 196
170726-11:13:11,374 workflow INFO:
	 [Job finished] jobname: Run_bayes.a713 jobid: 200
170726-11:13:11,497 workflow INFO:
	 [Job finished] jobname: Run_bayes.a394 jobid: 202
170726-11:13:11,693 workflow INFO:
	 [Job finished] jobname: Run_bayes.a710 jobid: 209
170726-11:13:11,810 workflow INFO:
	 [Job finished] jobname: Run_bayes.a091 jobid: 212
170726-11:13:11,916 workflow INFO:
	 [Job finished] jobname: Run_bayes.a348 jobid: 234
170726-11:13:12,190 workflow INFO:
	 [Job finished] jobname: Run_bayes.a116 jobid: 246
170726-11:13:12,462 workflow INFO:
	 [Job finished] jobname: Run_bayes.a630 jobid: 256
170726-11:13:12,986 workflow INFO:
	 [Job finished] jobname: Run_bayes.a777 jobid: 266
170726-11:13:13,102 workflow INFO:
	 [Job f

170726-11:14:23,914 workflow INFO:
	 Submitting: Run_bayes.a668 ID: 458
170726-11:14:24,22 workflow INFO:
	 Finished submitting: Run_bayes.a668 ID: 458
170726-11:14:24,23 workflow INFO:
	 Submitting: Run_bayes.a315 ID: 459
170726-11:14:24,132 workflow INFO:
	 Finished submitting: Run_bayes.a315 ID: 459
170726-11:14:24,134 workflow INFO:
	 Submitting: Run_bayes.a752 ID: 460
170726-11:14:24,241 workflow INFO:
	 Finished submitting: Run_bayes.a752 ID: 460
170726-11:14:24,243 workflow INFO:
	 Submitting: Run_bayes.a667 ID: 461
170726-11:14:24,351 workflow INFO:
	 Finished submitting: Run_bayes.a667 ID: 461
170726-11:14:24,353 workflow INFO:
	 Submitting: Run_bayes.a314 ID: 462
170726-11:14:24,462 workflow INFO:
	 Finished submitting: Run_bayes.a314 ID: 462
170726-11:14:24,464 workflow INFO:
	 Submitting: Run_bayes.a666 ID: 463
170726-11:14:24,575 workflow INFO:
	 Finished submitting: Run_bayes.a666 ID: 463
170726-11:14:26,927 workflow INFO:
	 [Job finished] jobname: Run_bayes.a681 jobid: 2

170726-11:16:05,816 workflow INFO:
	 Finished submitting: Run_bayes.a304 ID: 491
170726-11:16:05,818 workflow INFO:
	 Submitting: Run_bayes.a656 ID: 492
170726-11:16:05,926 workflow INFO:
	 Finished submitting: Run_bayes.a656 ID: 492
170726-11:16:05,928 workflow INFO:
	 Submitting: Run_bayes.a303 ID: 493
170726-11:16:06,37 workflow INFO:
	 Finished submitting: Run_bayes.a303 ID: 493
170726-11:16:06,39 workflow INFO:
	 Submitting: Run_bayes.a750 ID: 494
170726-11:16:06,150 workflow INFO:
	 Finished submitting: Run_bayes.a750 ID: 494
170726-11:16:06,152 workflow INFO:
	 Submitting: Run_bayes.a731 ID: 495
170726-11:16:06,260 workflow INFO:
	 Finished submitting: Run_bayes.a731 ID: 495
170726-11:16:09,88 workflow INFO:
	 [Job finished] jobname: Run_bayes.a244 jobid: 304
170726-11:16:09,886 workflow INFO:
	 [Job finished] jobname: Run_bayes.a193 jobid: 327
170726-11:16:10,72 workflow INFO:
	 [Job finished] jobname: Run_bayes.a530 jobid: 334
170726-11:16:10,362 workflow INFO:
	 [Job finished

170726-11:17:39,397 workflow INFO:
	 [Job finished] jobname: Run_bayes.a448 jobid: 391
170726-11:18:23,494 workflow INFO:
	 Pending[167] Submitting[3] jobs Slots[3]
170726-11:18:23,497 workflow INFO:
	 Submitting: Run_bayes.a720 ID: 525
170726-11:18:23,611 workflow INFO:
	 Finished submitting: Run_bayes.a720 ID: 525
170726-11:18:23,613 workflow INFO:
	 Submitting: Run_bayes.a748 ID: 526
170726-11:18:23,724 workflow INFO:
	 Finished submitting: Run_bayes.a748 ID: 526
170726-11:18:23,726 workflow INFO:
	 Submitting: Run_bayes.a505 ID: 527
170726-11:18:23,836 workflow INFO:
	 Finished submitting: Run_bayes.a505 ID: 527
170726-11:18:26,43 workflow INFO:
	 [Job finished] jobname: Run_bayes.a622 jobid: 275
170726-11:18:26,528 workflow INFO:
	 [Job finished] jobname: Run_bayes.a146 jobid: 317
170726-11:18:26,631 workflow INFO:
	 [Job finished] jobname: Run_bayes.a195 jobid: 321
170726-11:18:26,836 workflow INFO:
	 [Job finished] jobname: Run_bayes.a531 jobid: 332
170726-11:18:27,30 workflow I

170726-11:19:33,165 workflow INFO:
	 Submitting: Run_bayes.a060 ID: 558
170726-11:19:33,273 workflow INFO:
	 Finished submitting: Run_bayes.a060 ID: 558
170726-11:19:35,884 workflow INFO:
	 [Job finished] jobname: Run_bayes.a241 jobid: 347
170726-11:19:36,344 workflow INFO:
	 [Job finished] jobname: Run_bayes.a077 jobid: 365
170726-11:19:36,887 workflow INFO:
	 [Job finished] jobname: Run_bayes.a759 jobid: 393
170726-11:19:37,211 workflow INFO:
	 [Job finished] jobname: Run_bayes.a266 jobid: 396
170726-11:19:38,281 workflow INFO:
	 [Job finished] jobname: Run_bayes.a239 jobid: 411
170726-11:19:38,883 workflow INFO:
	 [Job finished] jobname: Run_bayes.a235 jobid: 419
170726-11:19:49,826 workflow INFO:
	 Pending[164] Submitting[6] jobs Slots[6]
170726-11:19:49,829 workflow INFO:
	 Submitting: Run_bayes.a637 ID: 559
170726-11:19:49,938 workflow INFO:
	 Finished submitting: Run_bayes.a637 ID: 559
170726-11:19:49,940 workflow INFO:
	 Submitting: Run_bayes.a636 ID: 560
170726-11:19:50,49 wor

170726-11:21:08,247 workflow INFO:
	 Finished submitting: Run_bayes.a782 ID: 589
170726-11:21:08,249 workflow INFO:
	 Submitting: Run_bayes.a038 ID: 590
170726-11:21:08,358 workflow INFO:
	 Finished submitting: Run_bayes.a038 ID: 590
170726-11:21:08,360 workflow INFO:
	 Submitting: Run_bayes.a166 ID: 591
170726-11:21:08,472 workflow INFO:
	 Finished submitting: Run_bayes.a166 ID: 591
170726-11:21:08,474 workflow INFO:
	 Submitting: Run_bayes.a649 ID: 592
170726-11:21:08,583 workflow INFO:
	 Finished submitting: Run_bayes.a649 ID: 592
170726-11:21:08,585 workflow INFO:
	 Submitting: Run_bayes.a037 ID: 593
170726-11:21:08,698 workflow INFO:
	 Finished submitting: Run_bayes.a037 ID: 593
170726-11:21:08,700 workflow INFO:
	 Submitting: Run_bayes.a036 ID: 594
170726-11:21:08,809 workflow INFO:
	 Finished submitting: Run_bayes.a036 ID: 594
170726-11:21:10,922 workflow INFO:
	 [Job finished] jobname: Run_bayes.a541 jobid: 308
170726-11:21:11,226 workflow INFO:
	 [Job finished] jobname: Run_ba

170726-11:22:37,843 workflow INFO:
	 [Job finished] jobname: Run_bayes.a815 jobid: 442
170726-11:22:38,633 workflow INFO:
	 [Job finished] jobname: Run_bayes.a807 jobid: 457
170726-11:22:39,911 workflow INFO:
	 [Job finished] jobname: Run_bayes.a306 jobid: 485
170726-11:22:55,376 workflow INFO:
	 Pending[164] Submitting[6] jobs Slots[6]
170726-11:22:55,378 workflow INFO:
	 Submitting: Run_bayes.a422 ID: 623
170726-11:22:55,500 workflow INFO:
	 Finished submitting: Run_bayes.a422 ID: 623
170726-11:22:55,503 workflow INFO:
	 Submitting: Run_bayes.a699 ID: 624
170726-11:22:57,652 workflow INFO:
	 Finished submitting: Run_bayes.a699 ID: 624
170726-11:22:57,654 workflow INFO:
	 Submitting: Run_bayes.a413 ID: 625
170726-11:22:57,778 workflow INFO:
	 Finished submitting: Run_bayes.a413 ID: 625
170726-11:22:57,780 workflow INFO:
	 Submitting: Run_bayes.a421 ID: 626
170726-11:22:57,901 workflow INFO:
	 Finished submitting: Run_bayes.a421 ID: 626
170726-11:22:57,903 workflow INFO:
	 Submitting: 

170726-11:24:24,597 workflow INFO:
	 Pending[161] Submitting[9] jobs Slots[9]
170726-11:24:24,599 workflow INFO:
	 Submitting: Run_bayes.a805 ID: 654
170726-11:24:24,709 workflow INFO:
	 Finished submitting: Run_bayes.a805 ID: 654
170726-11:24:24,711 workflow INFO:
	 Submitting: Run_bayes.a846 ID: 655
170726-11:24:24,822 workflow INFO:
	 Finished submitting: Run_bayes.a846 ID: 655
170726-11:24:24,824 workflow INFO:
	 Submitting: Run_bayes.a845 ID: 656
170726-11:24:24,934 workflow INFO:
	 Finished submitting: Run_bayes.a845 ID: 656
170726-11:24:24,936 workflow INFO:
	 Submitting: Run_bayes.a554 ID: 657
170726-11:24:25,45 workflow INFO:
	 Finished submitting: Run_bayes.a554 ID: 657
170726-11:24:25,47 workflow INFO:
	 Submitting: Run_bayes.a834 ID: 658
170726-11:24:25,155 workflow INFO:
	 Finished submitting: Run_bayes.a834 ID: 658
170726-11:24:25,157 workflow INFO:
	 Submitting: Run_bayes.a553 ID: 659
170726-11:24:25,270 workflow INFO:
	 Finished submitting: Run_bayes.a553 ID: 659
170726

170726-11:26:02,422 workflow INFO:
	 [Job finished] jobname: Run_bayes.a667 jobid: 461
170726-11:26:02,536 workflow INFO:
	 [Job finished] jobname: Run_bayes.a657 jobid: 490
170726-11:26:02,641 workflow INFO:
	 [Job finished] jobname: Run_bayes.a656 jobid: 492
170726-11:26:03,191 workflow INFO:
	 [Job finished] jobname: Run_bayes.a509 jobid: 516
170726-11:26:03,294 workflow INFO:
	 [Job finished] jobname: Run_bayes.a723 jobid: 517
170726-11:26:03,637 workflow INFO:
	 [Job finished] jobname: Run_bayes.a507 jobid: 521
170726-11:26:03,920 workflow INFO:
	 [Job finished] jobname: Run_bayes.a837 jobid: 529
170726-11:26:04,27 workflow INFO:
	 [Job finished] jobname: Run_bayes.a172 jobid: 532
170726-11:26:04,251 workflow INFO:
	 [Job finished] jobname: Run_bayes.a575 jobid: 536
170726-11:26:04,789 workflow INFO:
	 [Job finished] jobname: Run_bayes.a654 jobid: 544
170726-11:26:05,814 workflow INFO:
	 [Job finished] jobname: Run_bayes.a745 jobid: 556
170726-11:26:16,604 workflow INFO:
	 Pending

170726-11:27:43,4 workflow INFO:
	 Submitting: Run_bayes.a205 ID: 721
170726-11:27:43,113 workflow INFO:
	 Finished submitting: Run_bayes.a205 ID: 721
170726-11:27:43,115 workflow INFO:
	 Submitting: Run_bayes.a608 ID: 722
170726-11:27:43,223 workflow INFO:
	 Finished submitting: Run_bayes.a608 ID: 722
170726-11:27:43,225 workflow INFO:
	 Submitting: Run_bayes.a204 ID: 723
170726-11:27:43,333 workflow INFO:
	 Finished submitting: Run_bayes.a204 ID: 723
170726-11:27:43,335 workflow INFO:
	 Submitting: Run_bayes.a607 ID: 724
170726-11:27:43,444 workflow INFO:
	 Finished submitting: Run_bayes.a607 ID: 724
170726-11:27:45,877 workflow INFO:
	 [Job finished] jobname: Run_bayes.a730 jobid: 498
170726-11:27:46,969 workflow INFO:
	 [Job finished] jobname: Run_bayes.a640 jobid: 554
170726-11:27:48,65 workflow INFO:
	 [Job finished] jobname: Run_bayes.a789 jobid: 580
170726-11:28:27,645 workflow INFO:
	 Pending[167] Submitting[3] jobs Slots[3]
170726-11:28:27,647 workflow INFO:
	 Submitting: Run

170726-11:29:38,172 workflow INFO:
	 Finished submitting: Run_bayes.a822 ID: 754
170726-11:29:38,174 workflow INFO:
	 Submitting: Run_bayes.a056 ID: 755
170726-11:29:38,284 workflow INFO:
	 Finished submitting: Run_bayes.a056 ID: 755
170726-11:29:40,523 workflow INFO:
	 [Job finished] jobname: Run_bayes.a061 jobid: 496
170726-11:29:40,643 workflow INFO:
	 [Job finished] jobname: Run_bayes.a722 jobid: 519
170726-11:29:42,495 workflow INFO:
	 [Job finished] jobname: Run_bayes.a028 jobid: 603
170726-11:29:42,598 workflow INFO:
	 [Job finished] jobname: Run_bayes.a027 jobid: 604
170726-11:29:42,862 workflow INFO:
	 [Job finished] jobname: Run_bayes.a428 jobid: 607
170726-11:29:43,388 workflow INFO:
	 [Job finished] jobname: Run_bayes.a704 jobid: 613
170726-11:29:54,812 workflow INFO:
	 Pending[164] Submitting[6] jobs Slots[6]
170726-11:29:54,814 workflow INFO:
	 Submitting: Run_bayes.a040 ID: 756
170726-11:29:54,926 workflow INFO:
	 Finished submitting: Run_bayes.a040 ID: 756
170726-11:29:

170726-11:31:11,127 workflow INFO:
	 Finished submitting: Run_bayes.a457 ID: 784
170726-11:31:11,129 workflow INFO:
	 Submitting: Run_bayes.a356 ID: 785
170726-11:31:11,238 workflow INFO:
	 Finished submitting: Run_bayes.a356 ID: 785
170726-11:31:11,240 workflow INFO:
	 Submitting: Run_bayes.a832 ID: 786
170726-11:31:11,348 workflow INFO:
	 Finished submitting: Run_bayes.a832 ID: 786
170726-11:31:11,350 workflow INFO:
	 Submitting: Run_bayes.a456 ID: 787
170726-11:31:11,458 workflow INFO:
	 Finished submitting: Run_bayes.a456 ID: 787
170726-11:31:11,459 workflow INFO:
	 Submitting: Run_bayes.a355 ID: 788
170726-11:31:11,570 workflow INFO:
	 Finished submitting: Run_bayes.a355 ID: 788
170726-11:31:11,572 workflow INFO:
	 Submitting: Run_bayes.a455 ID: 789
170726-11:31:11,682 workflow INFO:
	 Finished submitting: Run_bayes.a455 ID: 789
170726-11:31:11,684 workflow INFO:
	 Submitting: Run_bayes.a354 ID: 790
170726-11:31:11,793 workflow INFO:
	 Finished submitting: Run_bayes.a354 ID: 790
1

170726-11:32:42,369 workflow INFO:
	 [Job finished] jobname: Run_bayes.a566 jobid: 664
170726-11:32:43,238 workflow INFO:
	 [Job finished] jobname: Run_bayes.a009 jobid: 682
170726-11:32:43,429 workflow INFO:
	 [Job finished] jobname: Run_bayes.a057 jobid: 684
170726-11:32:54,676 workflow INFO:
	 Pending[165] Submitting[5] jobs Slots[5]
170726-11:32:54,677 workflow INFO:
	 Submitting: Run_bayes.a015 ID: 820
170726-11:32:54,785 workflow INFO:
	 Finished submitting: Run_bayes.a015 ID: 820
170726-11:32:54,787 workflow INFO:
	 Submitting: Run_bayes.a670 ID: 821
170726-11:32:54,894 workflow INFO:
	 Finished submitting: Run_bayes.a670 ID: 821
170726-11:32:54,895 workflow INFO:
	 Submitting: Run_bayes.a014 ID: 822
170726-11:32:55,6 workflow INFO:
	 Finished submitting: Run_bayes.a014 ID: 822
170726-11:32:55,8 workflow INFO:
	 Submitting: Run_bayes.a669 ID: 823
170726-11:32:55,126 workflow INFO:
	 Finished submitting: Run_bayes.a669 ID: 823
170726-11:32:55,128 workflow INFO:
	 Submitting: Run_

170726-11:34:49,934 workflow INFO:
	 [Job finished] jobname: Run_bayes.a211 jobid: 704
170726-11:34:50,377 workflow INFO:
	 [Job finished] jobname: Run_bayes.a398 jobid: 710
170726-11:34:51,399 workflow INFO:
	 [Job finished] jobname: Run_bayes.a203 jobid: 725
170726-11:35:04,102 workflow INFO:
	 [Job finished] jobname: Run_bayes.a849 jobid: 650
170726-11:35:04,222 workflow INFO:
	 [Job finished] jobname: Run_bayes.a847 jobid: 653
170726-11:35:05,801 workflow INFO:
	 [Job finished] jobname: Run_bayes.a399 jobid: 707
170726-11:35:06,935 workflow INFO:
	 [Job finished] jobname: Run_bayes.a216 jobid: 726
170726-11:35:19,985 workflow INFO:
	 [Job finished] jobname: Run_bayes.a545 jobid: 687
170726-11:35:21,127 workflow INFO:
	 [Job finished] jobname: Run_bayes.a613 jobid: 708
170726-11:35:21,450 workflow INFO:
	 [Job finished] jobname: Run_bayes.a208 jobid: 713
170726-11:35:59,617 workflow INFO:
	 [Job finished] jobname: Run_bayes.a443 jobid: 407
170726-11:35:59,762 workflow INFO:
	 [Job f

170726-11:39:39,945 workflow INFO:
	 [Job finished] jobname: Run_bayes.a022 jobid: 802
170726-11:39:45,315 workflow INFO:
	 [Job finished] jobname: Run_bayes.a002 jobid: 694
170726-11:39:46,438 workflow INFO:
	 [Job finished] jobname: Run_bayes.a362 jobid: 770
170726-11:39:46,779 workflow INFO:
	 [Job finished] jobname: Run_bayes.a457 jobid: 784
170726-11:39:46,919 workflow INFO:
	 [Job finished] jobname: Run_bayes.a356 jobid: 785
170726-11:39:47,884 workflow INFO:
	 [Job finished] jobname: Run_bayes.a677 jobid: 803
170726-11:39:48,69 workflow INFO:
	 [Job finished] jobname: Run_bayes.a021 jobid: 805
170726-11:39:49,8 workflow INFO:
	 [Job finished] jobname: Run_bayes.a015 jobid: 820
170726-11:39:53,528 workflow INFO:
	 [Job finished] jobname: Run_bayes.a044 jobid: 744
170726-11:39:54,771 workflow INFO:
	 [Job finished] jobname: Run_bayes.a680 jobid: 794
170726-11:39:56,75 workflow INFO:
	 [Job finished] jobname: Run_bayes.a670 jobid: 821
170726-11:39:56,353 workflow INFO:
	 [Job finis

<networkx.classes.digraph.DiGraph at 0x2b11fbfb3a10>

# Set up for step 2
Step 1 above uses an updated version of varbvs, and in <b>bayes_reg.m</b> the normalization step is already performed. The MATLAB file <b>deployendophenVB.m</b> expects a CSV file for step 2 that points to the output from step 1. The function below creates one such file for each fold: 5 folds give 5 files, each of which lists 170 paths.

In [134]:
# Directory holding the step 1 outputs that step 2 consumes.
bayes_factor_outputs = "/storage/gablab001/data/genus/GIT/genus/bayes/results/bayes_factor/5foldcv_allcov"

# Full path of every entry in that directory. os.listdir gives no ordering
# guarantee; make_file sorts the paths it is handed, so none is needed here.
bf_res = []
for entry in os.listdir(bayes_factor_outputs):
    bf_res.append(os.path.join(bayes_factor_outputs, entry))

def make_file(paths):
    """Group step 1 result paths by cross-validation fold.

    Each path is split on '_'; the third-from-last piece is used as the
    fold label and the second-from-last piece (converted to int) as the
    column number.

    Parameters
    ----------
    paths : iterable of str
        Paths to the step 1 output files.

    Returns
    -------
    list of pandas.DataFrame
        One frame per fold with columns ['colNum', 'matFn'], rows sorted by
        'colNum'. Frames are ordered by sorted fold label (string order), so
        the position in the list is deterministic.

    Raises
    ------
    IndexError
        If a path contains fewer than two '_' separators.
    ValueError
        If the second-from-last '_'-separated piece is not an integer.
    """
    paths = list(paths)
    df = pd.DataFrame({
        'matFn': paths,
        'cv': [p.split('_')[-3] for p in paths],
        'colNum': [int(p.split('_')[-2]) for p in paths],
    })
    df_store = []
    # Iterating the groupby itself yields groups in sorted label order;
    # iterating the `.groups` dict does not guarantee an order on older
    # Python versions, which would mis-number the per-fold files.
    for _, fold in df.groupby('cv'):
        fold = fold.sort_values('colNum').reset_index(drop=True)
        df_store.append(fold[['colNum', 'matFn']])
    return df_store
 
# Write one CSV per fold table returned by make_file, numbered from 1.
# NOTE(review): the files land in the current working directory, while
# run_fixedformVB reads .../results/bayes_factor/BFRESULT_CV_1.csv --
# confirm the notebook is run from that directory.
fold_tables = make_file(bf_res)
for fold_number, fold_table in enumerate(fold_tables, start=1):
    csv_name = 'BFRESULT_CV_{}.csv'.format(fold_number)
    fold_table.to_csv(csv_name, index=None)

# FixedFormVB "Step 2"
This is where it breaks. The error thrown by Python here isn't useful, so to decipher what's happening, MATLAB needs to be opened and this step run there. Thankfully this outputs a pyscript.m file which can be copied and pasted directly into MATLAB.

In [135]:
def run_fixedformVB(
        csv_file='/storage/gablab001/data/genus/GIT/genus/bayes/results/bayes_factor/BFRESULT_CV_1.csv',
        input_mat='/storage/gablab001/data/genus/GIT/genus/bayes/data_sets/brain_gene.mat',
        out_file='/om/user/ysa/ffvb_test.mat'):
    """Run the 'fxvb' step of deployEndoPhenVB in MATLAB through nipype.

    Called with no arguments this behaves as before (fold 1, test output
    file); the keyword arguments allow the same call to be made for the
    other per-fold CSV files.

    Parameters
    ----------
    csv_file : str
        Path passed to deployEndoPhenVB as 'csvFile'.
    input_mat : str
        Path passed to deployEndoPhenVB as 'inputMat'.
    out_file : str
        Path passed to deployEndoPhenVB as 'outFile'.

    Returns
    -------
    The object returned by ``MatlabCommand.run()`` (previously discarded),
    so the MATLAB output can be inspected when debugging.

    NOTE(review): in this notebook the call raised a UnicodeEncodeError for
    u'\\xd7'; presumably non-ASCII text in the MATLAB output is being encoded
    as ASCII somewhere downstream -- confirm.
    """
    # Imported inside the function, as in the original cell.
    import nipype.interfaces.matlab as Matlab

    def _matlab_quote(path):
        # A single quote inside a MATLAB char literal is written as two.
        return str(path).replace("'", "''")

    matlab = Matlab.MatlabCommand()
    matlab.inputs.paths = [
        '/storage/gablab001/data/genus/bayes_adni/basis/bayesianImagingGenetics/src',
        '/storage/gablab001/data/genus/bayes_adni/basis/bayesianImagingGenetics/src/Utils']
    matlab.inputs.script = """
    deployEndoPhenVB('step','fxvb',...
                    'csvFile','{csv_file}',...
                    'inputMat','{input_mat}',...
                    'outFile','{out_file}')
    """.format(csv_file=_matlab_quote(csv_file),
               input_mat=_matlab_quote(input_mat),
               out_file=_matlab_quote(out_file))
    res = matlab.run()
    return res

In [136]:
# Launch step 2 in MATLAB; on this run the call failed with the
# UnicodeEncodeError shown in the cell output.
run_fixedformVB()

UnicodeEncodeError: 'ascii' codec can't encode character u'\xd7' in position 7083: ordinal not in range(128)