## Notebook runner for AMP-PD tissue and visit expression prep via papermill

#### import libraries

In [1]:
import pandas as pd
import os
import papermill as pm

In [2]:
# Template notebook that is executed once per cohort/visit combination
base_notebook = '/home/jupyter/notebooks/visit_expression_prep.ipynb'

# Cohorts to process; cohort_names and cohort_abbrvs are paired by position
cohort_names = ['biofind', 'pdbp', 'ppmi']
cohort_abbrvs = ['BF', 'PD', 'PP']

# Values passed unchanged to every notebook run
version = 'amppdv1'
tissue = 'wb'

# Visits to process; months and visit_names are paired by position.
# NOTE(review): 'SVM0_5T1' is paired with month 1 -- confirm this is intended.
months = [0, 1, 6, 12, 18, 24, 36]
visit_names = ['BLM0T1', 'SVM0_5T1', 'SVM6T1', 'SVM12T1', 'SVM18T1', 'SVM24T1', 'SVM36T1']

# Names of the parameters handed to the base notebook for each run
column_names = ['cohort', 'amp_abbr', 'version', 'tissue', 'visit', 'visit_name']

# Quantification level; used to build the source expression matrix file name
quant_type = 'genes'

# The paired lists are combined with zip(), which silently stops at the shorter
# list, so fail fast here if an edit leaves them out of step.
assert len(cohort_names) == len(cohort_abbrvs), \
    'cohort_names and cohort_abbrvs must have the same length'
assert len(months) == len(visit_names), \
    'months and visit_names must have the same length'

#### iterate over the tuples running the notebook per tuple

In [3]:
%%time

# Execute the base notebook once for each cohort/visit pairing whose source
# expression matrix is present on disk; pairings without a file are skipped.
for cohort, abbrv in zip(cohort_names, cohort_abbrvs):
    wrk_dir = f'/home/jupyter/{cohort}'
    out_nb_dir = f'/home/jupyter/notebooks/{cohort}/pm_gend_nbs'
    # create the per-cohort directory for executed notebooks if it is missing
    os.makedirs(out_nb_dir, exist_ok=True)

    for month, visit in zip(months, visit_names):
        # only run the prep notebook when the expression data for this
        # cohort and visit exists
        src_expression_matrix = f'/home/jupyter/amppd/expression/{abbrv}.{visit}.{quant_type}.hdf5'
        if not os.path.isfile(src_expression_matrix):
            continue

        # parameters injected into the base notebook for this run
        param_dict = {
            'cohort': cohort,
            'amp_abbr': abbrv,
            'version': version,
            'tissue': tissue,
            'visit': month,
            'visit_name': visit,
        }
        out_notebook = f'{out_nb_dir}/{cohort}_{tissue}_{visit}_expression_prep.ipynb'

        # echo what is about to run so progress can be followed in the output
        print(param_dict)
        print(out_notebook)
        pm.execute_notebook(
            input_path=base_notebook,
            output_path=out_notebook,
            parameters=param_dict,
        )

{'cohort': 'biofind', 'amp_abbr': 'BF', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 1, 'visit_name': 'SVM0_5T1'}
/home/jupyter/notebooks/biofind/pm_gend_nbs/biofind_wb_SVM0_5T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'pdbp', 'amp_abbr': 'PD', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 0, 'visit_name': 'BLM0T1'}
/home/jupyter/notebooks/pdbp/pm_gend_nbs/pdbp_wb_BLM0T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'pdbp', 'amp_abbr': 'PD', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 6, 'visit_name': 'SVM6T1'}
/home/jupyter/notebooks/pdbp/pm_gend_nbs/pdbp_wb_SVM6T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'pdbp', 'amp_abbr': 'PD', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 12, 'visit_name': 'SVM12T1'}
/home/jupyter/notebooks/pdbp/pm_gend_nbs/pdbp_wb_SVM12T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'pdbp', 'amp_abbr': 'PD', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 18, 'visit_name': 'SVM18T1'}
/home/jupyter/notebooks/pdbp/pm_gend_nbs/pdbp_wb_SVM18T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'pdbp', 'amp_abbr': 'PD', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 24, 'visit_name': 'SVM24T1'}
/home/jupyter/notebooks/pdbp/pm_gend_nbs/pdbp_wb_SVM24T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'ppmi', 'amp_abbr': 'PP', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 0, 'visit_name': 'BLM0T1'}
/home/jupyter/notebooks/ppmi/pm_gend_nbs/ppmi_wb_BLM0T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'ppmi', 'amp_abbr': 'PP', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 6, 'visit_name': 'SVM6T1'}
/home/jupyter/notebooks/ppmi/pm_gend_nbs/ppmi_wb_SVM6T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'ppmi', 'amp_abbr': 'PP', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 12, 'visit_name': 'SVM12T1'}
/home/jupyter/notebooks/ppmi/pm_gend_nbs/ppmi_wb_SVM12T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'ppmi', 'amp_abbr': 'PP', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 24, 'visit_name': 'SVM24T1'}
/home/jupyter/notebooks/ppmi/pm_gend_nbs/ppmi_wb_SVM24T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

{'cohort': 'ppmi', 'amp_abbr': 'PP', 'version': 'amppdv1', 'tissue': 'wb', 'visit': 36, 'visit_name': 'SVM36T1'}
/home/jupyter/notebooks/ppmi/pm_gend_nbs/ppmi_wb_SVM36T1_expression_prep.ipynb


Executing:   0%|          | 0/78 [00:00<?, ?cell/s]

CPU times: user 1min 37s, sys: 2.34 s, total: 1min 40s
Wall time: 48min 47s
