## Notebook to run the gene-specific eQTL results prep, per AMP-PD cohort and visit, via papermill

In [1]:
# record the date and time at which this notebook was run
!date

Tue Jul 27 20:28:40 UTC 2021


#### import libraries

In [2]:
import pandas as pd
import os
import papermill as pm

  from pyarrow import HadoopFileSystem


In [3]:
# Template notebook that the runner cell executes once per (cohort, gene) pair.
base_notebook = '/home/jupyter/notebooks/gene_specific_eqtl_results.ipynb'

# Cohorts to iterate over, and the visit months probed for existing results.
cohort_names = ['biofind', 'pdbp', 'ppmi']
months = [0, 1, 6, 12, 18, 24, 36]

# Tissue label; used in the results and output-notebook file names.
tissue = 'wb'

# Run descriptors; not referenced by the other cells shown in this notebook.
version = 'amppdv1'
quant_type = 'genes'

# Genes to generate a gene-specific results notebook for.
# For a single-gene run, swap in a short list instead, e.g. ['LRRK2'].
genes = [
    'RAB29', 'MAL', 'ZNF514', 'LINC01127', 'STK39', 'ZNF589',
    'NME6', 'ARIH2', 'WDR6', 'IQCB1', 'KPNA1', 'P2RY14',
    'AC092953.1', 'DGKQ', 'IDUA', 'DCAF16', 'LCORL', 'ERCC8',
    'NDUFAF2', 'PAM', 'GIN1', 'PPIP5K2', 'ZSCAN16-AS1', 'HLA-A',
    'KLHL7-DT', 'KLHL7', 'AK3P3', 'NUPL2', 'LINC00174', 'AC027644.3',
    'STAG3L4', 'NEIL2', 'FDFT1', 'CTSB', 'AC145124.1', 'AC037459.3',
    'AC009464.1', 'HCAR3', 'SLC25A21', 'GPR65', 'SGF29', 'SULT1A1',
    'TUFM', 'RABEP2', 'SPNS1', 'C16orf54', 'STX4', 'AC135050.3',
    'AC135050.6', 'NOD2', 'AC007728.3', 'CHRNB1', 'AC016876.1', 'LRRC37A4P',
    'AC091132.3', 'AC091132.4', 'RDM1P1', 'LINC02210', 'KANSL1', 'KANSL1-AS1',
    'ARL17B', 'LRRC37A', 'LRRC37A2', 'ARL17A', 'NSF', 'AC005670.2',
]

#### iterate over the tuples running the notebook per tuple

In [4]:
%%time

# Execute the base notebook once per (cohort, gene) pair via papermill,
# passing along only the visits that have a results file on disk.
for cohort in cohort_names:
    out_nb_dir = f'/home/jupyter/notebooks/{cohort}/pm_gend_nbs'
    # make sure the notebook output dir exists
    os.makedirs(out_nb_dir, exist_ok=True)
    wrk_dir = f'/home/jupyter/{cohort}'

    # before running the prep notebook make sure results exist for the cohort
    # and visit; this depends only on cohort/tissue/month, not on the gene, so
    # the files are checked once per cohort rather than once per gene
    visits = [month for month in months
              if os.path.isfile(f'{wrk_dir}/results/{cohort}.{tissue}{month}.cis.indep.csv')]
    if not visits:
        # no results files found for this cohort, so there is nothing to run
        continue

    for gene in genes:
        param_dict = {'cohort': cohort, 'gene': gene,
                      'tissue': tissue, 'visits': visits}
        out_notebook = f'{out_nb_dir}/{cohort}.{tissue}.{gene}.specific_eqtl_results.ipynb'
        print(param_dict)
        print(out_notebook)
        try:
            pm.execute_notebook(input_path=base_notebook, output_path=out_notebook,
                                parameters=param_dict)
        except Exception as error:
            # Best effort: a failure for one gene should not stop the batch.
            # Catch Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate and the run can be interrupted.
            print(f'{cohort}.{tissue}.{gene} encountered an error continuing on next')
            # surface what went wrong instead of silently discarding it
            print(f'    {type(error).__name__}: {error}')

{'cohort': 'biofind', 'gene': 'LRRK2', 'tissue': 'wb', 'visits': [1]}
/home/jupyter/notebooks/biofind/pm_gend_nbs/biofind.wb.LRRK2.specific_eqtl_results.ipynb


Executing:   0%|          | 0/35 [00:00<?, ?cell/s]

{'cohort': 'pdbp', 'gene': 'LRRK2', 'tissue': 'wb', 'visits': [0, 6, 12, 18, 24]}
/home/jupyter/notebooks/pdbp/pm_gend_nbs/pdbp.wb.LRRK2.specific_eqtl_results.ipynb


Executing:   0%|          | 0/35 [00:00<?, ?cell/s]

{'cohort': 'ppmi', 'gene': 'LRRK2', 'tissue': 'wb', 'visits': [0, 6, 12, 24, 36]}
/home/jupyter/notebooks/ppmi/pm_gend_nbs/ppmi.wb.LRRK2.specific_eqtl_results.ipynb


Executing:   0%|          | 0/35 [00:00<?, ?cell/s]

CPU times: user 7.52 s, sys: 410 ms, total: 7.93 s
Wall time: 4min 23s
