## Notebook to run the glmmTMB analysis per brain region and cell-type with papermill

Originally coded to run in parallel out of a single notebook, but there was always a problem passing an anndata object to ray or concurrent futures.

So here the analysis is run via papermill, where a notebook for each grouping is run in parallel using the threading library. Ray and concurrent futures have also been tried previously. With all of these approaches the managing papermill notebook blows up, but the individual notebooks keep running in parallel through successful completion.

In [1]:
# record the wall-clock time at the start of the notebook run
!date

Thu Oct 14 19:27:55 EDT 2021


#### import libraries and set notebook variables

In [2]:
import os
import papermill as pm
import threading

  from pyarrow import HadoopFileSystem


In [3]:
# cohort label; it is also the last component of the project directory
cohort = 'aging'

# project directory
home_dir = f'/labshare/raph/notebooks/expression/adrd_neuro/{cohort}'

# template notebook that is executed once per grouping value
base_notebook = f'{home_dir}/frmt_glmmtmb_diffexp.ipynb'

# directory that receives the executed notebook copies
out_nb_dir = f'{home_dir}/pm_gend_nbs'

# values to iterate over; one notebook run is launched per entry
brain_regions = [
    'Entorhinal cortex',
    'Putamen',
    'Subventricular zone',
    'Middle temporal gyrus',
]
cell_types = [
    'Oligodendrocyte-1',
    'SPN D1',
    'SPN D2',
    'Oligodendrocyte-2',
    'Astrocyte',
    'ExN CUX2 LAMP5',
    'InN ADARB2 VIP',
    'ExN FEZF2',
    'OPC',
    'ExN RORB THEMIS',
    'InN LHX6 PVALB',
    'Radial Glia',
    'Microglia',
    'InN ADARB2 LAMP5',
    'ExN CUX2 ADARB2',
    'InN LHX6 SST',
    'SPN D1-2',
    'Endothelial',
    'ExN RORB',
    'ExN LAMP5',
    'SPN D2-2',
    'ExN THEMIS',
]
# key -> value sent as the `obs_type` parameter,
# list -> values sent one at a time as the `region_celltype` parameter
groupings = {
    'Brain_region': brain_regions,
    'new_anno': cell_types,
}

# forwarded unchanged as the `testing` parameter of every run
testing = False

#### utility functions

In [4]:
def run_pm_notebook(base_notebook: str, out_notebook: str, params: dict) -> str:
    """Execute one parameterized copy of a notebook with papermill.

    Args:
        base_notebook: path of the template notebook to execute.
        out_notebook: path where the executed copy is written.
        params: parameter names and values handed to papermill.

    Returns:
        A two-line description of the job (output notebook path and the
        parameters). Any exception raised by papermill propagates to the caller.
    """
    # the description is formatted before execution starts
    job_summary = f'notebook: {out_notebook}\nparams: {params}'
    pm.execute_notebook(
        input_path=base_notebook,
        output_path=out_notebook,
        parameters=params,
        progress_bar=False,
    )
    return job_summary

#### check the notebook template

In [5]:
# show the parameters papermill finds in the template, with their defaults
pm.inspect_notebook(base_notebook)

{'region_celltype': {'name': 'region_celltype',
  'inferred_type_name': 'None',
  'default': "''",
  'help': ''},
 'obs_type': {'name': 'obs_type',
  'inferred_type_name': 'None',
  'default': "''",
  'help': ''},
 'testing': {'name': 'testing',
  'inferred_type_name': 'None',
  'default': 'False',
  'help': ''}}

#### iterate over the groupings, running the notebook once per brain region and per cell type

In [6]:
%%time
# make sure the notebook output dir exists
os.makedirs(out_nb_dir, exist_ok=True)

job_threads = []
for g_type, groups in groupings.items():
    for grouping in groups:
        param_dict = {'region_celltype': grouping, 'obs_type': g_type, 'testing': testing}
        out_notebook = f'{out_nb_dir}/{grouping.replace(" ", "_")}.glfrmt_glmmtmb_diffexp.ipynb'
        this_thread = threading.Thread(target=run_pm_notebook, 
                                       args=(base_notebook, out_notebook, param_dict,))
        job_threads.append(this_thread)
        this_thread.start()
        
for job_thread in job_threads:
    job_thread.join()     

Traceback (most recent call last):
  File "/home/gibbsr/anaconda3/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/gibbsr/anaconda3/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/gibbsr/anaconda3/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/gibbsr/anaconda3/lib/python3.8/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.initialize(argv)
  File "/home/gibbsr/anaconda3/lib/python3.8/site-packages/traitlets/config/application.py", line 88, in inner
    return method(app, *args, **kwargs)
  File "/home/gibbsr/anaconda3/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 632, in initialize
    self.init_sockets()
  File "/home/gibbsr/anaconda3/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 282, in init_sockets
    self.shell_port = self._bind_socket(self.shel

CPU times: user 1min 26s, sys: 22.5 s, total: 1min 49s
Wall time: 3h 21min 40s


In [7]:
# record the wall-clock time once all notebook runs have been joined
!date

Thu Oct 14 22:49:37 EDT 2021
