## Notebook to run the latent factors age analysis per broad and specific cell-type using papermill

In [None]:
# print the current date and time via the shell `date` command (run start marker)
!date

#### import libraries

In [None]:
from papermill import execute_notebook
from pandas import read_csv, concat
from os import makedirs

#### set notebook variables

In [None]:
# parameters
# project name; it is the leading component of the input result file names
project = 'aging_phase2'

# directories
# nb_dir is the root for notebooks (base notebook and generated output notebooks)
nb_dir = '/home/gibbsr/working/ADRD_Brain_Aging/phase2'
# wrk_dir is the root for data; the results directory is derived from it
wrk_dir = '/labshare/raph/datasets/adrd_neuro/brain_aging/phase2'
results_dir = f'{wrk_dir}/results'

# base notebook to be run per tuple iteration
base_notebook = f'{nb_dir}/development/analyses/latent_factors_age_analysis.ipynb'

# output path for the generated notebooks
out_nb_dir = f'{nb_dir}/analyses/pm_gend_nbs'

# setup parameters to iterate
# categories maps the category name (stored in the 'type' column and passed to
# the base notebook as 'category') to the prefix used in input and output file names
categories = {'curated_type': 'broad', 'cluster_name': 'specific'}
# modalities are the modality labels that appear in the input result file names
modalities = ['GEX', 'ATAC']
# DEBUG enables the extra display() previews after the age results are loaded
DEBUG = True

### make sure output notebook directory exists

In [None]:
# create the output notebook directory (and any missing parents);
# exist_ok=True keeps re-runs from failing when it is already there
makedirs(out_nb_dir, exist_ok=True)

#### load age associated feature results
Get the age-associated GEX and ATAC features needed per cell-type.

In [None]:
%%time
# Load the `glm_tweedie_fdr_filtered.age.csv` result file for every
# (category, modality) pair and stack them into a single table.
age_results = []
for category, prefix in categories.items():
    for modality in modalities:
        print(modality)
        in_file = f'{results_dir}/{project}.{modality}.{prefix}.glm_tweedie_fdr_filtered.age.csv'
        this_df = read_csv(in_file)
        # tag every row with its source so the stacked table can be grouped
        # by modality and by category later on
        this_df['modality'] = modality
        this_df['type'] = category
        age_results.append(this_df)
age_results_df = concat(age_results)
print(f'shape of the age results is {age_results_df.shape}')
if DEBUG:
    # DataFrame.sample raises ValueError when asked for more rows than exist,
    # so cap the preview at the number of rows actually loaded
    display(age_results_df.sample(min(5, len(age_results_df))))
    display(age_results_df.modality.value_counts())
    display(age_results_df.groupby('type').tissue.value_counts())

### run the notebook iterations

In [None]:
%%time

# Execute the base notebook once per (category, cell-type) present in the age
# results, writing each executed copy into out_nb_dir.
for category, cell_types in age_results_df.groupby('type').tissue.unique().items():
    prefix = categories.get(category)
    for cell_type in cell_types:
        param_dict = {'category': category, 'cell_type': cell_type}
        out_notebook = f'{out_nb_dir}/{prefix}_{cell_type}.latent_factors_age_analysis.ipynb'
        print(param_dict)
        print(out_notebook)
        try:
            execute_notebook(input_path=base_notebook, output_path=out_notebook,
                             parameters=param_dict)
        except Exception as error:
            # Best-effort batch: one failing cell-type must not stop the rest.
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit still stop the run, and report the
            # reason instead of discarding it.
            print(f'{category} {cell_type} encountered an error continuing on next')
            print(f'    {type(error).__name__}: {error}')

In [None]:
# print the current date and time via the shell `date` command (run end marker)
!date