## Notebook to create a demultiplexed GEX AnnData file per pool and lane using Papermill

In [None]:
# print the current date and time via the shell's `date` command
!date

#### import libraries

In [None]:
from pandas import read_csv
from os.path import exists
from os import makedirs
from papermill import execute_notebook

#### set notebook variables

In [None]:
# run-time switches
DEBUG = False  # when True, the loaded sample info is displayed for inspection
lane_range = range(1, 9)  # lane numbers 1 through 8

# project name, used as the prefix of the sample info file name
proj_name = 'aging_phase2'

# root locations
notebook_dir = '/home/gibbsr/working/ADRD_Brain_Aging/phase2'
wrk_dir = '/labshare/raph/datasets/adrd_neuro/brain_aging/phase2'

# locations derived from the roots
out_nb_dir = f'{notebook_dir}/pm_gend_nbs'
demux_dir = f'{wrk_dir}/demux'
info_dir = f'{wrk_dir}/sample_info'

# input files
base_notebook = f'{notebook_dir}/create_anndata_with_demuxlet_identified_donors.ipynb'
info_file = f'{info_dir}/{proj_name}.sample_info.csv'

### load the sample info data

In [None]:
# read the sample info table from the project's CSV file
info_df = read_csv(info_file)
print(f'shape of info {info_df.shape}')
# keep only the rows that have both a GEX pool and an ATAC pool assigned;
# take an explicit copy so the column assignments below operate on an
# independent frame instead of a slice of the frame that was read
# (assigning columns on a filtered slice raises SettingWithCopyWarning)
has_both_pools = info_df['gex_pool'].notna() & info_df['atac_pool'].notna()
info_df = info_df.loc[has_both_pools].copy()
print(f'shape of info {info_df.shape}')
# make sure pool nums are ints and not floats
# bracket assignment is used because attribute-style access is not the
# recommended way to set DataFrame columns
info_df['gex_pool'] = info_df['gex_pool'].astype('int')
info_df['atac_pool'] = info_df['atac_pool'].astype('int')
print(f'shape of info {info_df.shape}')
if DEBUG:
    # optional inspection of the filtered table and its GEX pool sizes
    display(info_df.head())
    display(info_df.gex_pool.value_counts())

### run the notebook iterations

In [None]:
%%time
# create the directory for the executed notebooks if it is not there yet
makedirs(out_nb_dir, exist_ok=True)

# every pool number present in either the GEX or the ATAC pool column
pools = set(info_df.gex_pool.unique()) | set(info_df.atac_pool.unique())

for modality in ['GEX']:
    for pool in pools:
        for lane in lane_range:
            # skip pool/lane combinations that have no '.best' file in demux_dir
            demux_best = f'{demux_dir}/{modality}_P{pool}_{lane}.best'
            if not exists(demux_best):
                continue
            # parameters handed to the base notebook; pool and lane are cast
            # to plain Python ints before being passed along
            nb_params = {'modality': modality, 'pool_num': int(pool),
                         'lane_num': int(lane)}
            executed_nb = f'{out_nb_dir}/{modality}_P{pool}_{lane}.demultiplexed_h5ad.ipynb'
            print(nb_params)
            print(executed_nb)
            # run the base notebook with these parameters and save the
            # executed copy under out_nb_dir
            execute_notebook(input_path=base_notebook, output_path=executed_nb,
                             parameters=nb_params)

In [None]:
# print the current date and time via the shell's `date` command
!date