## Notebook to migrate Cellbender results from phase1 GEX pools that contained Entorhinal cortex samples to phase2
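- uses the Cellbender output files already generated for phase 1; the files are copied, Cellbender is not re-run
- the pool prefix in each file name is renamed to be consistent with the phase 2 data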

In [None]:
# print the current date and time when this cell runs
!date

#### import libraries

In [None]:
from pandas import read_csv
from os.path import exists
from os import listdir
from shutil import copyfile

#### set notebook variables

In [None]:
# project label, used as the prefix of the sample info file name
project = 'aging_phase2'

# locations: the phase1 Cellbender results are the copy source and the
# phase2 working directory holds the copy target
brain_aging_dir = '/labshare/raph/datasets/adrd_neuro/brain_aging'
phase1_cellbender_path = f'{brain_aging_dir}/phase1/cellbender'
wrk_dir = f'{brain_aging_dir}/phase2'
info_dir = f'{wrk_dir}/sample_info'
cellbender_dir = f'{wrk_dir}/cellbender'

# input: sample info table
info_file = f'{info_dir}/{project}.sample_info.csv'

# run settings
# DEBUG switches on the extra display/listing output in later cells
DEBUG = False
# phase1 pool numbers to migrate
phase1_pools = [4, 5]
# lane numbers 1 through 8 are checked for each pool
lanes = range(1, 9)

### load the sample info data

In [None]:
# read the sample info csv into a dataframe and report its (rows, columns)
info_df = read_csv(info_file)
info_shape = info_df.shape
print(f'shape of info {info_shape}')
# only preview the first rows when debugging
if DEBUG:
    display(info_df.head())

### migrate the pools

In [None]:
%%time
# Copy every Cellbender output file of the phase1 pools into the phase2
# cellbender directory, swapping the 'Aging_P00{pool}_SCRN_{lane}' file name
# prefix for the 'sample_ec_GEX_P{pool}_{lane}' prefix.
# The source directory is listed once, its content is the same for all
# pool/lane combinations; a missing source directory yields an empty listing
# so every pool is reported as skipped instead of raising.
phase1_files = (sorted(listdir(phase1_cellbender_path))
                if exists(phase1_cellbender_path) else [])
for pool in phase1_pools:
    for lane in lanes:
        pool_name = f'Aging_P00{pool}_SCRN_{lane}'
        new_name = f'sample_ec_GEX_P{pool}_{lane}'
        pool_path = f'{phase1_cellbender_path}/{pool_name}_out.h5'
        # a pool/lane is only migrated when its '_out.h5' file exists
        if not exists(pool_path):
            print(f'skipping {pool_name}, {pool_path} not found')
            continue
        prefix_len = len(pool_name)
        # the character after the prefix must not be a digit, otherwise
        # lane 1 would also pick up the files of lanes 10, 11, ...
        matching_files = [file for file in phase1_files
                          if file.startswith(pool_name)
                          and not file[prefix_len:prefix_len + 1].isdigit()]
        for pool_file in matching_files:
            # swap only the leading prefix, keep the rest of the name as is
            renamed_file = f'{new_name}{pool_file[prefix_len:]}'
            copyfile(f'{phase1_cellbender_path}/{pool_file}',
                     f'{cellbender_dir}/{renamed_file}')
        print(f'copied {len(matching_files)} files: {pool_name} -> {new_name}')

### verify new files in place

In [None]:
if DEBUG:
    for pool in phase1_pools:
        for lane in lanes:
            new_name = f'sample_ec_GEX_P{pool}_{lane}'
            !ls -lh {cellbender_dir}/{new_name}*

In [None]:
# print the current date and time when this cell runs
!date