In [1]:
import pandas as pd
from ALLCools.mcds import MCDS
import glob
from tqdm import tqdm
import pathlib

In [2]:
# Dimension names handed to MCDS.open(): `obs_dim` names the per-sample axis,
# `region_dim` names the per-DMR axis.
region_dim = 'dmr_region'
obs_dim = 'sample'

# A DMR is kept only when (max - min) methylation fraction across samples
# is strictly greater than this threshold.
min_delta_change = 0.2

In [10]:
# Input: one MCDS store per cell type lives under this directory.
_dir = 'mC_RAW_DMR_ZARR'

# Output: filtered DMR BED files are written here.
output_dir = 'mC_FILTER_DMR_BED'

In [4]:
# Create the output directory (and any missing parents); a no-op when it
# already exists, so the cell can be re-run safely.
pathlib.Path(output_dir).mkdir(
    exist_ok=True,
    parents=True,
)

In [5]:
# Scratch "peek" cell: the loop breaks on its first iteration, so `path` is left
# bound to whichever entry glob returns first under `_dir`, and the tqdm bar
# stays at 0%. If nothing matches the pattern, `path` is not assigned here.
# The per-cell-type loop further down rebinds `path` on every iteration.
for path in tqdm(glob.glob(f'{_dir}/*')):
    break

  0%|                                                                                      | 0/8 [00:00<?, ?it/s]


In [7]:
# Cell types processed by the filtering loop below; each name is used both as
# the MCDS sub-directory under `_dir` and as the prefix of the BED file names.
left_cts = [
    'Astro-NT_NN',
    'Astro-TE_NN',
    'LDT-PCG-CS_Gata3_Lhx1_Gaba',
    'MEA-COA_Glut',
    'PAG-PPN_Pax5_Sox21_Gaba',
    'PB_Evx2_Glut',
    'STR_D1_Sema5a_Gaba',
    'Vip_Gaba',
]

In [12]:
# For every cell type: compute per-sample methylation fractions of its DMRs,
# keep DMRs whose fraction range exceeds `min_delta_change`, annotate them with
# the rounded fraction at each age, and write Total / Hyper / Hypo / Hypo-Hyper
# BED files into `output_dir`.
for celltype in left_cts:
    path = f'{_dir}/{celltype}'
    mcds = MCDS.open(
        mcds_paths=path,
        obs_dim=obs_dim,
        var_dim=region_dim)

    # Methylation fraction = methylated counts / coverage.
    mc_df = mcds['dmr_region_da'].sel(count_type='mc')
    cov_df = mcds['dmr_region_da'].sel(count_type='cov')
    frac_df = mc_df / cov_df
    frac_df = frac_df.to_pandas()

    # filter for delta change
    # (frac_df is used here as samples x DMRs: max()/min() reduce over the
    # rows, leaving one value per DMR column.)
    delta_change = frac_df.max() - frac_df.min()
    use_dmrs = delta_change[delta_change > min_delta_change].index

    dmr_bed = pd.read_csv(f'DMR-with-id/{celltype}.with-ID.DMR.bed', sep='\t', header=None)
    # Column 3 of the BED file is matched against the DMR ids. Take an explicit
    # copy so the column assignments below modify an independent frame rather
    # than a slice of the full table (avoids SettingWithCopyWarning).
    dmr_bed = dmr_bed[dmr_bed[3].isin(use_dmrs)].copy()

    # Transpose once so each '<celltype>.<age>' sample can be looked up as a
    # column indexed by DMR id.
    frac_by_sample = frac_df.T
    try:
        for age in ('8wk', '9mo', '18mo'):
            dmr_bed[age] = dmr_bed[3].map(frac_by_sample[f'{celltype}.{age}']).round(2)
    except KeyError as err:
        # A sample for one of the ages is missing. Previously execution fell
        # through and crashed on the column accesses below; skip this cell
        # type instead and move on to the next one.
        print(f'{celltype}: missing sample {err}, skipped')
        continue

    # Hyper: fraction does not decrease with age; Hypo: does not increase.
    # Both comparisons use the values rounded to two decimals.
    hyper_dmr = dmr_bed[(dmr_bed['8wk'] <= dmr_bed['9mo']) & (dmr_bed['9mo'] <= dmr_bed['18mo'])]
    hypo_dmr = dmr_bed[(dmr_bed['8wk'] >= dmr_bed['9mo']) & (dmr_bed['9mo'] >= dmr_bed['18mo'])]

    hypo_hyper_dmr = pd.concat([hyper_dmr, hypo_dmr])

    # Write headerless, index-free, tab-separated BED files.
    outputs = {
        'Hypo-Hyper': hypo_hyper_dmr,
        'Total': dmr_bed,
        'Hyper': hyper_dmr,
        'Hypo': hypo_dmr,
    }
    for label, bed_df in outputs.items():
        bed_df.to_csv(f'{output_dir}/{celltype}.{label}.aDMR.bed', sep='\t', header=False, index=False)


In [6]:
# Show the kernel's working directory; the relative paths used in this notebook
# resolve against it.
!pwd

/home/qzeng/project/aging/230907-recall-dmr/Merge_DMR
