In [1]:
import glob
import os

import joblib
import pandas as pd
import pybedtools
import pyBigWig
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm import tqdm

In [2]:
age_list = ['2mo','9mo','18mo']
_type  = 'Hypo'

dmr_dir = '/home/qzeng/project/aging/230907-recall-dmr/Merge_DMR/mC_FILTER_DMR_BED'
bigwig_dir = '/ceph/gale-1/qzeng/AmbData/Luisa/male_atac'

In [3]:
# Load the cell-type name mapping from the joblib file 'mc_to_atac' (relative
# path). Its keys are used to build the DMR BED file names and its values to
# build the ATAC bigWig file names further down in this notebook.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load this from a trusted source.
mc_to_atac = joblib.load('mc_to_atac')

In [9]:
# Display the loaded mapping to eyeball which names differ between the two
# naming schemes (keys vs. values).
mc_to_atac

{'L23_IT_ENT_Glut': 'L2_3_IT_ENT_Glut',
 'L23_IT_ENTl-PIR_Glut': 'L2_3_IT_PIR-ENTl_Glut',
 'L23_IT_PPP_Glut': 'L2_3_IT_PPP_Glut',
 'L6bCT_ENT_Glut': 'L6b_CT_ENT_Glut',
 'PAG_Glut': 'PAG_Pou4f1_Ebf2_Glut',
 'VipSncg_Gaba': 'Sncg_Gaba',
 'Astro-NT_NN': 'Astro-NT',
 'Astro-TE_NN': 'Astro-TE',
 'MB-MY_Tph2_Glut-Sero': 'MB-MY_Tph2_Glut-Sero',
 'CA1-ProS_Glut': 'CA1-ProS_Glut',
 'CA2-FC-IG_Glut': 'CA2-FC-IG_Glut',
 'CA3_Glut': 'CA3_Glut',
 'CEA-BST_Gaba': 'CEA-BST_Gaba',
 'DG_Glut': 'DG_Glut',
 'ENTmv-PA-COAp_Glut': 'ENTmv-PA-COAp_Glut',
 'Endo_NN': 'Endo_NN',
 'L2_IT_PPP-APr_Glut': 'L2_IT_PPP-APr_Glut',
 'L5_ET_CTX_Glut': 'L5_ET_CTX_Glut',
 'L5_IT_CTX_Glut': 'L5_IT_CTX_Glut',
 'L5_NP_CTX_Glut': 'L5_NP_CTX_Glut',
 'L6_CT_CTX_Glut': 'L6_CT_CTX_Glut',
 'L6_IT_CTX_Glut': 'L6_IT_CTX_Glut',
 'LA-BLA-BMA-PA_Glut': 'LA-BLA-BMA-PA_Glut',
 'Lamp5_Gaba': 'Lamp5_Gaba',
 'OPC_NN': 'OPC_NN',
 'MEA-BST_Gaba': 'MEA-BST_Gaba',
 'Microglia_NN': 'Microglia_NN',
 'Oligo_NN': 'Oligo_NN',
 'NDB-SI-MA-STRv_Lhx8_G

In [4]:
# mc_to_atac = {'Oligo_NN':'Oligo_NN',
#              'CA1-ProS_Glut': 'CA1-ProS_Glut',
#              'L5_IT_CTX_Glut': 'L5_IT_CTX_Glut',
#              'DG_Glut': 'DG_Glut',
#              'L6_IT_CTX_Glut': 'L6_IT_CTX_Glut',
#              'L5_ET_CTX_Glut': 'L5_ET_CTX_Glut',
#              'CA3_Glut': 'CA3_Glut',
#              'L23_IT_PPP_Glut':'L2_IT_PPP-APr_Glut',
#              'ENTmv-PA-COAp_Glut': 'ENTmv-PA-COAp_Glut',
#              'L45_IT_CTX_Glut':'L5_IT_CTX_Glut',
#              'LA-BLA-BMA-PA_Glut': 'LA-BLA-BMA-PA_Glut'}


In [5]:
def get_atac_counts(age, dmr_list, slop=1000, cell_type=None):
    """Sum the ATAC bigWig signal in a window around each DMR for one age.

    Parameters
    ----------
    age : str
        Age label used in the bigWig file name; also the name of the single
        column in the returned DataFrame.
    dmr_list : iterable of str
        DMR identifiers of the form "<chrom>-<start>-<end>". Also used as the
        index of the returned DataFrame.
    slop : int, default 1000
        Number of bases by which each DMR is extended on both sides before
        the signal is summed.
    cell_type : str, optional
        ATAC cell-type name used in the bigWig file name. When omitted, the
        module-level variable ``atac_ct`` is used, so existing callers that
        set that global before calling keep working.

    Returns
    -------
    pandas.DataFrame or None
        One row per DMR with the summed signal in column ``age``; ``None``
        when the bigWig file cannot be opened.
    """
    if cell_type is None:
        # Backward-compatible fallback to the notebook-level global.
        cell_type = atac_ct
    bigwig_path = f"{bigwig_dir}/Male_{cell_type}_{age}_merge.bw"
    try:
        bw = pyBigWig.open(bigwig_path)
    except Exception:
        # Best effort: a missing/unreadable track is reported by the caller.
        return None

    try:
        # Chromosome lengths are loop-invariant; fetch them once.
        chrom_sizes = bw.chroms()
        total_counts = []
        for dmr_id in dmr_list:
            # Split from the right so chromosome names containing '-' still
            # parse into exactly (chrom, start, end).
            _chr, start, end = dmr_id.rsplit('-', 2)
            # Clamp the extended window to the chromosome: pyBigWig rejects
            # intervals with a negative start or an end past the chromosome.
            start = max(int(start) - slop, 0)
            end = int(end) + slop
            chrom_len = chrom_sizes.get(_chr)
            if chrom_len is not None:
                end = min(end, chrom_len)
            # If the chromosome is absent from the bigWig the interval is
            # passed through unchanged and pyBigWig's own error surfaces.
            counts = bw.stats(_chr, start, end, type="sum")[0]
            total_counts.append(counts)
    finally:
        # Always release the file handle, even if a query above fails.
        bw.close()

    return pd.DataFrame({age: total_counts}, index=dmr_list)

In [6]:
# Every cell type present as a key of the mapping is processed;
# iterating a dict yields its keys in insertion order.
use_cts = list(mc_to_atac)
# Show how many cell types will be processed.
len(use_cts)

38

In [7]:
# use_cts = ['Oligo_NN', 'CA1-ProS_Glut', 'L5_IT_CTX_Glut', 'DG_Glut', 'L6_IT_CTX_Glut', 'L5_ET_CTX_Glut', 'CA3_Glut', 
#            'L23_IT_PPP_Glut', 'ENTmv-PA-COAp_Glut']

In [8]:
# For every DMR direction and cell type, sum the ATAC signal around each DMR
# at every age and write one CSV per (cell type, direction).
out_dir = "1000_atac_counts"
# DataFrame.to_csv does not create missing directories; make sure the target
# exists before any bigWig scanning is done.
os.makedirs(out_dir, exist_ok=True)

for _type in ['Hypo', 'Hyper']:
    for ct in use_cts:
        # get_atac_counts reads the module-level `atac_ct` to build the
        # bigWig path, so it must be assigned before the calls below.
        atac_ct = mc_to_atac[ct]
        dmr_path = f"{dmr_dir}/{ct}.{_type}.aDMR.bed"
        # The 4th BED column (index 3) becomes the index and supplies the
        # DMR identifiers passed to get_atac_counts.
        dmr_bed = pd.read_csv(dmr_path, sep='\t', header=None, index_col=3)
        all_dmrs = dmr_bed.index

        per_age_counts = []
        for age in age_list:
            tmpdf = get_atac_counts(age, all_dmrs)
            if tmpdf is None:
                # No usable bigWig for this cell type / age: report and skip.
                print(f"{ct} {age}")
                continue
            per_age_counts.append(tmpdf)

        if not per_age_counts:
            # pd.concat raises ValueError on an empty list; skip this cell
            # type instead of aborting the remaining ones.
            print(f"{ct} {_type}: no ATAC bigWig available for any age, skipping")
            continue

        # One column per age that had a bigWig, rows aligned on DMR id.
        all_counts_df = pd.concat(per_age_counts, axis=1)
        all_counts_df.to_csv(f"{out_dir}/{ct}.{_type}.csv")
    