In [1]:
import pandas as pd
import pybedtools
import anndata
import pathlib
import subprocess

In [2]:
# Load the REPTILE score matrix and count, for every row (observation), how
# many columns carry a score above 0.5.
reptile_adata = anndata.read_h5ad(
    '/home/hanliu/project/mouse_rostral_brain/REPTILE/result/REPTILE_scores.subtype.h5ad'
)
above_cutoff = reptile_adata.X > 0.5
# .A1 flattens the (n_obs, 1) matrix returned by sum(axis=1) into a 1-D array.
# NOTE(review): this relies on X being a scipy sparse matrix / np.matrix — confirm.
hits_per_row = above_cutoff.sum(axis=1).A1
enhancer_hits = pd.Series(hits_per_row, index=reptile_adata.obs_names)

# Keep the names of the rows that pass the cutoff in at least one column.
use_dmr = enhancer_hits.loc[enhancer_hits.gt(0)].index

In [3]:
# Read the 'bed' table of the DMR info file and keep only the rows selected
# in `use_dmr`; reset_index() turns the row labels into the first column.
selected_dmr = (
    pd.read_hdf(
        '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/DMRInfo.h5',
        key='bed',
    )
    .loc[use_dmr]
    .copy()
    .reset_index()
)

# Reorder so the former index column comes last (columns 1, 2, 3, then 0).
# NOTE(review): presumably this yields chrom / start / end / DMR id — confirm
# against the layout of the 'bed' table.
dmr_bed = pybedtools.BedTool.from_dataframe(selected_dmr.iloc[:, [1, 2, 3, 0]])

In [4]:
# Wrap the feDMR BED file in a BedTool so it can be intersected with `dmr_bed`.
fedmr_path = '/home/hanliu/ref/inhouse/He_2020_Nature_Mouse_Tissue_Developmental/feDMR_FB.bed'
fedmr_bed = pybedtools.BedTool(fedmr_path)

In [5]:
# Split the selected DMRs by overlap with the feDMR set.
# u=True: report each DMR once if it overlaps any feDMR (wa=True keeps the
# original DMR record).
with_fedmr_bed = dmr_bed.intersect(
    fedmr_bed,
    u=True,
    wa=True,
)
# v=True: report only the DMRs that overlap no feDMR.
without_fedmr_bed = dmr_bed.intersect(
    fedmr_bed,
    v=True,
)

In [19]:
# Draw 3000 rows from each group with a fixed random_state and write them as
# tab-separated files without header or index.
sampled_outputs = (
    (with_fedmr_bed, 'eDMR_with_feDMR.bed'),
    (without_fedmr_bed, 'eDMR_without_feDMR.bed'),
)
for group_bed, out_name in sampled_outputs:
    sampled = group_bed.to_dataframe().sample(3000, random_state=0)
    sampled.to_csv(out_name, sep='\t', header=None, index=None)

## Make shuffled BED files (bedtools shuffle)

In [20]:
!bedtools shuffle -i eDMR_with_feDMR.bed -g /home/hanliu/ref/mouse/genome/mm10.main.chrom.sizes > eDMR_with_feDMR.shuffle.bed
!bedtools shuffle -i eDMR_without_feDMR.bed -g /home/hanliu/ref/mouse/genome/mm10.main.chrom.sizes > eDMR_without_feDMR.shuffle.bed

In [21]:
# Show the contents of the current working directory.
!ls

eDMR_with_feDMR.bed		    eDMR_without_feDMR.profile.gz
eDMR_with_feDMR.profile.gz	    eDMR_without_feDMR.shuffle.bed
eDMR_with_feDMR.shuffle.bed	    eDMR_without_feDMR.shuffle.profile.gz
eDMR_with_feDMR.shuffle.profile.gz  make_bed.ipynb
eDMR_without_feDMR.bed		    plot_profile.ipynb


In [23]:
# Run deepTools `computeMatrix reference-point` for every eDMR BED file in the
# working directory against all matching H3K27ac bigWig tracks.
bw_dir = pathlib.Path('/home/hanliu/ddn/hanliu/Yupeng_ENCODE_developmental_mouse_tissue/ChIP/')

# glob() yields paths in filesystem order; sort so the order of the -S tracks
# (and therefore of the samples in the output matrix) is the same on every run.
bigwig_paths = sorted(map(str, bw_dir.glob('*FB*H3K27ac*')))
if not bigwig_paths:
    # Fail early instead of invoking computeMatrix with an empty -S list.
    raise FileNotFoundError(f'No bigWig files matching *FB*H3K27ac* found in {bw_dir}')
# Kept as a space-joined string as well, in case other cells reference it.
bigwig_paths_str = ' '.join(bigwig_paths)

for bed_path in sorted(pathlib.Path().glob('eDMR*bed')):
    # e.g. eDMR_with_feDMR.shuffle.bed -> eDMR_with_feDMR.shuffle.profile.gz
    output = f'{bed_path.stem}.profile.gz'
    # Argument list (no shell) so paths containing spaces or shell
    # metacharacters are passed through unchanged.
    cmd = [
        'computeMatrix', 'reference-point',
        '-S', *bigwig_paths,
        '-R', str(bed_path),
        '-a', '3000',
        '-b', '3000',
        '-bs', '30',
        '--referencePoint', 'center',
        '-p', '47',
        '-o', output,
    ]
    # check=True raises CalledProcessError when computeMatrix exits non-zero,
    # so a failed run is not silently skipped.
    subprocess.run(cmd, check=True)

In [None]:
# Display the most recent value of `cmd` (assigned inside the computeMatrix loop).
cmd