In [1]:
import pandas as pd
import numpy as np
import anndata
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import warnings
import pathlib

warnings.filterwarnings('ignore')

In [2]:
from matplotlib import rc

# Shared figure styling: small tick/axis labels and thin strokes.
labelsize = 6
linewidth = 0.6

rc('lines', linewidth=linewidth)
rc('axes', labelsize=labelsize, linewidth=linewidth)
# rc() accepts a tuple of group names, so both axes are configured in one call.
rc(('xtick', 'ytick'), labelsize=labelsize)
rc(('xtick.major', 'ytick.major'), width=linewidth)
rc(('xtick.minor', 'ytick.minor'), width=linewidth - 0.2)

In [3]:
# Default parameters. The "# Parameters" cell that follows reassigns every one
# of these names, so the values here only apply when that cell is absent.
chrom = 'chr2'  # chromosome to process; also used as the output directory name
genes = ['Lhx6']  # genes to process; each entry is looked up in gene_meta's index
slop = 250000  # flank added on each side of the gene when selecting DMRs (presumably bp — confirm)
n_pc = 10  # number of principal components handed to sc.pp.neighbors
resolution = 1  # resolution handed to sc.tl.leiden

In [4]:
# Parameters
# NOTE(review): this cell reassigns every default from the previous cell —
# presumably injected by a notebook parameterization tool; confirm.
chrom = "chr11"
genes = ["ENSMUSG00000069769.13", "ENSMUSG00000020431.5", "ENSMUSG00000020173.17", "ENSMUSG00000050965.14", "ENSMUSG00000045287.6", "ENSMUSG00000039976.4", "ENSMUSG00000056427.10", "ENSMUSG00000009073.16", "ENSMUSG00000018378.13", "ENSMUSG00000020523.14", "ENSMUSG00000053930.13", "ENSMUSG00000020919.11", "ENSMUSG00000020122.16", "ENSMUSG00000020363.6", "ENSMUSG00000061306.16", "ENSMUSG00000018849.6", "ENSMUSG00000020811.16", "ENSMUSG00000053519.16", "ENSMUSG00000004040.16", "ENSMUSG00000017146.12", "ENSMUSG00000018427.7", "ENSMUSG00000020866.17", "ENSMUSG00000000632.13", "ENSMUSG00000084967.1", "ENSMUSG00000020847.15", "ENSMUSG00000041654.15", "ENSMUSG00000018334.18", "ENSMUSG00000036264.9", "ENSMUSG00000020374.16", "ENSMUSG00000018634.10", "ENSMUSG00000025372.16", "ENSMUSG00000059248.13", "ENSMUSG00000056752.16", "ENSMUSG00000087259.7", "ENSMUSG00000020376.17", "ENSMUSG00000017466.9", "ENSMUSG00000003949.16", "ENSMUSG00000020176.17", "ENSMUSG00000041592.16", "ENSMUSG00000020522.13", "ENSMUSG00000000751.13", "ENSMUSG00000020599.13", "ENSMUSG00000040711.7", "ENSMUSG00000033389.16", "ENSMUSG00000086193.1", "ENSMUSG00000017692.8", "ENSMUSG00000042331.13", "ENSMUSG00000020723.3", "ENSMUSG00000040543.16", "ENSMUSG00000020393.16", "ENSMUSG00000049800.13", "ENSMUSG00000020422.13", "ENSMUSG00000049336.16", "ENSMUSG00000086584.8", "ENSMUSG00000007653.12", "ENSMUSG00000020717.19", "ENSMUSG00000020520.14", "ENSMUSG00000020844.6", "ENSMUSG00000042529.14", "ENSMUSG00000020848.7", "ENSMUSG00000046442.4", "ENSMUSG00000040548.16", "ENSMUSG00000020160.18", "ENSMUSG00000020319.9", "ENSMUSG00000009090.17", "ENSMUSG00000020389.19", "ENSMUSG00000017376.15", "ENSMUSG00000040373.12", "ENSMUSG00000020532.18", "ENSMUSG00000020272.8", "ENSMUSG00000036545.8", "ENSMUSG00000020941.7", "ENSMUSG00000020903.13", "ENSMUSG00000033066.15", "ENSMUSG00000057098.14", "ENSMUSG00000043857.16", "ENSMUSG00000042148.8", "ENSMUSG00000040447.15", "ENSMUSG00000020701.12", "ENSMUSG00000087059.7", 
"ENSMUSG00000084085.2", "ENSMUSG00000020182.16", "ENSMUSG00000010803.13", "ENSMUSG00000049807.16", "ENSMUSG00000044707.7", "ENSMUSG00000000861.15", "ENSMUSG00000098650.1", "ENSMUSG00000020435.17", "ENSMUSG00000020715.9", "ENSMUSG00000032878.16", "ENSMUSG00000025583.15", "ENSMUSG00000000126.11", "ENSMUSG00000025582.4", "ENSMUSG00000025576.17", "ENSMUSG00000002699.13", "ENSMUSG00000020792.15", "ENSMUSG00000044847.13", "ENSMUSG00000020807.15", "ENSMUSG00000087581.1", "ENSMUSG00000005237.14", "ENSMUSG00000107877.2", "ENSMUSG00000040594.19", "ENSMUSG00000038893.12", "ENSMUSG00000017132.17", "ENSMUSG00000018697.14", "ENSMUSG00000018648.15", "ENSMUSG00000057967.12", "ENSMUSG00000035441.14", "ENSMUSG00000076433.4", "ENSMUSG00000040838.10", "ENSMUSG00000003948.17", "ENSMUSG00000020400.17", "ENSMUSG00000020387.15", "ENSMUSG00000055010.1", "ENSMUSG00000006169.19", "ENSMUSG00000072753.12", "ENSMUSG00000085091.1", "ENSMUSG00000085684.2", "ENSMUSG00000085246.1", "ENSMUSG00000078627.9", "ENSMUSG00000020453.17", "ENSMUSG00000041695.2", "ENSMUSG00000069911.11", "ENSMUSG00000020690.12", "ENSMUSG00000045671.17", "ENSMUSG00000059439.15", "ENSMUSG00000009075.2", "ENSMUSG00000038453.17", "ENSMUSG00000087172.1", "ENSMUSG00000050944.14", "ENSMUSG00000050830.17", "ENSMUSG00000051650.11", "ENSMUSG00000070390.12", "ENSMUSG00000037992.16", "ENSMUSG00000020362.13", "ENSMUSG00000034329.16", "ENSMUSG00000020917.17", "ENSMUSG00000069830.10", "ENSMUSG00000020542.18", "ENSMUSG00000041598.7", "ENSMUSG00000046417.14", "ENSMUSG00000084978.1", "ENSMUSG00000049321.17", "ENSMUSG00000034993.7", "ENSMUSG00000061462.18", "ENSMUSG00000020436.17", "ENSMUSG00000002814.15", "ENSMUSG00000086612.1", "ENSMUSG00000034940.15", "ENSMUSG00000017453.4", "ENSMUSG00000085207.1", "ENSMUSG00000020823.16", "ENSMUSG00000007646.13", "ENSMUSG00000044949.4", "ENSMUSG00000085230.1", "ENSMUSG00000033015.16", "ENSMUSG00000020702.13", "ENSMUSG00000035355.15", "ENSMUSG00000034164.17", "ENSMUSG00000048445.6", "ENSMUSG00000018340.13", 
"ENSMUSG00000001036.17", "ENSMUSG00000018932.9", "ENSMUSG00000020268.14", "ENSMUSG00000057522.15", "ENSMUSG00000050541.14", "ENSMUSG00000025571.13", "ENSMUSG00000086020.1", "ENSMUSG00000000538.18", "ENSMUSG00000020366.18", "ENSMUSG00000038046.4", "ENSMUSG00000085130.1", "ENSMUSG00000085516.1", "ENSMUSG00000072640.11", "ENSMUSG00000020525.17", "ENSMUSG00000057058.16", "ENSMUSG00000047773.14", "ENSMUSG00000038178.16", "ENSMUSG00000039741.15", "ENSMUSG00000020620.14", "ENSMUSG00000070327.14", "ENSMUSG00000109967.1", "ENSMUSG00000018800.14", "ENSMUSG00000085162.7", "ENSMUSG00000085081.1", "ENSMUSG00000020926.16", "ENSMUSG00000085860.1", "ENSMUSG00000001552.14", "ENSMUSG00000020334.6", "ENSMUSG00000020413.11", "ENSMUSG00000064090.14", "ENSMUSG00000034714.9", "ENSMUSG00000010342.16", "ENSMUSG00000081769.9", "ENSMUSG00000009076.10", "ENSMUSG00000020120.15", "ENSMUSG00000020486.18", "ENSMUSG00000085719.1", "ENSMUSG00000056598.16", "ENSMUSG00000109408.1", "ENSMUSG00000060180.12", "ENSMUSG00000019189.13", "ENSMUSG00000020696.18", "ENSMUSG00000020548.10", "ENSMUSG00000011256.16", "ENSMUSG00000008855.17", "ENSMUSG00000017493.12", "ENSMUSG00000020902.12", "ENSMUSG00000025579.14", "ENSMUSG00000038366.15", "ENSMUSG00000020840.10", "ENSMUSG00000020821.17", "ENSMUSG00000078962.4", "ENSMUSG00000052373.14", "ENSMUSG00000020814.13", "ENSMUSG00000020297.10", "ENSMUSG00000034187.18", "ENSMUSG00000020697.16", "ENSMUSG00000000631.20", "ENSMUSG00000104880.1", "ENSMUSG00000086924.1", "ENSMUSG00000020191.11", "ENSMUSG00000088634.1", "ENSMUSG00000017288.15", "ENSMUSG00000001507.16", "ENSMUSG00000093485.1", "ENSMUSG00000038517.15", "ENSMUSG00000051355.18", "ENSMUSG00000037275.14", "ENSMUSG00000092528.9", "ENSMUSG00000020900.15", "ENSMUSG00000020604.13", "ENSMUSG00000004668.14", "ENSMUSG00000018548.15", "ENSMUSG00000018654.17", "ENSMUSG00000055333.14", "ENSMUSG00000086209.1", "ENSMUSG00000018339.11", "ENSMUSG00000053263.3", "ENSMUSG00000020689.4", "ENSMUSG00000118486.1", "ENSMUSG00000046719.7", 
"ENSMUSG00000018906.14", "ENSMUSG00000020481.15", "ENSMUSG00000080976.1", "ENSMUSG00000000142.15", "ENSMUSG00000000263.15", "ENSMUSG00000018809.2", "ENSMUSG00000064010.12", "ENSMUSG00000000301.16", "ENSMUSG00000015869.16", "ENSMUSG00000087668.1", "ENSMUSG00000110344.1", "ENSMUSG00000075410.13", "ENSMUSG00000044072.14", "ENSMUSG00000086162.1", "ENSMUSG00000038485.6", "ENSMUSG00000020611.14", "ENSMUSG00000010025.19", "ENSMUSG00000097887.2", "ENSMUSG00000035992.15", "ENSMUSG00000019590.16", "ENSMUSG00000061086.12", "ENSMUSG00000085240.1", "ENSMUSG00000061718.12", "ENSMUSG00000037750.16", "ENSMUSG00000017631.19", "ENSMUSG00000018377.10", "ENSMUSG00000020395.13", "ENSMUSG00000018428.15", "ENSMUSG00000020261.15", "ENSMUSG00000087116.1", "ENSMUSG00000055775.16", "ENSMUSG00000020810.5", "ENSMUSG00000041046.7", "ENSMUSG00000018442.13", "ENSMUSG00000072834.4", "ENSMUSG00000020827.18", "ENSMUSG00000087328.1", "ENSMUSG00000069825.12", "ENSMUSG00000017774.19", "ENSMUSG00000010358.13", "ENSMUSG00000046605.14", "ENSMUSG00000020785.17", "ENSMUSG00000087507.1", "ENSMUSG00000085419.1", "ENSMUSG00000017561.16", "ENSMUSG00000020703.3", "ENSMUSG00000020716.16", "ENSMUSG00000049928.15", "ENSMUSG00000084941.1", "ENSMUSG00000055805.15", "ENSMUSG00000034201.13", "ENSMUSG00000010554.14", "ENSMUSG00000034543.15", "ENSMUSG00000001891.16", "ENSMUSG00000045980.13", "ENSMUSG00000078789.9", "ENSMUSG00000020679.11", "ENSMUSG00000033352.11", "ENSMUSG00000000125.5", "ENSMUSG00000017417.14", "ENSMUSG00000037958.13", "ENSMUSG00000087058.1", "ENSMUSG00000000278.10", "ENSMUSG00000020798.14", "ENSMUSG00000017400.10", "ENSMUSG00000085944.1", "ENSMUSG00000020516.15", "ENSMUSG00000086311.1", "ENSMUSG00000020721.16", "ENSMUSG00000020388.12", "ENSMUSG00000038290.15", "ENSMUSG00000085941.1", "ENSMUSG00000086347.1", "ENSMUSG00000033987.16", "ENSMUSG00000085806.1", "ENSMUSG00000087306.7", "ENSMUSG00000020698.11", "ENSMUSG00000048070.4", "ENSMUSG00000111450.1", "ENSMUSG00000050567.16", "ENSMUSG00000097219.1", 
"ENSMUSG00000057778.14", "ENSMUSG00000097294.1", "ENSMUSG00000020467.15", "ENSMUSG00000038534.18", "ENSMUSG00000092985.1", "ENSMUSG00000020549.14", "ENSMUSG00000020774.9", "ENSMUSG00000047988.1", "ENSMUSG00000085772.7", "ENSMUSG00000041895.15", "ENSMUSG00000086199.7", "ENSMUSG00000057003.12", "ENSMUSG00000087512.2", "ENSMUSG00000087404.1", "ENSMUSG00000042678.17", "ENSMUSG00000041674.16", "ENSMUSG00000040405.13", "ENSMUSG00000001313.12", "ENSMUSG00000020839.16", "ENSMUSG00000005947.11", "ENSMUSG00000004018.9", "ENSMUSG00000038013.14", "ENSMUSG00000087407.1", "ENSMUSG00000010122.14", "ENSMUSG00000085548.1", "ENSMUSG00000065417.1", "ENSMUSG00000052298.12", "ENSMUSG00000047181.12", "ENSMUSG00000017639.13", "ENSMUSG00000048497.11", "ENSMUSG00000018217.12", "ENSMUSG00000088125.1", "ENSMUSG00000020695.14", "ENSMUSG00000034520.14", "ENSMUSG00000086932.1", "ENSMUSG00000018486.2", "ENSMUSG00000078632.4", "ENSMUSG00000106179.1", "ENSMUSG00000084890.1", "ENSMUSG00000013418.8", "ENSMUSG00000020333.17", "ENSMUSG00000033909.17", "ENSMUSG00000055546.6", "ENSMUSG00000086981.2", "ENSMUSG00000087675.1", "ENSMUSG00000005267.13", "ENSMUSG00000020790.14", "ENSMUSG00000020912.4", "ENSMUSG00000032740.16", "ENSMUSG00000020946.13", "ENSMUSG00000085652.1", "ENSMUSG00000014195.15", "ENSMUSG00000040528.15", "ENSMUSG00000083935.1", "ENSMUSG00000047126.17", "ENSMUSG00000085170.1", "ENSMUSG00000010392.8", "ENSMUSG00000013415.9", "ENSMUSG00000006930.15", "ENSMUSG00000044749.13", "ENSMUSG00000084004.1", "ENSMUSG00000044788.10", "ENSMUSG00000001440.5", "ENSMUSG00000018168.8", "ENSMUSG00000047759.6", "ENSMUSG00000010277.7", "ENSMUSG00000097239.7", "ENSMUSG00000101258.1", "ENSMUSG00000087452.1", "ENSMUSG00000013653.2", "ENSMUSG00000020412.16", "ENSMUSG00000086933.1", "ENSMUSG00000051452.8", "ENSMUSG00000087113.1", "ENSMUSG00000041797.7", "ENSMUSG00000051497.15", "ENSMUSG00000001493.9", "ENSMUSG00000041346.11", "ENSMUSG00000009900.7", "ENSMUSG00000087013.7", "ENSMUSG00000056328.14", 
"ENSMUSG00000082027.1", "ENSMUSG00000034177.15", "ENSMUSG00000047904.6", "ENSMUSG00000086439.1", "ENSMUSG00000086050.1", "ENSMUSG00000025574.13", "ENSMUSG00000040481.17", "ENSMUSG00000020614.13", "ENSMUSG00000065583.1", "ENSMUSG00000110653.1", "ENSMUSG00000018651.14", "ENSMUSG00000013646.17", "ENSMUSG00000020419.11", "ENSMUSG00000000204.16", "ENSMUSG00000041629.7", "ENSMUSG00000082309.3", "ENSMUSG00000017446.14", "ENSMUSG00000013643.13", "ENSMUSG00000086391.1", "ENSMUSG00000064632.1", "ENSMUSG00000039963.20", "ENSMUSG00000020335.13", "ENSMUSG00000086686.1", "ENSMUSG00000084850.1", "ENSMUSG00000092455.1"]
slop = 250000  # flank added on each side of the gene when selecting DMRs
n_pc = 10  # number of principal components handed to sc.pp.neighbors
resolution = 1  # resolution handed to sc.tl.leiden


In [5]:
# One output directory per chromosome, resolved against the current working directory.
output_dir = pathlib.Path(chrom)
# exist_ok=True: re-running the notebook must not fail when the directory is already there.
output_dir.mkdir(exist_ok=True)

## Cell Meta

In [6]:
cell_tidy_data = pd.read_msgpack(
    '/home/hanliu/project/mouse_rostral_brain/study/ClusteringSummary/Summary/TotalClusteringResults.msg'
)

# Sub-types of cells whose CellClass is 'Exc' or 'Inh', skipping any label that
# contains 'Outlier'; spaces become underscores in the resulting names.
use_clusters = [
    subtype.replace(' ', '_')
    for subtype in cell_tidy_data.loc[cell_tidy_data['CellClass'].isin(['Exc', 'Inh']), 'SubType'].unique()
    if 'Outlier' not in subtype
]
len(use_clusters)

145

## ATAC peaks

In [7]:
# Load the merged ATAC peak table and keep only rows whose index label starts
# with the 'Sub<chrom>_' prefix of the current chromosome.
atac_peak = (
    pd.read_msgpack('/home/hanliu/project/mouse_rostral_brain/study/DMRCluster/SubType.ATAC_peak_merged.msg')
    .loc[lambda table: table.index.str.startswith(f'Sub{chrom}_')]
    .copy()
)

## Gene Info

In [8]:
# Gene annotation table indexed by gene_id, restricted to the current chromosome.
gene_meta = (
    pd.read_csv(
        '/home/hanliu/ref/mouse/gencode/vm22/gencode.vM22.annotation.gene.flat.tsv.gz',
        sep='\t',
        index_col='gene_id',
    )
    .loc[lambda table: table['chrom'] == chrom]
    .copy()
)

In [9]:
# Exon intervals: a headerless, tab-separated table; column names are assigned below.
# NOTE(review): exon_bed is not referenced by any other cell visible here and is
# not filtered by chrom — confirm it is still needed.
exon_bed = pd.read_csv('/home/hanliu/ref/mouse/gencode/vm22/genome_anno/exon.all.bed',
                       header=None, sep='\t')
exon_bed.columns = ['chrom', 'start', 'end', 'gene_id', 'gene_name']

## DMR Info

In [10]:
# --- DMR rate table: keep rows whose id carries the 'Sub<chrom>_' prefix ---
with pd.HDFStore('/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/DMRInfo.h5', 'r') as hdf:
    dmr_rate = hdf['Rate']
dmr_rate = dmr_rate.loc[dmr_rate.index.str.startswith(f'Sub{chrom}_')].copy()

# --- DMR-gene correlation: indexed by (DMR, Gene), limited to the DMRs kept above ---
dmr_corr = pd.read_msgpack(
    '/home/hanliu/project/mouse_rostral_brain/study/DMRGeneCorr/TotalGeneDMRCorrLoop.0.3.msg'
).set_index(['DMR', 'Gene'])
dmr_corr = dmr_corr.loc[dmr_corr.index.get_level_values('DMR').isin(dmr_rate.index)].copy()

# --- DMR coordinates: the fourth column of the BED file becomes the index ---
dmr_bed = pd.read_csv(
    '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/TotalDMR.nofilter.bed',
    header=None,
    index_col=3,
    sep='\t',
)
dmr_bed.columns = ['chrom', 'start', 'end']
dmr_bed = dmr_bed.loc[dmr_bed['chrom'] == chrom].copy()

# --- Hypo-DMR hits: rows follow dmr_rate, columns follow use_clusters ---
dmr_hits = anndata.read_h5ad('/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/TotalHits.HypoDMR.h5ad')
dmr_hits = dmr_hits[dmr_rate.index, :].copy()
dmr_hits = dmr_hits[:, use_clusters].copy()

# --- DMR annotation matrix: rows follow dmr_rate ---
dmr_annot = anndata.read_h5ad(
    '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/DMRAnnotation.h5ad'
)[dmr_rate.index, :].copy()

In [11]:
# Number of DMR rows kept after restricting to the current chromosome's ids.
dmr_hits.n_obs

233508

In [12]:
def get_gene(gene_id):
    """Look up one gene in ``gene_meta``.

    Parameters
    ----------
    gene_id
        Index label of the gene in the module-level ``gene_meta`` table.

    Returns
    -------
    tuple
        ``(gene_id, chrom, start, end, strand)``, where ``gene_id`` is the
        label of the selected row.
    """
    record = gene_meta.loc[gene_id]
    fields = tuple(record[key] for key in ('chrom', 'start', 'end', 'strand'))
    return (record.name, *fields)

## Gene's DMR clustering

In [13]:
def calculate_gene(gene_id):
    """Cluster the DMRs around one gene by their rate profile across ``use_clusters``.

    DMRs from the module-level ``dmr_bed`` that lie strictly inside the window
    ``(gene_start - slop, gene_end + slop)`` are selected. Their rows of
    ``dmr_rate`` (columns limited to ``use_clusters``) form an AnnData object
    with DMRs as observations, which is scaled, reduced by PCA, turned into a
    neighbor graph and clustered with Leiden.

    Parameters
    ----------
    gene_id
        Index label of the gene in ``gene_meta``.

    Returns
    -------
    pandas.DataFrame
        ``adata.obs``: indexed by DMR id, holding the ``leiden`` label written
        by ``sc.tl.leiden``.

    Raises
    ------
    ValueError
        If no DMR falls inside the window around the gene.
    """
    # get gene information
    gene_id, _, gene_start, gene_end, _ = get_gene(gene_id)

    # select related DMRs: both ends must fall inside the slop-extended gene span
    in_window = ((dmr_bed['start'] > gene_start - slop) &
                 (dmr_bed['end'] < gene_end + slop))
    related_dmr = dmr_bed[in_window]
    if related_dmr.shape[0] == 0:
        # Fail early with the gene named; otherwise np.log2(0) below yields -inf,
        # which cannot be converted to an integer n_neighbors.
        raise ValueError(f'No DMR found within {slop} of gene {gene_id}')

    related_dmr_rate = dmr_rate.loc[related_dmr.index, use_clusters]
    # Fill missing rates with the column mean over the selected DMRs;
    # a column with no observed value at all stays NaN.
    related_dmr_rate = related_dmr_rate.fillna(related_dmr_rate.mean())

    # construct Adata: observations are DMRs, variables are cell clusters
    adata = anndata.AnnData(X=related_dmr_rate.values.copy(),
                            obs=pd.DataFrame([], related_dmr_rate.index),
                            var=pd.DataFrame([], related_dmr_rate.columns))
    sc.pp.scale(adata)
    sc.pp.pca(adata)

    # The neighborhood size grows with log2 of the number of DMRs.
    sc.pp.neighbors(adata, n_neighbors=int(round(np.log2(adata.shape[0]))), n_pcs=n_pc)
    sc.tl.leiden(adata, resolution=resolution)
    return adata.obs


def get_annotation(gene_id):
    """Build the per-DMR annotation table for one gene.

    Starts from the Leiden labels returned by ``calculate_gene`` and adds, for
    every DMR around the gene:

    * ``Corr``: value from ``dmr_corr`` for this (DMR, gene) pair, 0 when absent
    * ``reldist_tss``: DMR center relative to the gene, in units of gene length,
      measured from ``gene_start`` on the '+' strand and from ``gene_end`` otherwise
    * ``in_gene_body``: whether ``reldist_tss`` lies strictly between 0 and 1
    * ``is_dna_te`` / ``is_line_te`` / ``is_ltr_te`` / ``is_sine_te``
    * ``in_intron`` / ``in_exon`` / ``in_utr3`` / ``in_utr5``: the annotation
      flag combined with ``in_gene_body``
    * ``feDMR`` / ``adultDMR``
    * ``HypoDMR.<cluster>``, ``DMRRate.<cluster>`` and ``ATACPeak.<cluster>``
      columns for every entry of ``use_clusters``

    Parameters
    ----------
    gene_id
        Index label of the gene in ``gene_meta``.

    Returns
    -------
    pandas.DataFrame
        One row per DMR, columns as listed above plus ``leiden``.
    """
    gene_id, _, gene_start, gene_end, strand = get_gene(gene_id)
    gene_cluster = calculate_gene(gene_id)

    # correlation of each DMR with this gene; DMRs without an entry get 0
    this_corr = dmr_corr[dmr_corr.index.get_level_values('Gene') == gene_id]['Corr']
    this_corr.index = this_corr.index.droplevel('Gene')
    gene_cluster['Corr'] = gene_cluster.index.map(this_corr).fillna(0)

    # position of the DMR center relative to the gene, oriented by strand
    this_dmr_bed = dmr_bed.loc[gene_cluster.index]
    dmr_center = (this_dmr_bed['end'] + this_dmr_bed['start']) / 2
    gene_length = gene_end - gene_start
    if strand == '+':
        gene_cluster['reldist_tss'] = (dmr_center - gene_start) / gene_length
    else:
        gene_cluster['reldist_tss'] = (gene_end - dmr_center) / gene_length
    gene_cluster['in_gene_body'] = (gene_cluster['reldist_tss'] > 0) & (gene_cluster['reldist_tss'] < 1)

    this_annot = dmr_annot[gene_cluster.index]
    annot_df = pd.DataFrame(this_annot.X.todense(),
                            index=this_annot.obs_names, columns=this_annot.var_names)

    # annotate TE cols
    # NOTE(review): these are positional column ranges; they assume a fixed
    # column order in DMRAnnotation.h5ad — confirm against that file.
    te_column_ranges = (
        ('is_dna_te', slice(20, 33)),
        ('is_line_te', slice(33, 39)),
        ('is_ltr_te', slice(39, 45)),
        ('is_sine_te', slice(45, 52)),
    )
    for flag_name, column_range in te_column_ranges:
        te_columns = annot_df.columns[column_range]
        gene_cluster[flag_name] = annot_df[te_columns].sum(axis=1) != 0

    # this dmr within GOI's gene feature: the annotation flag only counts when
    # the DMR also sits inside this gene's body
    gene_features = (
        ('in_intron', 'intron'),
        ('in_exon', 'exon'),
        ('in_utr3', 'UTR3'),
        ('in_utr5', 'UTR5'),
    )
    for flag_name, feature in gene_features:
        gene_cluster[flag_name] = annot_df[feature].astype(bool) & gene_cluster['in_gene_body']

    # previous mC study
    gene_cluster['feDMR'] = annot_df['feDMR'].astype(bool)
    gene_cluster['adultDMR'] = annot_df['adultDMR'].astype(bool)

    other_profiles = []
    # DMR hypo call in each cluster
    this_hypo_hits = dmr_hits[gene_cluster.index]
    hits_df = pd.DataFrame(this_hypo_hits.X.todense(),
                           index=this_hypo_hits.obs_names, columns=this_hypo_hits.var_names)
    hits_df.columns = hits_df.columns.map(lambda i: f'HypoDMR.{i}')
    other_profiles.append(hits_df)

    # DMR rate, rows ordered by leiden label, missing values filled with the column mean
    related_dmr_rate = dmr_rate.loc[gene_cluster['leiden'].sort_values().index, use_clusters]
    related_dmr_rate = related_dmr_rate.fillna(related_dmr_rate.mean())
    related_dmr_rate.columns = related_dmr_rate.columns.map(lambda i: f'DMRRate.{i}')
    other_profiles.append(related_dmr_rate)

    # atac peak; columns are renamed before the frame is collected
    atac_peak_df = atac_peak.loc[related_dmr_rate.index, use_clusters].copy()
    atac_peak_df.columns = atac_peak_df.columns.map(lambda i: f'ATACPeak.{i}')
    other_profiles.append(atac_peak_df)

    dmr_annotation = pd.concat([gene_cluster] + other_profiles, axis=1, sort=True)
    return dmr_annotation

In [None]:
for gene in genes:
    print(gene)
    # The cluster-level file is written last, so its presence marks a gene
    # whose outputs are complete and can be skipped.
    check_path = output_dir / f'{gene}.DMR_cluster.msg'
    if not check_path.exists():
        # per-DMR table
        dmr_annotation = get_annotation(gene)
        dmr_annotation.to_msgpack(output_dir / f'{gene}.DMR_detail.msg', compress='zlib')

        # per-leiden-cluster mean of every column
        cluster_annotation = dmr_annotation.groupby('leiden').mean()
        cluster_annotation.to_msgpack(check_path, compress='zlib')

ENSMUSG00000069769.13
ENSMUSG00000020431.5
ENSMUSG00000020173.17
ENSMUSG00000050965.14
ENSMUSG00000045287.6
ENSMUSG00000039976.4
ENSMUSG00000056427.10
ENSMUSG00000009073.16
ENSMUSG00000018378.13
ENSMUSG00000020523.14
ENSMUSG00000053930.13
ENSMUSG00000020919.11
ENSMUSG00000020122.16
ENSMUSG00000020363.6
ENSMUSG00000061306.16
ENSMUSG00000018849.6
ENSMUSG00000020811.16
ENSMUSG00000053519.16
ENSMUSG00000004040.16
ENSMUSG00000017146.12
ENSMUSG00000018427.7
ENSMUSG00000020866.17
ENSMUSG00000000632.13
ENSMUSG00000084967.1
ENSMUSG00000020847.15
ENSMUSG00000041654.15
ENSMUSG00000018334.18
ENSMUSG00000036264.9
ENSMUSG00000020374.16
ENSMUSG00000018634.10
ENSMUSG00000025372.16
ENSMUSG00000059248.13
ENSMUSG00000056752.16
ENSMUSG00000087259.7
ENSMUSG00000020376.17
ENSMUSG00000017466.9
ENSMUSG00000003949.16
ENSMUSG00000020176.17
ENSMUSG00000041592.16
ENSMUSG00000020522.13
ENSMUSG00000000751.13
ENSMUSG00000020599.13
ENSMUSG00000040711.7
ENSMUSG00000033389.16
ENSMUSG00000086193.1
ENSMUSG00000017692.8
E