In [1]:
import pandas as pd
import numpy as np
import anndata
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import warnings
import pathlib

warnings.filterwarnings('ignore')

In [2]:
from matplotlib import rc

# Global matplotlib styling: small label text and thin strokes for every figure.
labelsize = 6
linewidth = 0.6
rc('lines', linewidth=linewidth)
rc('axes', labelsize=labelsize, linewidth=linewidth)
# x and y ticks share the same settings; minor ticks are drawn 0.2 thinner.
for _tick in ('xtick', 'ytick'):
    rc(_tick, labelsize=labelsize)
    rc(f'{_tick}.major', width=linewidth)
    rc(f'{_tick}.minor', width=linewidth - 0.2)

In [3]:
# Default run parameters. The "# Parameters" cell that follows re-assigns all five names.
chrom = 'chr2'  # chromosome used to subset gene_meta, dmr_bed, dmr_rate and atac_peak
# NOTE(review): get_gene looks these labels up in gene_meta's gene_id index, so a
# gene symbol such as 'Lhx6' presumably only works if that index holds symbols — confirm.
genes = ['Lhx6']  # genes to process in the final loop
slop = 250000  # distance added on each side of the gene span when selecting DMRs
n_pc = 10  # number of principal components handed to sc.pp.neighbors
resolution = 1  # resolution handed to sc.tl.leiden

In [4]:
# Parameters
# NOTE(review): this cell re-assigns chrom, genes, slop, n_pc and resolution,
# replacing the defaults set in the previous cell. It looks tool-injected
# (e.g. papermill) — confirm before editing it by hand.
chrom = "chr3"
genes = ["ENSMUSG00000059921.15", "ENSMUSG00000051000.17", "ENSMUSG00000044864.16", "ENSMUSG00000027674.16", "ENSMUSG00000098108.7", "ENSMUSG00000027748.11", "ENSMUSG00000056900.13", "ENSMUSG00000052544.9", "ENSMUSG00000040209.12", "ENSMUSG00000000794.9", "ENSMUSG00000097311.7", "ENSMUSG00000039286.12", "ENSMUSG00000040896.16", "ENSMUSG00000039519.6", "ENSMUSG00000027827.16", "ENSMUSG00000033767.14", "ENSMUSG00000033377.14", "ENSMUSG00000041220.10", "ENSMUSG00000053819.16", "ENSMUSG00000104283.1", "ENSMUSG00000087625.1", "ENSMUSG00000028184.12", "ENSMUSG00000044167.6", "ENSMUSG00000039037.5", "ENSMUSG00000027977.15", "ENSMUSG00000039047.17", "ENSMUSG00000110441.1", "ENSMUSG00000039234.11", "ENSMUSG00000051777.6", "ENSMUSG00000062232.14", "ENSMUSG00000104585.1", "ENSMUSG00000039058.11", "ENSMUSG00000027506.15", "ENSMUSG00000027784.10", "ENSMUSG00000104093.1", "ENSMUSG00000046743.6", "ENSMUSG00000028360.9", "ENSMUSG00000027882.18", "ENSMUSG00000040339.10", "ENSMUSG00000027895.10", "ENSMUSG00000028019.9", "ENSMUSG00000039735.15", "ENSMUSG00000061175.11", "ENSMUSG00000028152.10", "ENSMUSG00000033147.16", "ENSMUSG00000033910.13", "ENSMUSG00000042035.11", "ENSMUSG00000037610.15", "ENSMUSG00000102437.1", "ENSMUSG00000027820.12", "ENSMUSG00000028082.14", "ENSMUSG00000025764.14", "ENSMUSG00000028163.17", "ENSMUSG00000069072.9", "ENSMUSG00000058897.17", "ENSMUSG00000027849.18", "ENSMUSG00000028194.15", "ENSMUSG00000028078.14", "ENSMUSG00000027669.14", "ENSMUSG00000028179.12", "ENSMUSG00000028020.16", "ENSMUSG00000028127.10", "ENSMUSG00000025255.18", "ENSMUSG00000028164.15", "ENSMUSG00000027500.10", "ENSMUSG00000044365.15", "ENSMUSG00000041734.15", "ENSMUSG00000040151.9", "ENSMUSG00000005034.15", "ENSMUSG00000056476.13", "ENSMUSG00000027971.16", "ENSMUSG00000059857.15", "ENSMUSG00000027864.9", "ENSMUSG00000037062.13", "ENSMUSG00000028186.14", "ENSMUSG00000090120.1", "ENSMUSG00000037174.18", "ENSMUSG00000104710.1", "ENSMUSG00000027684.16", "ENSMUSG00000008730.17", 
"ENSMUSG00000027966.20", "ENSMUSG00000028032.13", "ENSMUSG00000028273.15", "ENSMUSG00000027663.12", "ENSMUSG00000028161.17", "ENSMUSG00000091685.6", "ENSMUSG00000028266.17", "ENSMUSG00000027803.14", "ENSMUSG00000038766.16", "ENSMUSG00000028256.16", "ENSMUSG00000027963.14", "ENSMUSG00000052430.15", "ENSMUSG00000028007.13", "ENSMUSG00000028080.16", "ENSMUSG00000032826.18", "ENSMUSG00000048332.13", "ENSMUSG00000074513.9", "ENSMUSG00000027831.9", "ENSMUSG00000056306.5", "ENSMUSG00000089924.1", "ENSMUSG00000034109.15", "ENSMUSG00000040943.12", "ENSMUSG00000027834.15", "ENSMUSG00000103181.1", "ENSMUSG00000040016.16", "ENSMUSG00000048458.8", "ENSMUSG00000027630.13", "ENSMUSG00000105891.4", "ENSMUSG00000028005.13", "ENSMUSG00000086708.1", "ENSMUSG00000100252.6", "ENSMUSG00000027716.13", "ENSMUSG00000074182.8", "ENSMUSG00000106515.4", "ENSMUSG00000033502.14", "ENSMUSG00000027993.16", "ENSMUSG00000085953.2", "ENSMUSG00000028185.12", "ENSMUSG00000050315.14", "ENSMUSG00000027737.10", "ENSMUSG00000102785.5", "ENSMUSG00000027797.15", "ENSMUSG00000027665.13", "ENSMUSG00000028086.15", "ENSMUSG00000004591.16", "ENSMUSG00000045328.11", "ENSMUSG00000027562.12", "ENSMUSG00000049100.15", "ENSMUSG00000036381.13", "ENSMUSG00000036834.16", "ENSMUSG00000051860.13", "ENSMUSG00000036863.12", "ENSMUSG00000068747.14", "ENSMUSG00000056145.5", "ENSMUSG00000034009.14", "ENSMUSG00000034640.9", "ENSMUSG00000027778.15", "ENSMUSG00000039167.12", "ENSMUSG00000106139.1", "ENSMUSG00000028004.12", "ENSMUSG00000033400.14", "ENSMUSG00000106230.1", "ENSMUSG00000027858.13", "ENSMUSG00000106407.1", "ENSMUSG00000027859.10", "ENSMUSG00000027868.11", "ENSMUSG00000027956.11", "ENSMUSG00000104586.1", "ENSMUSG00000027861.13", "ENSMUSG00000008763.16", "ENSMUSG00000039131.15", "ENSMUSG00000027695.16", "ENSMUSG00000059834.12", "ENSMUSG00000028149.12", "ENSMUSG00000025437.15", "ENSMUSG00000027883.15", "ENSMUSG00000027878.11", "ENSMUSG00000053931.11", "ENSMUSG00000036503.13", "ENSMUSG00000106577.1", 
"ENSMUSG00000053897.15", "ENSMUSG00000014601.13", "ENSMUSG00000017688.14", "ENSMUSG00000033161.10", "ENSMUSG00000105053.1", "ENSMUSG00000106052.1", "ENSMUSG00000069114.8", "ENSMUSG00000036353.13", "ENSMUSG00000074207.10", "ENSMUSG00000027843.13", "ENSMUSG00000057123.14", "ENSMUSG00000040998.18", "ENSMUSG00000106194.1", "ENSMUSG00000104543.1", "ENSMUSG00000028035.13", "ENSMUSG00000085527.1", "ENSMUSG00000037225.13", "ENSMUSG00000078161.8", "ENSMUSG00000027536.6", "ENSMUSG00000032902.1", "ENSMUSG00000038495.14", "ENSMUSG00000039887.11", "ENSMUSG00000027712.13", "ENSMUSG00000090066.2", "ENSMUSG00000104764.1", "ENSMUSG00000038777.19", "ENSMUSG00000068798.10", "ENSMUSG00000043542.12", "ENSMUSG00000103345.1", "ENSMUSG00000102379.1", "ENSMUSG00000027947.11", "ENSMUSG00000027660.16", "ENSMUSG00000104969.1", "ENSMUSG00000104887.1", "ENSMUSG00000028011.16", "ENSMUSG00000028125.14", "ENSMUSG00000028063.15", "ENSMUSG00000105012.1", "ENSMUSG00000105797.1", "ENSMUSG00000100962.3", "ENSMUSG00000036580.15", "ENSMUSG00000039831.16", "ENSMUSG00000102595.1", "ENSMUSG00000037016.11", "ENSMUSG00000048652.12", "ENSMUSG00000097156.7", "ENSMUSG00000050192.8", "ENSMUSG00000101860.4", "ENSMUSG00000104908.1", "ENSMUSG00000028008.10", "ENSMUSG00000105331.1", "ENSMUSG00000054414.4", "ENSMUSG00000105511.1", "ENSMUSG00000003617.16", "ENSMUSG00000102498.1", "ENSMUSG00000027865.10", "ENSMUSG00000106270.1", "ENSMUSG00000105337.4", "ENSMUSG00000033981.14", "ENSMUSG00000106354.1", "ENSMUSG00000051278.12", "ENSMUSG00000049565.16", "ENSMUSG00000056498.13", "ENSMUSG00000027860.15", "ENSMUSG00000080424.1", "ENSMUSG00000027955.16", "ENSMUSG00000105923.4", "ENSMUSG00000102574.1", "ENSMUSG00000106073.1", "ENSMUSG00000106583.1", "ENSMUSG00000037111.9", "ENSMUSG00000048581.12", "ENSMUSG00000104939.1", "ENSMUSG00000027556.15", "ENSMUSG00000044165.12", "ENSMUSG00000050150.16", "ENSMUSG00000000340.10", "ENSMUSG00000028017.7", "ENSMUSG00000036960.10", "ENSMUSG00000031286.6", "ENSMUSG00000105568.1", 
"ENSMUSG00000106245.1", "ENSMUSG00000102844.1", "ENSMUSG00000092196.2", "ENSMUSG00000043164.3", "ENSMUSG00000028001.16", "ENSMUSG00000028064.17", "ENSMUSG00000097428.1", "ENSMUSG00000027698.14", "ENSMUSG00000015745.9", "ENSMUSG00000051076.8", "ENSMUSG00000027722.14", "ENSMUSG00000106461.1", "ENSMUSG00000027894.14", "ENSMUSG00000105324.1", "ENSMUSG00000104108.1", "ENSMUSG00000027709.9", "ENSMUSG00000028015.3", "ENSMUSG00000097365.7", "ENSMUSG00000090163.1", "ENSMUSG00000104901.1", "ENSMUSG00000027994.14", "ENSMUSG00000028031.6", "ENSMUSG00000099146.7", "ENSMUSG00000001052.15", "ENSMUSG00000118443.1", "ENSMUSG00000037652.15", "ENSMUSG00000105753.1", "ENSMUSG00000041842.15", "ENSMUSG00000105811.1", "ENSMUSG00000033342.13", "ENSMUSG00000032913.13", "ENSMUSG00000027776.12", "ENSMUSG00000105516.4", "ENSMUSG00000104362.1", "ENSMUSG00000078620.1", "ENSMUSG00000033882.15", "ENSMUSG00000105445.1", "ENSMUSG00000104677.1", "ENSMUSG00000097252.1", "ENSMUSG00000105475.1", "ENSMUSG00000103655.1", "ENSMUSG00000104556.1", "ENSMUSG00000027499.12", "ENSMUSG00000068696.6", "ENSMUSG00000074579.14", "ENSMUSG00000090017.1", "ENSMUSG00000036832.5", "ENSMUSG00000041977.18", "ENSMUSG00000104703.1", "ENSMUSG00000097280.1", "ENSMUSG00000028089.5", "ENSMUSG00000104411.1", "ENSMUSG00000027615.14", "ENSMUSG00000037892.13", "ENSMUSG00000039865.8", "ENSMUSG00000074344.6", "ENSMUSG00000027933.11", "ENSMUSG00000027750.16", "ENSMUSG00000102820.1", "ENSMUSG00000105717.1", "ENSMUSG00000104785.1", "ENSMUSG00000091329.2", "ENSMUSG00000105034.1", "ENSMUSG00000037994.14", "ENSMUSG00000045326.13", "ENSMUSG00000104860.1", "ENSMUSG00000028013.16", "ENSMUSG00000085007.2", "ENSMUSG00000028036.6", "ENSMUSG00000097031.2", "ENSMUSG00000050931.7", "ENSMUSG00000100799.2", "ENSMUSG00000005813.12", "ENSMUSG00000042244.4", "ENSMUSG00000085655.5", "ENSMUSG00000034151.13", "ENSMUSG00000105777.1", "ENSMUSG00000060913.6", "ENSMUSG00000041959.14", "ENSMUSG00000037643.14", "ENSMUSG00000102362.1", "ENSMUSG00000027719.15", 
"ENSMUSG00000074238.6", "ENSMUSG00000039174.14", "ENSMUSG00000106443.1", "ENSMUSG00000102629.1", "ENSMUSG00000028018.15", "ENSMUSG00000027870.8", "ENSMUSG00000106475.1", "ENSMUSG00000086564.9", "ENSMUSG00000058388.14", "ENSMUSG00000038170.15", "ENSMUSG00000027984.8", "ENSMUSG00000027889.17", "ENSMUSG00000032952.11", "ENSMUSG00000074212.7", "ENSMUSG00000105652.1", "ENSMUSG00000074281.3", "ENSMUSG00000106525.1", "ENSMUSG00000058400.13", "ENSMUSG00000028128.13", "ENSMUSG00000085754.7", "ENSMUSG00000102893.1", "ENSMUSG00000091572.9", "ENSMUSG00000001419.17", "ENSMUSG00000102397.1", "ENSMUSG00000015711.8", "ENSMUSG00000027792.11", "ENSMUSG00000048655.17", "ENSMUSG00000106432.1", "ENSMUSG00000104554.1", "ENSMUSG00000104791.1", "ENSMUSG00000094018.2", "ENSMUSG00000027999.15", "ENSMUSG00000000001.4", "ENSMUSG00000027668.13", "ENSMUSG00000106589.1", "ENSMUSG00000089635.1", "ENSMUSG00000105023.1", "ENSMUSG00000027973.11", "ENSMUSG00000011463.5", "ENSMUSG00000027951.16", "ENSMUSG00000040213.13", "ENSMUSG00000085106.1", "ENSMUSG00000027555.8", "ENSMUSG00000022187.2", "ENSMUSG00000106095.2", "ENSMUSG00000043873.11", "ENSMUSG00000104528.1", "ENSMUSG00000055301.8", "ENSMUSG00000102862.1", "ENSMUSG00000106137.1", "ENSMUSG00000102581.1", "ENSMUSG00000106087.1", "ENSMUSG00000049404.7", "ENSMUSG00000027884.16", "ENSMUSG00000047696.16", "ENSMUSG00000028098.13", "ENSMUSG00000046519.15", "ENSMUSG00000028167.15", "ENSMUSG00000033386.10", "ENSMUSG00000103485.1", "ENSMUSG00000104693.1", "ENSMUSG00000105148.1", "ENSMUSG00000097183.2", "ENSMUSG00000027788.6", "ENSMUSG00000104109.1", "ENSMUSG00000033233.17", "ENSMUSG00000080907.1", "ENSMUSG00000000562.5", "ENSMUSG00000104918.1", "ENSMUSG00000102699.1", "ENSMUSG00000053398.11", "ENSMUSG00000001021.13", "ENSMUSG00000045576.16", "ENSMUSG00000105092.1", "ENSMUSG00000027939.11", "ENSMUSG00000094962.1", "ENSMUSG00000027550.14", "ENSMUSG00000098659.7", "ENSMUSG00000105279.1", "ENSMUSG00000106121.1", "ENSMUSG00000084617.1", "ENSMUSG00000036825.12", 
"ENSMUSG00000088016.1", "ENSMUSG00000097040.6", "ENSMUSG00000033831.5", "ENSMUSG00000103912.1", "ENSMUSG00000044313.13", "ENSMUSG00000027997.9", "ENSMUSG00000025757.12", "ENSMUSG00000089932.1", "ENSMUSG00000106149.1", "ENSMUSG00000106108.1", "ENSMUSG00000048304.8", "ENSMUSG00000033860.13", "ENSMUSG00000103124.1", "ENSMUSG00000027985.14", "ENSMUSG00000104553.1", "ENSMUSG00000106062.1", "ENSMUSG00000028134.11", "ENSMUSG00000106367.1", "ENSMUSG00000074388.3", "ENSMUSG00000098254.1"]
slop = 250000
n_pc = 10
resolution = 1


In [5]:
# Per-gene result files are written into a directory named after the chromosome.
output_dir = pathlib.Path(chrom)
output_dir.mkdir(exist_ok=True)  # no error if the directory already exists

## Cell Meta

In [6]:
cell_tidy_data = pd.read_msgpack(
    '/home/hanliu/project/mouse_rostral_brain/study/ClusteringSummary/Summary/TotalClusteringResults.msg'
)

# Unique SubType labels of cells whose CellClass is 'Exc' or 'Inh'.
neuronal_subtypes = cell_tidy_data.loc[
    cell_tidy_data['CellClass'].isin(['Exc', 'Inh']), 'SubType'
].unique()

# Drop any label containing 'Outlier' and replace spaces with underscores.
use_clusters = [
    subtype.replace(' ', '_')
    for subtype in neuronal_subtypes
    if 'Outlier' not in subtype
]
len(use_clusters)

145

## ATAC peaks

In [7]:
atac_peak = pd.read_msgpack('/home/hanliu/project/mouse_rostral_brain/study/DMRCluster/SubType.ATAC_peak_merged.msg')
# Keep only the rows whose index label starts with "Sub<chrom>_".
atac_peak = atac_peak.loc[atac_peak.index.str.startswith(f'Sub{chrom}_')].copy()

## Gene Info

In [8]:
# Gene table indexed by gene_id, restricted to genes on the selected chromosome.
gene_meta = pd.read_csv(
    '/home/hanliu/ref/mouse/gencode/vm22/gencode.vM22.annotation.gene.flat.tsv.gz',
    sep='\t',
    index_col='gene_id',
).loc[lambda table: table['chrom'] == chrom].copy()

In [9]:
# Exon intervals read from a headerless, tab-separated file; column names are assigned below.
# NOTE(review): exon_bed is not referenced by any other cell visible here, and it is
# not filtered by chrom — confirm whether it is still needed.
exon_bed = pd.read_csv('/home/hanliu/ref/mouse/gencode/vm22/genome_anno/exon.all.bed',
                       header=None, sep='\t')
exon_bed.columns = ['chrom', 'start', 'end', 'gene_id', 'gene_name']

## DMR Info

In [10]:
# 'Rate' table of the DMR store, restricted to index labels starting with "Sub<chrom>_".
with pd.HDFStore('/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/DMRInfo.h5', 'r') as hdf:
    dmr_rate = hdf['Rate']
dmr_rate = dmr_rate.loc[dmr_rate.index.str.startswith(f'Sub{chrom}_')].copy()

# DMR-gene correlation table, indexed by (DMR, Gene) and limited to the DMRs kept in dmr_rate.
dmr_corr = pd.read_msgpack(
    '/home/hanliu/project/mouse_rostral_brain/study/DMRGeneCorr/TotalGeneDMRCorrLoop.0.3.msg'
).set_index(['DMR', 'Gene'])
dmr_corr = dmr_corr.loc[dmr_corr.index.get_level_values('DMR').isin(dmr_rate.index)].copy()

# DMR coordinates: the fourth column of the file becomes the index, then filter by chromosome.
dmr_bed = pd.read_csv(
    '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/TotalDMR.nofilter.bed',
    header=None,
    index_col=3,
    sep='\t',
)
dmr_bed.columns = ['chrom', 'start', 'end']
dmr_bed = dmr_bed.loc[dmr_bed['chrom'] == chrom].copy()

# Hypo-DMR hits: rows limited to dmr_rate's DMRs, then columns limited to use_clusters.
dmr_hits = anndata.read_h5ad(
    '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/TotalHits.HypoDMR.h5ad'
)[dmr_rate.index, :].copy()
dmr_hits = dmr_hits[:, use_clusters].copy()

# DMR annotation matrix, rows limited to dmr_rate's DMRs.
dmr_annot = anndata.read_h5ad(
    '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/DMRAnnotation.h5ad'
)[dmr_rate.index, :].copy()

In [11]:
# Number of rows in dmr_hits, i.e. the DMRs kept after subsetting to dmr_rate's index.
dmr_hits.shape[0]

281105

In [12]:
def get_gene(gene_id):
    """Look up one gene in ``gene_meta``.

    Parameters
    ----------
    gene_id
        Label in the index of the notebook-level ``gene_meta`` table.

    Returns
    -------
    tuple
        ``(name, chrom, start, end, strand)``, where ``name`` is the row's
        index label and the remaining values are read from the row's columns
        of the same names.
    """
    record = gene_meta.loc[gene_id]
    fields = ('chrom', 'start', 'end', 'strand')
    return (record.name, *(record[field] for field in fields))

## Gene's DMR clustering

In [13]:
def calculate_gene(gene_id):
    """Cluster the DMRs around one gene by their rate profile.

    DMRs from ``dmr_bed`` whose start is greater than ``gene_start - slop`` and
    whose end is less than ``gene_end + slop`` are selected. Their values in
    ``dmr_rate`` over ``use_clusters`` are scaled, reduced with PCA and
    partitioned with Leiden on a nearest-neighbour graph.

    Reads the notebook-level names ``dmr_bed``, ``dmr_rate``, ``use_clusters``,
    ``slop``, ``n_pc`` and ``resolution``.

    Parameters
    ----------
    gene_id
        Label accepted by ``get_gene``.

    Returns
    -------
    pandas.DataFrame
        ``adata.obs`` of the temporary AnnData object: indexed like the
        selected DMRs and carrying the columns added by ``sc.tl.leiden``.
    """
    # Only the gene span is needed here.
    _, _, gene_start, gene_end, _ = get_gene(gene_id)

    # DMRs lying strictly inside the gene span extended by `slop` on both sides.
    in_window = ((dmr_bed['start'] > gene_start - slop) &
                 (dmr_bed['end'] < gene_end + slop))
    related_dmr = dmr_bed[in_window]

    # Missing rates are replaced by the mean of the same column (cluster)
    # computed over the selected DMRs.
    related_dmr_rate = dmr_rate.loc[related_dmr.index, use_clusters]
    related_dmr_rate = related_dmr_rate.fillna(related_dmr_rate.mean(), axis=0)

    # Rows (obs) are DMRs, columns (var) are clusters.
    adata = anndata.AnnData(X=related_dmr_rate.values.copy(),
                            obs=pd.DataFrame([], index=related_dmr_rate.index),
                            var=pd.DataFrame([], index=related_dmr_rate.columns))
    sc.pp.scale(adata)
    sc.pp.pca(adata)

    # Neighbourhood size grows with log2 of the number of selected DMRs.
    # NOTE(review): there is no guard for windows with very few DMRs, where this
    # rounds to 0 or 1 and fewer than n_pc components may exist — confirm
    # whether such genes can occur in the input.
    n_neighbors = int(round(np.log2(adata.shape[0])))
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pc)
    sc.tl.leiden(adata, resolution=resolution)
    return adata.obs


def get_annotation(gene_id):
    """Build the per-DMR annotation table for one gene.

    Starts from the table returned by ``calculate_gene`` (one row per DMR) and
    adds, in this order:

    * ``Corr`` - value from ``dmr_corr`` for this (DMR, gene) pair, 0 when absent;
    * ``reldist_tss`` - DMR centre measured from the gene start ('+' strand) or
      gene end (any other strand), divided by the gene length;
    * ``in_gene_body`` - True when ``0 < reldist_tss < 1``;
    * ``is_dna_te`` / ``is_line_te`` / ``is_ltr_te`` / ``is_sine_te`` - True when
      any column of the corresponding positional slice of ``dmr_annot`` is non-zero;
    * ``in_intron`` / ``in_exon`` / ``in_utr3`` / ``in_utr5`` - the matching
      ``dmr_annot`` column AND ``in_gene_body``;
    * ``feDMR`` / ``adultDMR`` - the matching ``dmr_annot`` columns as booleans.

    Three per-cluster blocks are then concatenated column-wise, with prefixes
    ``HypoDMR.``, ``DMRRate.`` and ``ATACPeak.``.

    Reads the notebook-level names ``dmr_corr``, ``dmr_bed``, ``dmr_annot``,
    ``dmr_hits``, ``dmr_rate``, ``atac_peak`` and ``use_clusters``.

    Parameters
    ----------
    gene_id
        Label accepted by ``get_gene``.

    Returns
    -------
    pandas.DataFrame
        One row per DMR, with the columns described above.
    """
    gene_id, _, gene_start, gene_end, strand = get_gene(gene_id)
    gene_cluster = calculate_gene(gene_id)

    # Correlation of each DMR with this gene; DMRs without an entry get 0.
    this_corr = dmr_corr[dmr_corr.index.get_level_values('Gene') == gene_id]['Corr']
    this_corr.index = this_corr.index.droplevel('Gene')
    gene_cluster['Corr'] = gene_cluster.index.map(this_corr).fillna(0)

    # Relative position of the DMR centre along the gene, oriented by strand.
    this_dmr_bed = dmr_bed.loc[gene_cluster.index]
    dmr_center = (this_dmr_bed['end'] + this_dmr_bed['start']) / 2
    gene_length = gene_end - gene_start
    if strand == '+':
        gene_cluster['reldist_tss'] = (dmr_center - gene_start) / gene_length
    else:
        gene_cluster['reldist_tss'] = (gene_end - dmr_center) / gene_length
    gene_cluster['in_gene_body'] = (gene_cluster['reldist_tss'] > 0) & (gene_cluster['reldist_tss'] < 1)

    this_annot = dmr_annot[gene_cluster.index]
    annot_df = pd.DataFrame(this_annot.X.todense(),
                            index=this_annot.obs_names, columns=this_annot.var_names)

    # Transposable-element flags, selected by column position in dmr_annot.
    # NOTE(review): the slice bounds assume a fixed column order in
    # DMRAnnotation.h5ad — confirm against that file.
    te_column_slices = (
        ('is_dna_te', slice(20, 33)),
        ('is_line_te', slice(33, 39)),
        ('is_ltr_te', slice(39, 45)),
        ('is_sine_te', slice(45, 52)),
    )
    for flag, columns in te_column_slices:
        gene_cluster[flag] = annot_df[annot_df.columns[columns]].sum(axis=1) != 0

    # Gene-feature flags count only when the DMR lies inside this gene's body.
    gene_features = (
        ('in_intron', 'intron'),
        ('in_exon', 'exon'),
        ('in_utr3', 'UTR3'),
        ('in_utr5', 'UTR5'),
    )
    for flag, feature in gene_features:
        gene_cluster[flag] = annot_df[feature].astype(bool) & gene_cluster['in_gene_body']

    # Flags taken directly from the feDMR / adultDMR annotation columns.
    for flag in ('feDMR', 'adultDMR'):
        gene_cluster[flag] = annot_df[flag].astype(bool)

    other_profiles = []

    # Hypo-DMR call of each DMR in each cluster.
    this_hypo_hits = dmr_hits[gene_cluster.index]
    hits_df = pd.DataFrame(this_hypo_hits.X.todense(),
                           index=this_hypo_hits.obs_names, columns=this_hypo_hits.var_names)
    hits_df.columns = hits_df.columns.map(lambda i: f'HypoDMR.{i}')
    other_profiles.append(hits_df)

    # DMR rate, rows ordered by leiden label, missing values replaced by the column mean.
    related_dmr_rate = dmr_rate.loc[gene_cluster['leiden'].sort_values().index, use_clusters]
    related_dmr_rate = related_dmr_rate.fillna(related_dmr_rate.mean(), axis=0)
    related_dmr_rate.columns = related_dmr_rate.columns.map(lambda i: f'DMRRate.{i}')
    other_profiles.append(related_dmr_rate)

    # ATAC peak values for the same DMRs and clusters.
    atac_peak_df = atac_peak.loc[related_dmr_rate.index, use_clusters].copy()
    atac_peak_df.columns = atac_peak_df.columns.map(lambda i: f'ATACPeak.{i}')
    other_profiles.append(atac_peak_df)

    # Column-wise join on the DMR index.
    dmr_annotation = pd.concat([gene_cluster] + other_profiles, axis=1, sort=True)
    return dmr_annotation

In [None]:
for gene in genes:
    print(gene)
    # The cluster-level file is written last, so its presence marks a finished gene.
    check_path = output_dir / f'{gene}.DMR_cluster.msg'
    if not check_path.exists():
        # Per-DMR table first ...
        dmr_annotation = get_annotation(gene)
        dmr_annotation.to_msgpack(output_dir / f'{gene}.DMR_detail.msg', compress='zlib')

        # ... then its per-leiden-cluster mean.
        cluster_annotation = dmr_annotation.groupby('leiden').mean()
        cluster_annotation.to_msgpack(check_path, compress='zlib')

ENSMUSG00000059921.15
ENSMUSG00000051000.17
ENSMUSG00000044864.16
ENSMUSG00000027674.16
ENSMUSG00000098108.7
ENSMUSG00000027748.11
ENSMUSG00000056900.13
ENSMUSG00000052544.9
ENSMUSG00000040209.12
ENSMUSG00000000794.9
ENSMUSG00000097311.7
ENSMUSG00000039286.12
ENSMUSG00000040896.16
ENSMUSG00000039519.6
ENSMUSG00000027827.16
ENSMUSG00000033767.14
ENSMUSG00000033377.14
ENSMUSG00000041220.10
ENSMUSG00000053819.16
ENSMUSG00000104283.1
ENSMUSG00000087625.1
ENSMUSG00000028184.12
ENSMUSG00000044167.6
ENSMUSG00000039037.5
ENSMUSG00000027977.15
ENSMUSG00000039047.17
ENSMUSG00000110441.1
ENSMUSG00000039234.11
ENSMUSG00000051777.6
ENSMUSG00000062232.14
ENSMUSG00000104585.1
ENSMUSG00000039058.11
ENSMUSG00000027506.15
ENSMUSG00000027784.10
ENSMUSG00000104093.1
ENSMUSG00000046743.6
ENSMUSG00000028360.9
ENSMUSG00000027882.18
ENSMUSG00000040339.10
ENSMUSG00000027895.10
ENSMUSG00000028019.9
ENSMUSG00000039735.15
ENSMUSG00000061175.11
ENSMUSG00000028152.10
ENSMUSG00000033147.16
ENSMUSG00000033910.13
ENSM