# Cluster Differentially Methylated Genes

## Import

In [1]:
import numpy as np
import pandas as pd
import anndata
import scanpy as sc
from ALLCools.mcds import MCDS
from ALLCools.clustering import one_vs_rest_dmg

## Parameters

In [9]:
# Input locations: the cell metadata CSV and the directory that is globbed
# for '*_da_frac.mcds' when the MCDS is opened.
cell_meta_path = '../step_by_step/100kb/L1.ClusteringResults.csv.gz'
gene_frac_dir = 'gene_frac/'

# Passed as the `group` argument of one_vs_rest_dmg and used as the prefix
# of the output file name.
cluster_col = 'L1'

# Dimension names and mc_type value forwarded to one_vs_rest_dmg.
obs_dim, var_dim, mc_type = 'cell', 'gene', 'CHN'

# Selection settings forwarded unchanged to one_vs_rest_dmg.
top_n = 1000
adj_p_cutoff, fc_cutoff, auroc_cutoff = 0.001, 0.8, 0.8
max_cluster_cells, max_other_fold = 2000, 5

## Load

In [3]:
# Path pattern for the MCDS files under gene_frac_dir.
mcds_glob = f'{gene_frac_dir}/*_da_frac.mcds'
mcds = MCDS.open(mcds_glob)

# Cell metadata table; the first CSV column becomes the index.
cell_meta = pd.read_csv(
    cell_meta_path,
    index_col=0,
)

## Calculate DMG

In [4]:
# Keyword settings for the one-vs-rest DMG call, all taken from the
# Parameters cell and the loaded MCDS.
dmg_options = dict(
    group=cluster_col,
    mcds=mcds,
    obs_dim=obs_dim,
    var_dim=var_dim,
    mc_type=mc_type,
    top_n=top_n,
    adj_p_cutoff=adj_p_cutoff,
    fc_cutoff=fc_cutoff,
    auroc_cutoff=auroc_cutoff,
    max_cluster_cells=max_cluster_cells,
    max_other_fold=max_other_fold,
)

# cell_meta stays the single positional argument, as in the direct call form.
dmg_table = one_vs_rest_dmg(cell_meta, **dmg_options)

## Save

In [7]:
# The output file is named after the clustering column; the table is stored
# under the HDF key 'data'.
dmg_output_path = f'{cluster_col}.OneVsRestDMG.hdf'
dmg_table.to_hdf(dmg_output_path, key='data')