# Cluster Differentially Methylated Genes

## Import

In [1]:
import pandas as pd
import anndata
import scanpy as sc
from ALLCools.mcds import MCDS
from ALLCools.clustering import one_vs_rest_dmg

## Parameters

In [2]:
# Glob pattern for the input MCDS datasets; passed to one_vs_rest_dmg as
# `mcds_paths`. A plain string is used because there is nothing to interpolate.
mcds_paths = 'gene_frac/*_da_frac.mcds'
# NOTE(review): this variable is not referenced by the cells visible in this
# notebook; the metadata-loading cell reads a literal path starting with
# '../../cell_level/' instead. Confirm which location is intended.
cell_meta_path = '../step_by_step/100kb/L1.ClusteringResults.csv.gz'
# Cell-metadata column passed as `group` to one_vs_rest_dmg; also used as the
# prefix of the output HDF file name.
cluster_col = 'L1'

# Dimension names and methylation type, forwarded to one_vs_rest_dmg under the
# same keyword names.
obs_dim = 'cell'
var_dim = 'gene'
mc_type = 'CHN'

# Selection cutoffs, sampling limits and CPU count, forwarded unchanged to
# one_vs_rest_dmg under the same keyword names (see the ALLCools documentation
# for their exact semantics).
top_n = 1000
auroc_cutoff = 0.8
adj_p_cutoff = 0.001
fc_cutoff = 0.8
max_cluster_cells = 2000
max_other_fold = 5
cpu = 10

## Load Cell Metadata

In [3]:
# Read the per-cell metadata table, using the first CSV column as the index.
# NOTE(review): this literal path differs from the `cell_meta_path` parameter
# defined in the Parameters cell, which is not used here - confirm which
# location is intended before switching to the parameter.
cell_meta = pd.read_csv(
    filepath_or_buffer='../../cell_level/step_by_step/100kb/L1.ClusteringResults.csv.gz',
    index_col=0,
)
# Preview the first rows of the table.
cell_meta.head()

Unnamed: 0,AllcPath,mCCCFrac,mCGFrac,mCGFracAdj,mCHFrac,mCHFracAdj,FinalReads,InputReads,MappedReads,DissectionRegion,...,Sample,leiden,mCHFrac.1,tsne_0,tsne_1,umap_0,umap_1,L1,L1_proba,CellTypeAnno
10E_M_0,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.008198,0.822633,0.821166,0.04164,0.033718,1626504.0,4407752,2892347.0,10E,...,10E_190625,14,0.04164,4.422312,32.817532,-2.776936,0.910858,c7,0.718517,MGE-Sst
10E_M_1,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.006019,0.743035,0.741479,0.024127,0.018218,2009998.0,5524084,3657352.0,10E,...,10E_190625,8,0.024127,-44.612333,-33.222181,9.975079,10.77597,c4,0.724762,CA3
10E_M_10,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.006569,0.750172,0.74852,0.027665,0.021235,1383636.0,3455260,2172987.0,10E,...,10E_190625,8,0.027665,-43.991445,-28.466108,9.412707,9.894364,c4,0.8284,CA3
10E_M_101,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.006353,0.760898,0.759369,0.026547,0.020323,2474670.0,7245482,4778768.0,10E,...,10E_190625,8,0.026547,-40.476595,-17.62558,7.975397,7.598768,c4,0.714865,CA3
10E_M_102,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.005409,0.75298,0.751637,0.019497,0.014164,2430290.0,7004754,4609570.0,10E,...,10E_190625,23,0.019497,-26.758905,16.534931,-0.437397,10.436956,c5,0.503633,CA1


## Calculate DMG

In [4]:
# Run the one-vs-rest DMG calculation for every group in `cluster_col`, using
# the settings from the Parameters cell. Progress is printed per cluster.
dmg_table = one_vs_rest_dmg(
    cell_meta,
    # Grouping column and input datasets.
    group=cluster_col,
    mcds_paths=mcds_paths,
    # Dataset dimension names and methylation type.
    obs_dim=obs_dim,
    var_dim=var_dim,
    mc_type=mc_type,
    # Gene selection cutoffs.
    top_n=top_n,
    auroc_cutoff=auroc_cutoff,
    adj_p_cutoff=adj_p_cutoff,
    fc_cutoff=fc_cutoff,
    # Sampling limits and parallelism.
    max_cluster_cells=max_cluster_cells,
    max_other_fold=max_other_fold,
    cpu=cpu,
)

Calculating cluster c0 DMGs.
Calculating cluster c1 DMGs.
Calculating cluster c10 DMGs.
Calculating cluster c11 DMGs.
Calculating cluster c12 DMGs.
Calculating cluster c13 DMGs.
Calculating cluster c14 DMGs.
Calculating cluster c15 DMGs.
Calculating cluster c16 DMGs.
Calculating cluster c17 DMGs.
c17 Finished.
Calculating cluster c18 DMGs.
c16 Finished.
Calculating cluster c19 DMGs.
c15 Finished.
Calculating cluster c2 DMGs.
c14 Finished.
Calculating cluster c20 DMGs.
c13 Finished.
Calculating cluster c21 DMGs.
c12 Finished.
Calculating cluster c22 DMGs.
c11 Finished.
Calculating cluster c23 DMGs.
c10 Finished.
Calculating cluster c24 DMGs.
c19 Finished.
Calculating cluster c25 DMGs.
c18 Finished.
Calculating cluster c3 DMGs.
c21 Finished.
Calculating cluster c4 DMGs.
c22 Finished.
Calculating cluster c5 DMGs.
c20 Finished.
Calculating cluster c6 DMGs.
c23 Finished.
Calculating cluster c7 DMGs.
c24 Finished.
Calculating cluster c8 DMGs.
c25 Finished.
Calculating cluster c9 DMGs.
c9 Fin

## Save

In [5]:
# Write the DMG table to an HDF file named after the clustering column,
# stored under the key 'data'.
dmg_table.to_hdf(
    path_or_buf=f'{cluster_col}.OneVsRestDMG.hdf',
    key='data',
)