# Cluster Differentially Methylated Genes

## Import

In [1]:
import pandas as pd
from ALLCools.clustering import one_vs_rest_dmg

## Parameters

In [2]:
# --- Inputs ---
mcds_paths = 'geneslop2k-vm23_frac.mcds/'  # forwarded as `mcds_paths` to one_vs_rest_dmg
cell_meta_path = '../step_by_step/100kb/L1.ClusteringResults.csv.gz'  # cell metadata CSV (presumably holds the cluster labels)
cluster_col = 'L1'  # metadata column forwarded as `group` to one_vs_rest_dmg

# --- Dataset dimensions / methylation context (forwarded unchanged) ---
obs_dim = 'cell'
var_dim = 'geneslop2k-vm23'
mc_type = 'CHN'

# --- DMG selection thresholds and resources (forwarded unchanged) ---
# NOTE(review): exact semantics are defined by ALLCools' one_vs_rest_dmg;
# confirm against its documentation before tuning.
top_n = 1000
auroc_cutoff = 0.8
adj_p_cutoff = 0.001
fc_cutoff = 0.8
max_cluster_cells = 2000
max_other_fold = 5
cpu = 10  # forwarded as `cpu`

## Load

In [3]:
# Read the cell metadata from the path configured in the parameter cell, so
# that overriding `cell_meta_path` actually changes which table is loaded
# (previously a separate hard-coded path was used and the parameter was ignored).
# The first CSV column becomes the index.
cell_meta = pd.read_csv(cell_meta_path, index_col=0)
cell_meta.head()

Unnamed: 0,AllcPath,mCCCFrac,mCGFrac,mCGFracAdj,mCHFrac,mCHFracAdj,FinalReads,InputReads,MappedReads,DissectionRegion,...,Row384,FANSDate,Slice,Sample,leiden,mCHFrac.1,tsne_0,tsne_1,L1,L1_proba
8E_M_10,/gale/raidix/rdx-4/mapping/8E/CEMBA190711-8E-1...,0.005505,0.744279,0.742863,0.020649,0.015228,2714916.0,6036476,4014048.0,8E,...,0,190711,8,8E_190711,8,0.020649,-7.646137,16.721509,c8,0.54936
8E_M_100,/gale/raidix/rdx-4/mapping/8E/CEMBA190711-8E-1...,0.004702,0.7231,0.721792,0.0124,0.007735,3302547.0,7683706,5370970.0,8E,...,2,190711,8,8E_190711,7,0.0124,5.817251,25.264501,c8,0.395962
8E_M_1000,/gale/raidix/rdx-4/mapping/8E/CEMBA190711-8E-3...,0.005423,0.73996,0.738542,0.021733,0.016399,1369094.0,3658050,2381916.0,8E,...,5,190711,8,8E_190711,8,0.021733,-26.855708,14.451151,c1,0.9218
8E_M_1002,/gale/raidix/rdx-4/mapping/8E/CEMBA190711-8E-3...,0.004117,0.745511,0.744459,0.010192,0.006101,4571390.0,11822434,8079217.0,8E,...,5,190711,8,8E_190711,15,0.010192,29.288899,24.082294,c4,0.899333
8E_M_1003,/gale/raidix/rdx-4/mapping/8E/CEMBA190711-8E-3...,0.005528,0.750461,0.749074,0.023083,0.017652,1334845.0,3479288,2337068.0,8E,...,4,190711,8,8E_190711,3,0.023083,-12.959669,1.623964,c1,0.554967


## Calculate DMG

In [4]:
# Run the one-vs-rest DMG computation, grouping cells by `cluster_col`.
# Every keyword below is taken directly from the parameter cell.
dmg_table = one_vs_rest_dmg(
    cell_meta,
    group=cluster_col,
    mcds_paths=mcds_paths,
    obs_dim=obs_dim,
    var_dim=var_dim,
    mc_type=mc_type,
    top_n=top_n,
    adj_p_cutoff=adj_p_cutoff,
    fc_cutoff=fc_cutoff,
    auroc_cutoff=auroc_cutoff,
    max_cluster_cells=max_cluster_cells,
    max_other_fold=max_other_fold,
    cpu=cpu,
)

Calculating cluster c0 DMGs.
Calculating cluster c1 DMGs.
Calculating cluster c10 DMGs.
Calculating cluster c11 DMGs.
Calculating cluster c12 DMGs.
Calculating cluster c13 DMGs.
Calculating cluster c2 DMGs.
Calculating cluster c3 DMGs.
Calculating cluster c4 DMGs.
Calculating cluster c5 DMGs.
Calculating cluster c6 DMGs.
c13 Finished.
Calculating cluster c7 DMGs.
Calculating cluster c8 DMGs.
c12 Finished.
c11 Finished.
Calculating cluster c9 DMGs.
c10 Finished.
c7 Finished.
c8 Finished.
c5 Finished.
c9 Finished.
c6 Finished.
c4 Finished.
c3 Finished.
c2 Finished.
c0 Finished.
c1 Finished.


In [6]:
# Display the resulting DMG table as this cell's output.
dmg_table

Unnamed: 0_level_0,pvals_adj,fc,AUROC,cluster
names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSMUSG00000023079,1.532981e-15,0.148641,0.996777,c13
ENSMUSG00000027692,1.532981e-15,0.498391,0.995703,c13
ENSMUSG00000004633,1.532981e-15,0.450733,0.994531,c13
ENSMUSG00000027508,1.532981e-15,0.343717,0.992578,c13
ENSMUSG00000035067,1.532981e-15,0.306569,0.991797,c13
...,...,...,...,...
ENSMUSG00000034118,1.953162e-198,0.641361,0.800426,c1
ENSMUSG00000022801,2.459340e-198,0.673616,0.800350,c1
ENSMUSG00000031441,3.210826e-198,0.746399,0.800262,c1
ENSMUSG00000104318,3.542465e-198,0.659941,0.800229,c1


## Save

In [5]:
# Persist the DMG table to an HDF5 file named after the cluster column,
# stored under the key 'data'.
dmg_table.to_hdf(
    f'{cluster_col}.OneVsRestDMG.hdf',
    key='data',
)