# Filter DMR

In [1]:
from ALLCools.mcds import RegionDS
from ALLCools.dmr import collapse_replicates

In [2]:
# Open the 'HIP_small' RegionDS, requesting only the DMR data and its
# genome-feature annotation (presumably sub-directories of the dataset;
# confirm against the RegionDS.open documentation).
region_ds = RegionDS.open(
    'HIP_small',
    select_dir=['dmr', 'dmr_genome-features'],
)

Using dmr as region_dim


## Remove Blacklist
- This step is only needed if your genome has a blacklist and you have already annotated the RegionDS with it

In [3]:
# Fetch the 'blacklist' feature from the 'genome-features' annotation and
# cast it to bool, giving one True/False flag per DMR.
is_blacklist = (
    region_ds
    .get_feature('blacklist', 'genome-features')
    .astype(bool)
)
is_blacklist

dmr
chr1-0       False
chr1-1       False
chr1-2       False
chr1-3       False
chr1-4       False
             ...  
chr19-118    False
chr19-119    False
chr19-120    False
chr19-121    False
chr19-122    False
Length: 131, dtype: bool

In [4]:
# DMRs flagged as blacklist-overlapping get their state set to 0
# (not significant). The flags are passed as a bare array (index dropped),
# so is_blacklist is assumed to be in the same order as the 'dmr' dimension.
region_ds['dmr_state'].loc[dict(dmr=is_blacklist.values)] = 0

## Replicate Consistency

In [5]:
# Each cell cluster is represented by two "replicate" samples. Label every
# sample with the second '_'-separated field of its name, then count how
# many samples share each label.
sample_to_replicate = (
    region_ds
    .get_feature('sample')
    .apply(lambda sample_name: sample_name.split('_')[1])
)
sample_to_replicate.value_counts()

ASC             2
CA1             2
CA23            2
CGE-VipLamp5    2
DG              2
MGC             2
MGE-PvSst       2
NonN            2
ODC             2
OPC             2
dtype: int64

In [6]:
# Add a sample-level DMR state matrix: the per-sample 'dmr_state' values are
# collapsed using the sample -> replicate label mapping built above.
# On completion the function reports that the collapsed state was added to
# the existing RegionDS.
collapse_replicates(
    region_ds=region_ds,
    state_da='dmr_state',
    replicate_label=sample_to_replicate,
)

Collapsed sample state added in exist RegionDS at /home/hanliu/project/allcools_doc/cluster_level/RegionDS/HIP_small


## Final DMR hypo-/hyper-state matrix
Use `dmr_state_collapsed` in further sample-based analysis, or set `use_collapsed=True`.

In [7]:
# Display the 'dmr_state_collapsed' DataArray; it is newly added to
# region_ds by the replicate-collapsing step.
region_ds['dmr_state_collapsed']

In [8]:
# Get the hypo- and hyper-state DMR ids for 'CA1' as two separate indexes.
# NOTE(review): 'CA1' is a collapsed replicate label rather than an original
# sample name, so this presumably reads the collapsed state matrix by
# default; confirm against get_hypo_hyper_index's `use_collapsed` argument.
ca1_hypo, ca1_hyper = region_ds.get_hypo_hyper_index('CA1')

In [9]:
# DMR ids returned as the hypo set for 'CA1'
ca1_hypo

Index(['chr1-3', 'chr1-5', 'chr19-1', 'chr19-4', 'chr19-17', 'chr19-21',
       'chr19-24', 'chr19-25', 'chr19-26', 'chr19-27', 'chr19-28', 'chr19-34',
       'chr19-38', 'chr19-39', 'chr19-42', 'chr19-43', 'chr19-45', 'chr19-46',
       'chr19-48', 'chr19-52', 'chr19-53', 'chr19-54', 'chr19-55', 'chr19-57',
       'chr19-58', 'chr19-60', 'chr19-61', 'chr19-63', 'chr19-64', 'chr19-65',
       'chr19-66', 'chr19-67', 'chr19-70', 'chr19-72', 'chr19-79', 'chr19-81',
       'chr19-88', 'chr19-90', 'chr19-91', 'chr19-92', 'chr19-95', 'chr19-96',
       'chr19-97', 'chr19-98', 'chr19-100', 'chr19-101', 'chr19-103',
       'chr19-104', 'chr19-105', 'chr19-107', 'chr19-108', 'chr19-109',
       'chr19-111', 'chr19-112', 'chr19-113', 'chr19-114', 'chr19-115',
       'chr19-116', 'chr19-117', 'chr19-118', 'chr19-119', 'chr19-120',
       'chr19-121', 'chr19-122'],
      dtype='object', name='dmr')

In [10]:
# DMR ids returned as the hyper set for 'CA1'
ca1_hyper

Index(['chr1-1', 'chr1-2', 'chr1-7', 'chr19-6', 'chr19-7', 'chr19-8',
       'chr19-9', 'chr19-11', 'chr19-13', 'chr19-14', 'chr19-18', 'chr19-19',
       'chr19-20', 'chr19-22', 'chr19-32', 'chr19-35', 'chr19-40', 'chr19-50',
       'chr19-82', 'chr19-83', 'chr19-84', 'chr19-87', 'chr19-89'],
      dtype='object', name='dmr')