# Motif Enrichment Analysis

After the motif scan, we can run motif enrichment analysis by choosing two lists of regions, running Fisher's exact test between the two sets for each motif-cluster (or motif), and performing multiple-testing correction.

## Import

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import fisher_exact
from statsmodels.stats.multitest import multipletests
from ALLCools.mcds import RegionDS

## Load

In [2]:
# Open the 'test_HIP' RegionDS; the message printed below reports which
# dimension is used as region_dim.
dmr = RegionDS.open('test_HIP')

Using dmr as region_dim


## Motif Enrichment Between Two Sets of Regions

In [3]:
# Names for the region dimension, the region-state data array and the feature
# dimension (going by the variable names).
# NOTE(review): none of these three variables is referenced by the cells shown
# below (the enrichment calls pass region_dim=None) — confirm they are still needed.
region_dim = 'dmr'
region_state_da = 'dmr_state'
feature_dim = 'sample'

In [4]:
# Helper that selects the hypo- and hyper-DMRs of a single sample (here 'CA1');
# it returns the two selections as a pair, unpacked in that order.
hypo_dmr, hyper_dmr = dmr.get_hypo_hyper_index('CA1')

In [5]:
# Test every motif-cluster for enrichment in the hypo-DMRs (foreground set)
# against the hyper-DMRs (background set).
result = dmr.motif_enrichment(
    true_regions=hypo_dmr,
    background_regions=hyper_dmr,
    region_dim=None,  # presumably falls back to the dataset's own region_dim — TODO confirm
    motif_dim='motif-cluster',
    motif_da=None,  # presumably selects the default motif data array — TODO confirm
    alternative='two-sided',
)
# Preview the first rows of the per-motif-cluster result table.
result.head()

Unnamed: 0_level_0,oddsratio,p,q,log2OR,-lgq
motif-cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
c1,0.285714,0.413636,0.413636,-1.807355,0.383381
c10,0.285714,0.413636,0.413636,-1.807355,0.383381
c100,0.285714,0.413636,0.413636,-1.807355,0.383381
c101,0.285714,0.413636,0.413636,-1.807355,0.383381
c102,0.285714,0.413636,0.413636,-1.807355,0.383381


### Motif Enrichment For Each Sample
Alternatively, `RegionDS.sample_dmr_motif_enrichment()` gives the same result in a single call: pass the sample name and it compares that sample's hypo-DMRs against its hyper-DMRs.

In [6]:
# One-call shortcut for a single sample; the table shown below is identical to
# the one produced by the explicit motif_enrichment() call on CA1 hypo- vs hyper-DMRs.
result = dmr.sample_dmr_motif_enrichment('CA1')
result.head()

Unnamed: 0_level_0,oddsratio,p,q,log2OR,-lgq
motif-cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
c1,0.285714,0.413636,0.413636,-1.807355,0.383381
c10,0.285714,0.413636,0.413636,-1.807355,0.383381
c100,0.285714,0.413636,0.413636,-1.807355,0.383381
c101,0.285714,0.413636,0.413636,-1.807355,0.383381
c102,0.285714,0.413636,0.413636,-1.807355,0.383381


### Motif Enrichment Between Sample Pairs
You can also compare hypo-DMRs (non-overlapping) from two different samples. 

In [7]:
# Split the hypo-DMRs of the two samples (a = 'CA1', b = 'ASC') into three
# selections; going by the names: only in a, in both, only in b.
a_not_b, a_and_b, b_not_a = dmr.get_pairwise_differential_index(
    'CA1', 'ASC', dmr_type='hypo'
)

# Use the regions found only in a as the foreground set and the regions found
# only in b as the background set.
result = dmr.motif_enrichment(
    true_regions=a_not_b,
    background_regions=b_not_a,
    region_dim=None,  # presumably falls back to the dataset's own region_dim — TODO confirm
    motif_dim='motif-cluster',
    motif_da=None,  # presumably selects the default motif data array — TODO confirm
    alternative='two-sided',
)
# Preview the first rows of the per-motif-cluster result table.
result.head()

Unnamed: 0_level_0,oddsratio,p,q,log2OR,-lgq
motif-cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
c1,0.309524,0.434211,0.434211,-1.691878,0.3623
c10,0.309524,0.434211,0.434211,-1.691878,0.3623
c100,0.309524,0.434211,0.434211,-1.691878,0.3623
c101,0.309524,0.434211,0.434211,-1.691878,0.3623
c102,0.309524,0.434211,0.434211,-1.691878,0.3623


In [8]:
# This convenience method does the same as the two-step cell above
# (pairwise index selection followed by motif_enrichment); the table shown
# below matches that cell's output.
result = dmr.pairwise_dmr_motif_enrichment('CA1', 'ASC', dmr_type='hypo')
result.head()

Unnamed: 0_level_0,oddsratio,p,q,log2OR,-lgq
motif-cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
c1,0.309524,0.434211,0.434211,-1.691878,0.3623
c10,0.309524,0.434211,0.434211,-1.691878,0.3623
c100,0.309524,0.434211,0.434211,-1.691878,0.3623
c101,0.309524,0.434211,0.434211,-1.691878,0.3623
c102,0.309524,0.434211,0.434211,-1.691878,0.3623
