In [None]:
%%capture --no-display
import sys
import os
sys.path.append(os.path.abspath("/users/amtseng/tfmodisco/src/"))
import motif_bench.read_motifs as read_motifs
import modisco
from modisco.visualization import viz_sequence
import numpy as np

In [None]:
# Define parameters/fetch arguments
# All run parameters come from environment variables. The TF name and fold
# are mandatory (a missing variable raises KeyError); the task index is
# optional and is left as None when its variable is not set.
tf_name = os.environ["TFM_RESULTS_TF_NAME"]
fold = int(os.environ["TFM_RESULTS_FOLD"])
task_index = (
    int(os.environ["TFM_RESULTS_TASK_INDEX"])
    if "TFM_RESULTS_TASK_INDEX" in os.environ
    else None
)

# Echo the resolved parameters so they are recorded in the notebook output
print("TF name: %s" % tf_name)
print("Fold: %s" % fold)
print("Task index: %s" % task_index)

[Skip to results](#results)

In [None]:
# Define paths and constants
# Background base frequencies (uniform over 4 bases), read by `info_content`
background_freqs = np.full(4, 0.25)

base_path = "/users/amtseng/tfmodisco/results/motif_benchmarks"

# The condition directory is named after the TF and fold, with an extra
# task suffix only when a task index was supplied
if task_index is not None:
    cond_name = "%s_fold%d_task%d" % (tf_name, fold, task_index)
else:
    cond_name = "%s_fold%d" % (tf_name, fold)
cond_path = os.path.join(base_path, tf_name, cond_name)

# Each benchmark tool has one results directory under "peaks" and one
# under "seqlets"
peaks_path = os.path.join(cond_path, "peaks")
seqlets_path = os.path.join(cond_path, "seqlets")

dichipmunk_peak_results_path = os.path.join(peaks_path, "dichipmunk")
homer_peak_results_path = os.path.join(peaks_path, "homer")
meme_peak_results_path = os.path.join(peaks_path, "meme")
dichipmunk_seqlet_results_path = os.path.join(seqlets_path, "dichipmunk")
homer_seqlet_results_path = os.path.join(seqlets_path, "homer")
meme_seqlet_results_path = os.path.join(seqlets_path, "meme")

### Helper functions

In [None]:
def info_content(track, pseudocount=0.001, background=None):
    """
    Given an L x 4 track, computes information content for each base and
    returns it as an L-array.

    Arguments:
        `track`: an L x B array of non-negative base weights (B = 4 for
            DNA); each row is normalized to probabilities internally
        `pseudocount`: value added to every entry before normalization, so
            that zero entries do not produce log(0)
        `background`: optional B-array of background base frequencies; if
            None (the default), the notebook-level `background_freqs` is
            used, which preserves the original behavior
    Returns an L-array holding, for each position, the sum over bases of
    p * log2(p / background).
    """
    if background is None:
        # Fall back to the notebook-wide constant only when the caller did
        # not supply a background, so explicit callers do not depend on
        # global notebook state
        background = background_freqs
    background = np.expand_dims(np.asarray(background), axis=0)

    track = np.asarray(track)
    num_bases = track.shape[1]
    # Normalize track to probabilities along base axis; the denominator
    # includes the pseudocount mass so that each row sums to 1
    track_norm = (track + pseudocount) / (np.sum(track, axis=1, keepdims=True) + (num_bases * pseudocount))
    ic = track_norm * np.log2(track_norm / background)
    return np.sum(ic, axis=1)

In [None]:
def show_motif(pfm):
    """
    Plots the given PFM with every position's base values scaled by that
    position's information content (as computed by `info_content`).
    """
    per_position_ic = info_content(pfm)
    # Add a trailing axis so the per-position IC broadcasts across bases
    ic_scaled_pfm = pfm * per_position_ic[:, np.newaxis]
    viz_sequence.plot_weights(ic_scaled_pfm)

<a id="results"></a>
### Show benchmark motifs

**DiChIPMunk on peaks**

In [None]:
# Load the DiChIPMunk motifs found on peaks, then report and plot each one.
# (Presumably the second return value is the number of supporting sequences
# per motif, as the printed label suggests; verify against `read_motifs`.)
dichipmunk_peak_pfms, dichipmunk_peak_num_seqs = read_motifs.import_dichipmunk_pfms(
    dichipmunk_peak_results_path
)
num_motifs = len(dichipmunk_peak_pfms)
for i in range(num_motifs):
    motif_header = "Motif %d/%d: supporting sequences = %d" % (
        i + 1, num_motifs, dichipmunk_peak_num_seqs[i]
    )
    print(motif_header)
    show_motif(dichipmunk_peak_pfms[i])

**DiChIPMunk on seqlets**

In [None]:
# Load the DiChIPMunk motifs found on seqlets, then report and plot each one.
# (Presumably the second return value is the number of supporting sequences
# per motif, as the printed label suggests; verify against `read_motifs`.)
dichipmunk_seqlet_pfms, dichipmunk_seqlet_num_seqs = read_motifs.import_dichipmunk_pfms(
    dichipmunk_seqlet_results_path
)
num_motifs = len(dichipmunk_seqlet_pfms)
for i in range(num_motifs):
    motif_header = "Motif %d/%d: supporting sequences = %d" % (
        i + 1, num_motifs, dichipmunk_seqlet_num_seqs[i]
    )
    print(motif_header)
    show_motif(dichipmunk_seqlet_pfms[i])

**HOMER on peaks**

In [None]:
# Load the HOMER motifs found on peaks, then report and plot each one
homer_peak_pfms, homer_peak_enrichments = read_motifs.import_homer_pfms(homer_peak_results_path)
num_motifs = len(homer_peak_pfms)
for i in range(num_motifs):
    # Format the enrichment with %s rather than %d: %d would truncate a
    # non-integer log enrichment to its integer part. (Assumes the values
    # may be floats; the return type of `import_homer_pfms` is not visible
    # here. %s prints integers unchanged, so this is safe either way.)
    print("Motif %d/%d: log enrichment = %s" % (i + 1, num_motifs, homer_peak_enrichments[i]))
    show_motif(homer_peak_pfms[i])

**HOMER on seqlets**

In [None]:
# Load the HOMER motifs found on seqlets, then report and plot each one
homer_seqlet_pfms, homer_seqlet_enrichments = read_motifs.import_homer_pfms(homer_seqlet_results_path)
num_motifs = len(homer_seqlet_pfms)
for i in range(num_motifs):
    # Format the enrichment with %s rather than %d: %d would truncate a
    # non-integer log enrichment to its integer part. (Assumes the values
    # may be floats; the return type of `import_homer_pfms` is not visible
    # here. %s prints integers unchanged, so this is safe either way.)
    print("Motif %d/%d: log enrichment = %s" % (i + 1, num_motifs, homer_seqlet_enrichments[i]))
    show_motif(homer_seqlet_pfms[i])

**MEME on peaks**

In [None]:
# Load the MEME motifs found on peaks, then report and plot each one.
# (Presumably the second return value holds one E-value per motif, as the
# printed label suggests; verify against `read_motifs`.)
meme_peak_pfms, meme_peak_evalues = read_motifs.import_meme_pfms(
    meme_peak_results_path
)
num_motifs = len(meme_peak_pfms)
for i in range(num_motifs):
    motif_header = "Motif %d/%d: E-value = %s" % (
        i + 1, num_motifs, meme_peak_evalues[i]
    )
    print(motif_header)
    show_motif(meme_peak_pfms[i])

**MEME on seqlets**

In [None]:
# Load the MEME motifs found on seqlets, then report and plot each one.
# (Presumably the second return value holds one E-value per motif, as the
# printed label suggests; verify against `read_motifs`.)
meme_seqlet_pfms, meme_seqlet_evalues = read_motifs.import_meme_pfms(
    meme_seqlet_results_path
)
num_motifs = len(meme_seqlet_pfms)
for i in range(num_motifs):
    motif_header = "Motif %d/%d: E-value = %s" % (
        i + 1, num_motifs, meme_seqlet_evalues[i]
    )
    print(motif_header)
    show_motif(meme_seqlet_pfms[i])