### Links to results

**DiChIPMunk**: [on peaks](#dichip-peaks) and [on multi-task seqlets](#dichip-multi-seqlets) and [on single-task seqlets](#dichip-single-seqlets)

**HOMER**: [on peaks](#homer-peaks) and [on multi-task seqlets](#homer-multi-seqlets) and [on single-task seqlets](#homer-single-seqlets)

**MEME**: [on peaks (MEMEChIP)](#meme-peaks) and [on multi-task seqlets](#meme-multi-seqlets) and [on single-task seqlets](#meme-single-seqlets)

In [None]:
import sys
import os
sys.path.append(os.path.abspath("/users/amtseng/tfmodisco/src/"))
from util import figure_to_vdom_image
import motif.read_motifs as read_motifs
from motif.read_motifs import pfm_to_pwm
import plot.viz_sequence as viz_sequence
import numpy as np
import matplotlib.pyplot as plt
import vdom.helpers as vdomh
from IPython.display import display

In [None]:
# Read the run configuration from environment variables
tf_name = os.environ["TFM_TF_NAME"]
multitask_fold = int(os.environ["TFM_MULTITASK_FOLD"])

# The task index and the single-task fold are optional: they are only read
# when TFM_TASK_INDEX is set, and are both None otherwise
if "TFM_TASK_INDEX" not in os.environ:
    task_index, singletask_fold = None, None
else:
    task_index = int(os.environ["TFM_TASK_INDEX"])
    singletask_fold = int(os.environ["TFM_SINGLETASK_FOLD"])

# Echo the configuration so it is recorded in the notebook output
print("TF name: %s" % tf_name)
print("Multi-task fold: %s" % multitask_fold)
print("Task index: %s" % task_index)
print("Single-task fold: %s" % singletask_fold)

In [None]:
# Root directory holding all benchmark motif results
base_path = "/users/amtseng/tfmodisco/results/classic_motifs/"

# Directory of seqlet-based results for the multi-task model at this fold
multitask_seqlets_dir = os.path.join(
    base_path, "seqlets", "multitask_profile_finetune",
    "%s_multitask_profile_finetune_fold%s" % (tf_name, multitask_fold)
)

if task_index is not None:
    # A specific task was requested: use the per-task result directories
    peaks_path = os.path.join(
        base_path, "peaks", tf_name,
        "%s_peaks_task%d" % (tf_name, task_index)
    )
    multitask_profile_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_profile_task%d" % (tf_name, task_index)
    )
    multitask_count_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_count_task%d" % (tf_name, task_index)
    )

    # Single-task paths are only defined in this branch; they stay undefined
    # when no task index was given
    singletask_seqlets_dir = os.path.join(
        base_path, "seqlets", "singletask_profile_finetune",
        "%s_singletask_profile_finetune_fold%s" % (tf_name, singletask_fold),
        "task_%d" % task_index
    )
    singletask_profile_seqlets_path = os.path.join(
        singletask_seqlets_dir, "%s_seqlets_profile_task%d" % (tf_name, task_index)
    )
    singletask_count_seqlets_path = os.path.join(
        singletask_seqlets_dir, "%s_seqlets_count_task%d" % (tf_name, task_index)
    )
else:
    # No task index: use the result directories named "taskall"
    peaks_path = os.path.join(
        base_path, "peaks", tf_name, "%s_peaks_taskall" % tf_name
    )
    multitask_profile_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_profile_taskall" % tf_name
    )
    multitask_count_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_count_taskall" % tf_name
    )

### Helper functions

In [None]:
def show_peaks_motif_table(results_path, mode):
    """
    Displays an HTML table with one row per motif imported from
    `results_path`: the 1-based motif number, the motif's score, and an
    image of its PWM.
    Arguments:
        `results_path`: directory of results to import motifs from; for
            `memechip`, motifs are read from its `meme_out` subdirectory
        `mode`: one of `dichipmunk`, `homer`, `meme`, or `memechip`; this
            selects the importer and the name of the score column
    All open matplotlib figures are closed after the table is displayed.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip")

    # Choose the score column title and the importer for this tool
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        import_pfms = read_motifs.import_dichipmunk_pfms
    elif mode == "homer":
        score_name = "Log enrichment"
        import_pfms = read_motifs.import_homer_pfms
    else:
        # Both `meme` and `memechip` use the MEME importer
        score_name = "E-value"
        import_pfms = read_motifs.import_meme_pfms

    motifs_path = results_path
    if mode == "memechip":
        motifs_path = os.path.join(results_path, "meme_out")
    pfms, score_vals = import_pfms(motifs_path)

    centered = {"text-align": "center"}
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name, style=centered),
            vdomh.th("PWM", style={"text-align": "center"})
        )
    )
    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )

    table_rows = []
    for motif_num, pfm in enumerate(pfms, start=1):
        pwm = pfm_to_pwm(pfm)
        # Columns 0 and 2 are treated as the purines (presumably A and G
        # under an ACGT column order -- TODO confirm)
        purine_total = np.sum(pwm[:, [0, 2]])
        if purine_total < 0.5 * np.sum(pwm):
            # Flip to purine-rich version (reverses both axes)
            pwm = np.flip(pwm, axis=(0, 1))
        fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
        fig.tight_layout()

        number_cell = vdomh.td(str(motif_num))
        score_cell = vdomh.td(str(score_vals[motif_num - 1]))
        image_cell = vdomh.td(figure_to_vdom_image(fig))
        table_rows.append(vdomh.tr(number_cell, score_cell, image_cell))

    table = vdomh.table(colgroup, header, vdomh.tbody(*table_rows))
    display(table)
    plt.close("all")

In [None]:
def _seqlet_motif_cells(pfms, score_vals, index):
    """
    Builds the score cell and the PWM image cell for motif `index` of
    `pfms`, or two empty cells if `index` is past the end of `pfms`.
    """
    if index >= len(pfms):
        # This side has fewer motifs than the other; pad the row
        return [vdomh.td(), vdomh.td()]

    pwm = pfm_to_pwm(pfms[index])
    # Columns 0 and 2 are treated as the purines (presumably A and G under
    # an ACGT column order -- TODO confirm)
    if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
        # Flip to purine-rich version (reverses both axes)
        pwm = np.flip(pwm, axis=(0, 1))
    fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
    fig.tight_layout()
    return [
        vdomh.td(str(score_vals[index])),
        vdomh.td(figure_to_vdom_image(fig))
    ]


def show_seqlets_motif_table(profile_results_path, count_results_path, mode):
    """
    Displays an HTML table comparing the motifs imported from two results
    paths side by side. Row `i` shows the 1-based motif number, then the
    score and PWM image of the `i`-th motif from `profile_results_path`,
    then the score and PWM image of the `i`-th motif from
    `count_results_path`. If one path yields fewer motifs than the other,
    its cells are left empty in the extra rows.
    Arguments:
        `profile_results_path`: directory of results shown in the
            "(profile)" columns
        `count_results_path`: directory of results shown in the "(count)"
            columns
        `mode`: one of `dichipmunk`, `homer`, `meme`, or `memechip`; this
            selects the importer and the name of the score columns. For
            `memechip`, motifs are read from the `meme_out` subdirectory
            of each path
    All open matplotlib figures are closed after the table is displayed.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip")

    # Choose the score column title and the importer once; both paths are
    # read with the same importer
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        import_pfms = read_motifs.import_dichipmunk_pfms
    elif mode == "homer":
        score_name = "Log enrichment"
        import_pfms = read_motifs.import_homer_pfms
    else:
        # Both `meme` and `memechip` use the MEME importer
        score_name = "E-value"
        import_pfms = read_motifs.import_meme_pfms

    if mode == "memechip":
        profile_results_path = os.path.join(profile_results_path, "meme_out")
        count_results_path = os.path.join(count_results_path, "meme_out")

    p_pfms, p_score_vals = import_pfms(profile_results_path)
    c_pfms, c_score_vals = import_pfms(count_results_path)

    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name + " (profile)", style={"text-align": "center"}),
            vdomh.th("PWM (profile)", style={"text-align": "center"}),
            vdomh.th(score_name + " (count)", style={"text-align": "center"}),
            vdomh.th("PWM (count)", style={"text-align": "center"})
        )
    )

    body = []
    for i in range(max(len(p_pfms), len(c_pfms))):
        cells = [vdomh.td(str(i + 1))]
        cells.extend(_seqlet_motif_cells(p_pfms, p_score_vals, i))
        cells.extend(_seqlet_motif_cells(c_pfms, c_score_vals, i))
        body.append(vdomh.tr(*cells))

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    plt.close("all")

### Show benchmark motifs

<a id="dichip-peaks"></a>
**DiChIPMunk on peaks**

In [None]:
# Show the DiChIPMunk results stored under the peaks directory
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "dichipmunk"),
    mode="dichipmunk"
)

<a id="dichip-multi-seqlets"></a>
**DiChIPMunk on multi-task seqlets**

In [None]:
# Show the DiChIPMunk results for the multi-task profile and count seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "dichipmunk"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "dichipmunk"),
    mode="dichipmunk"
)

<a id="dichip-single-seqlets"></a>
**DiChIPMunk on single-task seqlets**

In [None]:
# The single-task paths only exist when a task index was given
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(singletask_profile_seqlets_path, "dichipmunk"),
        count_results_path=os.path.join(singletask_count_seqlets_path, "dichipmunk"),
        mode="dichipmunk"
    )

<a id="homer-peaks"></a>
**HOMER on peaks**

In [None]:
# Show the HOMER results stored under the peaks directory
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "homer"),
    mode="homer"
)

<a id="homer-multi-seqlets"></a>
**HOMER on multi-task seqlets**

In [None]:
# Show the HOMER results for the multi-task profile and count seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "homer"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "homer"),
    mode="homer"
)

<a id="homer-single-seqlets"></a>
**HOMER on single-task seqlets**

In [None]:
# The single-task paths only exist when a task index was given
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(singletask_profile_seqlets_path, "homer"),
        count_results_path=os.path.join(singletask_count_seqlets_path, "homer"),
        mode="homer"
    )

<a id="meme-peaks"></a>
**MEMEChIP on peaks**

In [None]:
# Show the MEMEChIP results stored under the peaks directory
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "memechip"),
    mode="memechip"
)

<a id="meme-multi-seqlets"></a>
**MEME on multi-task seqlets**

In [None]:
# Show the MEME results for the multi-task profile and count seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "meme"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "meme"),
    mode="meme"
)

<a id="meme-single-seqlets"></a>
**MEME on single-task seqlets**

In [None]:
# The single-task paths only exist when a task index was given
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(singletask_profile_seqlets_path, "meme"),
        count_results_path=os.path.join(singletask_count_seqlets_path, "meme"),
        mode="meme"
    )