## Contents

### Link to results

[TF-MoDISco motif summaries](#tfmodisco_results)

[TF-MoDISco motif clusters](#motif_heatmaps)

[TF-MoDISco motif clusters (motifPiles)](#motif_trees)

[TF-MoDISco motif instance calls](#motif_hits)

[HOMER, MEME, and DiChIPMunk motifs](#motif_benchmark_matching)

[Convolutional filter motifs](#filter_derived_motifs)

In [None]:
import os
import vdom.helpers as vdomh
from IPython.display import display

In [None]:
def collect_model_defs(
    tf, num_tasks, multitask_finetune_model_def_tsv,
    singletask_finetune_model_def_tsv
):
    """
    From the TSVs containing model statistics/definitions, extracts the
    following and returns them for the given TF:
        1. Model definition of the finetuned multi-task profile model, as
            (fold_num, run_num, epoch_num) (i.e. just the one best fold)
        2. Model definitions of the finetuned single-task profile models, as a
            list of (task_index, fold_num, run_num, epoch_num) (i.e. just the
            one best fold for each task)
    """
    # Finetuned multi-task model
    finetune_multitask_def = None
    with open(multitask_finetune_model_def_tsv, "r") as f:
        for line in f:
            tokens = line.strip().split("\t")
            if tokens[0] == tf and int(tokens[1]) == num_tasks - 1:
                assert finetune_multitask_def is None
                finetune_multitask_def = (
                    int(tokens[2]), int(tokens[3].split("/")[1]),
                    int(tokens[4].split("/")[1])
                )

    # Finetuned single-task models
    finetune_singletask_defs = []
    with open(singletask_finetune_model_def_tsv, "r") as f:
        for line in f:
            tokens = line.strip().split("\t")
            if tokens[0] == tf:
                finetune_singletask_defs.append((
                    int(tokens[1]), int(tokens[2]),
                    int(tokens[3].split("/")[1]), int(tokens[4].split("/")[1])
                ))
    assert len(finetune_singletask_defs) == num_tasks

    return finetune_multitask_def, finetune_singletask_defs

In [None]:
# TSVs that record the statistics/definitions of the fine-tuned models
multitask_finetune_model_def_tsv = "/users/amtseng/tfmodisco/results/model_stats/multitask_profile_finetune_stats.tsv"
singletask_finetune_model_def_tsv = "/users/amtseng/tfmodisco/results/model_stats/singletask_profile_finetune_stats.tsv"

# TFs to include, in the order in which they are listed in each table
tf_names = [
    "E2F6",
    "FOXA2",
    "SPI1",
    "CEBPB",
    "MAX",
    "GABPA",
    "MAFK",
    "JUND",
    "NR3C1-reddytime",
    "REST",
]

# Number of tasks for each TF
tf_num_tasks = {
    "E2F6": 2, "FOXA2": 4, "SPI1": 4, "CEBPB": 7, "MAX": 7,
    "GABPA": 9, "MAFK": 9, "JUND": 14, "NR3C1-reddytime": 16, "REST": 20,
}

# Model type to link to for each task of each TF, as one letter per task in
# task order; the tables below treat "M" as the multi-task model and any other
# letter as the single-task model
best_model_types = {
    name: list(letters)
    for name, letters in {
        "E2F6": "MM",
        "FOXA2": "SSMM",
        "SPI1": "MSSS",
        "CEBPB": "MMMMSMM",
        "MAX": "MMSMMSS",
        "GABPA": "MMMSMMMMM",
        "MAFK": "MMMMMMMMM",
        "JUND": "SMMSMSSSSSSSMS",
        "NR3C1-reddytime": "MMMSMMSMMMMSMMMM",
        "REST": "MMMMMMMMMSMMSMMSMMMM",
    }.items()
}

In [None]:
# Fold number of the fine-tuned multi-task model of each TF
tf_mt_folds = {}
# Fold numbers of the fine-tuned single-task models of each TF, in task order
tf_st_folds = {}
for tf_name in tf_names:
    num_tasks = tf_num_tasks[tf_name]
    mt_def, st_defs = collect_model_defs(
        tf_name, num_tasks, multitask_finetune_model_def_tsv,
        singletask_finetune_model_def_tsv
    )
    # The fold number is the first entry of the multi-task definition
    tf_mt_folds[tf_name] = mt_def[0]
    st_folds = tf_st_folds[tf_name] = []
    # `collect_model_defs` has already checked that there are exactly
    # `num_tasks` single-task definitions
    for expected_index, st_def in enumerate(st_defs):
        # The definitions must be listed in task order, so that position in
        # the list can be used as the task index
        assert st_def[0] == expected_index
        st_folds.append(st_def[1])

In [None]:
# Maps each TF name to its list of tasks, each task being the pair of the
# second and third tab-separated fields of its line (shown in the tables below
# under "Exp ID" and "Cell type")
task_def_tsv = "/users/amtseng/tfmodisco/results/task_definitions.txt"
task_defs = {}
with open(task_def_tsv, "r") as f:
    for raw_line in f:
        entry = raw_line.strip()
        if not entry:
            # Blank lines carry no information
            continue
        fields = entry.split("\t")
        if len(fields) == 1:
            # A line without tabs names a TF and starts its list of tasks
            tf_name = entry
            task_defs[tf_name] = []
            continue
        # Any other line is a task of the most recently named TF
        task_defs[tf_name].append((fields[1], fields[2]))

<a id="tfmodisco_results"></a>
### TF-MoDISco motif summaries
Notebooks which show the basic TF-MoDISco motif results, including:
- All discovered motifs as untrimmed PFMs, CWMs, and hCWMs
- Trimmed hCWMs
- Observed/predicted profiles underlying each motif
- Distance distribution of the seqlets underlying each motif to the nearest peak summit
- TOMTOM matches of each motif to known JASPAR/HOCOMOCO motifs
- Examples of seqlets as importance scores that underlie each motif

In [None]:
# Table of links to the TF-MoDISco result reports: one row per task of each
# TF, followed by one "All tasks" row for the TF's multi-task model

# Header row; every column title is centered
header = vdomh.thead(
    vdomh.tr(*[
        vdomh.th(title, style={"text-align": "center"})
        for title in ("TF", "Task index", "Exp ID", "Cell type", "Link")
    ])
)

rows = []
for tf_name in tf_names:
    for task_index in range(tf_num_tasks[tf_name]):
        # Link to the report of whichever model type was chosen for this task.
        # The links are relative URLs, so their parts are joined with "/"
        # instead of `os.path.join`, whose separator depends on the platform.
        # "{key}" is left as a placeholder and filled in once per output head.
        if best_model_types[tf_name][task_index] == "M":
            fold_num = tf_mt_folds[tf_name]
            link = "/".join([
                "tfmodisco_results",
                "multitask_profile_finetune",
                "%s_multitask_profile_finetune_fold%d" % (tf_name, fold_num),
                "%s_multitask_profile_finetune_task%d_fold%d_{key}_tfm_results.html" % (tf_name, task_index, fold_num)
            ])
        else:
            fold_num = tf_st_folds[tf_name][task_index]
            link = "/".join([
                "tfmodisco_results",
                "singletask_profile_finetune",
                "%s_singletask_profile_finetune_fold%d" % (tf_name, fold_num),
                "task_%d" % task_index,
                "%s_singletask_profile_finetune_task%d_fold%d_{key}_tfm_results.html" % (tf_name, task_index, fold_num)
            ])
        exp_id, cell_type = task_defs[tf_name][task_index]
        rows.append(
            vdomh.tr(
                vdomh.td(tf_name),
                vdomh.td("Task %d" % task_index),
                vdomh.td(exp_id),
                vdomh.td(cell_type),
                vdomh.td(
                    vdomh.a("Count head", href=link.format(key="count")),
                    vdomh.span(" "),
                    vdomh.a("Profile head", href=link.format(key="profile"))
                )
            )
        )

    # Final row for the TF: the multi-task model's report over all tasks
    fold_num = tf_mt_folds[tf_name]
    link = "/".join([
        "tfmodisco_results",
        "multitask_profile_finetune",
        "%s_multitask_profile_finetune_fold%d" % (tf_name, fold_num),
        "%s_multitask_profile_finetune_fold%d_{key}_tfm_results.html" % (tf_name, fold_num)
    ])
    rows.append(
        vdomh.tr(
            vdomh.td(tf_name),
            vdomh.td("All tasks"),
            vdomh.td("N/A"),
            vdomh.td("N/A"),
            vdomh.td(
                vdomh.a("Count head", href=link.format(key="count")),
                vdomh.span(" "),
                vdomh.a("Profile head", href=link.format(key="profile"))
            )
        )
    )

display(vdomh.table(header, vdomh.tbody(*rows)))

<a id="motif_heatmaps"></a>
### TF-MoDISco motif clusters
Notebooks which show the clustering of motifs identified across TF-MoDISco runs, including:
- Clusters of similar TF-MoDISco motifs within the same model (motifs from counts and profile heads pooled)
- Clusters of similar TF-MoDISco motifs over all 10 folds (non-fine-tuned models)

In [None]:
# Table of links to the motif clustering reports: one row per task of each
# TF, followed by one "All tasks" row for the TF's multi-task models

# Header row; every column title is centered
header = vdomh.thead(
    vdomh.tr(*[
        vdomh.th(title, style={"text-align": "center"})
        for title in ("TF", "Task index", "Exp ID", "Cell type", "Link")
    ])
)

rows = []
for tf_name in tf_names:
    for task_index in range(tf_num_tasks[tf_name]):
        # Report for the fine-tuned model of the type chosen for this task.
        # The links are relative URLs, so their parts are joined with "/"
        # instead of `os.path.join`, whose separator depends on the platform.
        if best_model_types[tf_name][task_index] == "M":
            best_fold_link = "/".join([
                "motif_heatmaps",
                "multitask_profile_finetune",
                "%s_multitask_profile_finetune_task%d_motifs.html" % (tf_name, task_index)
            ])
        else:
            best_fold_link = "/".join([
                "motif_heatmaps",
                "singletask_profile_finetune",
                "%s_singletask_profile_finetune_task%d_motifs.html" % (tf_name, task_index)
            ])
        # The across-folds report of a task always comes from the single-task
        # models, whichever model type was chosen above
        all_folds_link = "/".join([
            "motif_heatmaps",
            "singletask_profile",
            "%s_singletask_profile_task%d_all_folds_motifs.html" % (tf_name, task_index)
        ])
        exp_id, cell_type = task_defs[tf_name][task_index]
        rows.append(
            vdomh.tr(
                vdomh.td(tf_name),
                vdomh.td("Task %d" % task_index),
                vdomh.td(exp_id),
                vdomh.td(cell_type),
                vdomh.td(
                    vdomh.a("Fine-tuned model", href=best_fold_link),
                    vdomh.span(" "),
                    vdomh.a("Across all 10 folds", href=all_folds_link)
                )
            )
        )

    # Final row for the TF: the multi-task reports over all tasks
    best_fold_link = "/".join([
        "motif_heatmaps",
        "multitask_profile_finetune",
        "%s_multitask_profile_finetune_motifs.html" % tf_name
    ])
    all_folds_link = "/".join([
        "motif_heatmaps",
        "multitask_profile",
        "%s_multitask_profile_all_folds_motifs.html" % tf_name
    ])
    rows.append(
        vdomh.tr(
            vdomh.td(tf_name),
            vdomh.td("All tasks"),
            vdomh.td("N/A"),
            vdomh.td("N/A"),
            vdomh.td(
                vdomh.a("Fine-tuned model", href=best_fold_link),
                vdomh.span(" "),
                vdomh.a("Across all 10 folds", href=all_folds_link)
            )
        )
    )

display(vdomh.table(header, vdomh.tbody(*rows)))

<a id="motif_trees"></a>
### TF-MoDISco motif clusters (motifPiles)
Notebooks which show the clustering of motifs identified across TF-MoDISco runs, including:
- Clusters of similar TF-MoDISco motifs within the same model (motifs from counts and profile heads pooled)
- Clusters of similar TF-MoDISco motifs over all 10 folds (non-fine-tuned models)

Unlike the other motif cluster notebooks, these are simple trees generated by the [motifPiles.R](http://biocworkshops2019.bioconductor.org.s3-website-us-east-1.amazonaws.com/page/motifStackWorkshop__motifStackWorkshop/) library.

In [None]:
# Table of links to the motif tree images: one row per task of each TF,
# followed by one "All tasks" row for the TF's multi-task models

# Header row; every column title is centered
header = vdomh.thead(
    vdomh.tr(*[
        vdomh.th(title, style={"text-align": "center"})
        for title in ("TF", "Task index", "Exp ID", "Cell type", "Link")
    ])
)

rows = []
for tf_name in tf_names:
    for task_index in range(tf_num_tasks[tf_name]):
        # Tree for the fine-tuned model of the type chosen for this task.
        # The links are relative URLs, so their parts are joined with "/"
        # instead of `os.path.join`, whose separator depends on the platform.
        if best_model_types[tf_name][task_index] == "M":
            best_fold_link = "/".join([
                "motif_trees",
                "multitask_profile_finetune",
                "%s_multitask_profile_finetune" % tf_name,
                "task%d_motifs.png" % task_index
            ])
        else:
            best_fold_link = "/".join([
                "motif_trees",
                "singletask_profile_finetune",
                "%s_singletask_profile_finetune" % tf_name,
                "task%d_motifs.png" % task_index
            ])
        # The across-folds tree of a task always comes from the single-task
        # models, whichever model type was chosen above
        all_folds_link = "/".join([
            "motif_trees",
            "singletask_profile",
            "%s_singletask_profile_task%d" % (tf_name, task_index),
            "all_folds_motifs.png"
        ])
        exp_id, cell_type = task_defs[tf_name][task_index]
        rows.append(
            vdomh.tr(
                vdomh.td(tf_name),
                vdomh.td("Task %d" % task_index),
                vdomh.td(exp_id),
                vdomh.td(cell_type),
                vdomh.td(
                    vdomh.a("Fine-tuned model", href=best_fold_link),
                    vdomh.span(" "),
                    vdomh.a("Across all 10 folds", href=all_folds_link)
                )
            )
        )

    # Final row for the TF: the multi-task trees over all tasks
    best_fold_link = "/".join([
        "motif_trees",
        "multitask_profile_finetune",
        "%s_multitask_profile_finetune" % tf_name,
        "aggregate_motifs.png"
    ])
    all_folds_link = "/".join([
        "motif_trees",
        "multitask_profile",
        "%s_multitask_profile" % tf_name,
        "all_folds_motifs.png"
    ])
    rows.append(
        vdomh.tr(
            vdomh.td(tf_name),
            vdomh.td("All tasks"),
            vdomh.td("N/A"),
            vdomh.td("N/A"),
            vdomh.td(
                vdomh.a("Fine-tuned model", href=best_fold_link),
                vdomh.span(" "),
                vdomh.a("Across all 10 folds", href=all_folds_link)
            )
        )
    )

display(vdomh.table(header, vdomh.tbody(*rows)))

<a id="motif_hits"></a>
### TF-MoDISco motif instance calls
Notebooks which show the motif hits from TF-MoDISco motifs within peaks, including:
- Distribution of number of total motif hits (over all motifs) per peak
- Fraction of peaks that contain each individual motif
- Examples of motif calls in the importance scores
- Indicator matrix of which peaks have what motifs
- Heatmap of co-occurrence between motifs within peaks
- Distance distributions between significantly co-occurring motifs

There are two versions of the hits for each set of motifs: those called by the TF-MoDISco hit scoring algorithm, and those called with MOODS.

In [None]:
# Table of links to the motif hit reports: one row per task of each TF,
# followed by one "All tasks" row for the TF's multi-task model. Each row has
# one link column per hit-calling algorithm.

# Header row; every column title is centered
header = vdomh.thead(
    vdomh.tr(*[
        vdomh.th(title, style={"text-align": "center"})
        for title in (
            "TF", "Task index", "Exp ID", "Cell type",
            "Link: TF-MoDISco hit scorer", "Link: MOODS"
        )
    ])
)

rows = []
for tf_name in tf_names:
    for task_index in range(tf_num_tasks[tf_name]):
        # Link to the report of whichever model type was chosen for this task.
        # The links are relative URLs, so their parts are joined with "/"
        # instead of `os.path.join`, whose separator depends on the platform.
        # "{algo}" and "{key}" are left as placeholders and filled in once per
        # algorithm and output head.
        if best_model_types[tf_name][task_index] == "M":
            fold_num = tf_mt_folds[tf_name]
            link = "/".join([
                "motif_hits",
                "{algo}",
                "multitask_profile_finetune",
                "%s_multitask_profile_finetune_fold%d" % (tf_name, fold_num),
                "%s_multitask_profile_finetune_task%d_fold%d_{key}_{algo}_motif_hits.html" % (tf_name, task_index, fold_num)
            ])
        else:
            fold_num = tf_st_folds[tf_name][task_index]
            link = "/".join([
                "motif_hits",
                "{algo}",
                "singletask_profile_finetune",
                "%s_singletask_profile_finetune_fold%d" % (tf_name, fold_num),
                "task_%d" % task_index,
                "%s_singletask_profile_finetune_task%d_fold%d_{key}_{algo}_motif_hits.html" % (tf_name, task_index, fold_num)
            ])
        exp_id, cell_type = task_defs[tf_name][task_index]
        rows.append(
            vdomh.tr(
                vdomh.td(tf_name),
                vdomh.td("Task %d" % task_index),
                vdomh.td(exp_id),
                vdomh.td(cell_type),
                # One link cell per algorithm, in the order of the header
                *[
                    vdomh.td(
                        vdomh.a("Count head", href=link.format(algo=algo, key="count")),
                        vdomh.span(" "),
                        vdomh.a("Profile head", href=link.format(algo=algo, key="profile"))
                    )
                    for algo in ("tfm", "moods")
                ]
            )
        )

    # Final row for the TF: the multi-task model's reports over all tasks
    fold_num = tf_mt_folds[tf_name]
    link = "/".join([
        "motif_hits",
        "{algo}",
        "multitask_profile_finetune",
        "%s_multitask_profile_finetune_fold%d" % (tf_name, fold_num),
        "%s_multitask_profile_finetune_fold%d_{key}_{algo}_motif_hits.html" % (tf_name, fold_num)
    ])
    rows.append(
        vdomh.tr(
            vdomh.td(tf_name),
            vdomh.td("All tasks"),
            vdomh.td("N/A"),
            vdomh.td("N/A"),
            *[
                vdomh.td(
                    vdomh.a("Count head", href=link.format(algo=algo, key="count")),
                    vdomh.span(" "),
                    vdomh.a("Profile head", href=link.format(algo=algo, key="profile"))
                )
                for algo in ("tfm", "moods")
            ]
        )
    )

display(vdomh.table(header, vdomh.tbody(*rows)))

<a id="motif_benchmark_matching"></a>
### HOMER, MEME, and DiChIPMunk motifs
Notebooks which show the motifs discovered by HOMER, MEME, and DiChIPMunk vs TF-MoDISco, including:
- Motifs discovered on peaks
- Motifs discovered on seqlets

For each TF-MoDISco run, we match the motifs discovered by TF-MoDISco to the motifs discovered by HOMER/MEME/DiChIPMunk run on the same set of peaks, or on the TF-MoDISco seqlets.

In [None]:
# Table of links to the benchmark motif matching reports: one row per task of
# each TF, followed by one "All tasks" row for the TF's multi-task model

# Header row; every column title is centered
header = vdomh.thead(
    vdomh.tr(*[
        vdomh.th(title, style={"text-align": "center"})
        for title in ("TF", "Task index", "Exp ID", "Cell type", "Link")
    ])
)

rows = []
for tf_name in tf_names:
    for task_index in range(tf_num_tasks[tf_name]):
        # Link to the report of whichever model type was chosen for this task.
        # The links are relative URLs, so their parts are joined with "/"
        # instead of `os.path.join`, whose separator depends on the platform.
        # "{key}" is left as a placeholder and filled in once per output head.
        if best_model_types[tf_name][task_index] == "M":
            fold_num = tf_mt_folds[tf_name]
            link = "/".join([
                "motif_benchmark_matching",
                "multitask_profile_finetune",
                "%s_multitask_profile_finetune_fold%d" % (tf_name, fold_num),
                "%s_multitask_profile_finetune_task%d_fold%d_{key}.html" % (tf_name, task_index, fold_num)
            ])
        else:
            fold_num = tf_st_folds[tf_name][task_index]
            link = "/".join([
                "motif_benchmark_matching",
                "singletask_profile_finetune",
                "%s_singletask_profile_finetune_fold%d" % (tf_name, fold_num),
                "task_%d" % task_index,
                "%s_singletask_profile_finetune_task%d_fold%d_{key}.html" % (tf_name, task_index, fold_num)
            ])
        exp_id, cell_type = task_defs[tf_name][task_index]
        rows.append(
            vdomh.tr(
                vdomh.td(tf_name),
                vdomh.td("Task %d" % task_index),
                vdomh.td(exp_id),
                vdomh.td(cell_type),
                vdomh.td(
                    vdomh.a("Count head", href=link.format(key="count")),
                    vdomh.span(" "),
                    vdomh.a("Profile head", href=link.format(key="profile"))
                )
            )
        )

    # Final row for the TF: the multi-task model's report over all tasks
    fold_num = tf_mt_folds[tf_name]
    link = "/".join([
        "motif_benchmark_matching",
        "multitask_profile_finetune",
        "%s_multitask_profile_finetune_fold%d" % (tf_name, fold_num),
        "%s_multitask_profile_finetune_fold%d_{key}.html" % (tf_name, fold_num)
    ])
    rows.append(
        vdomh.tr(
            vdomh.td(tf_name),
            vdomh.td("All tasks"),
            vdomh.td("N/A"),
            vdomh.td("N/A"),
            vdomh.td(
                vdomh.a("Count head", href=link.format(key="count")),
                vdomh.span(" "),
                vdomh.a("Profile head", href=link.format(key="profile"))
            )
        )
    )

display(vdomh.table(header, vdomh.tbody(*rows)))

<a id="filter_derived_motifs"></a>
### Convolutional filter motifs
Notebooks which show the motifs discovered from the convolutional filters, including:
- Motifs derived from highly-activating sequences for each convolutional filter
- Motifs derived from each convolutional filter itself

For each model, we show the motifs discovered.

In [None]:
# Table of links to the convolutional filter motif reports: one row per task
# of each TF, followed by one "All tasks" row for the TF's multi-task model

# Header row; every column title is centered
header = vdomh.thead(
    vdomh.tr(*[
        vdomh.th(title, style={"text-align": "center"})
        for title in ("TF", "Task index", "Exp ID", "Cell type", "Link")
    ])
)

rows = []
for tf_name in tf_names:
    for task_index in range(tf_num_tasks[tf_name]):
        # Per-task rows always link to the single-task model here;
        # `best_model_types` is not consulted.
        # The links are relative URLs, so their parts are joined with "/"
        # instead of `os.path.join`, whose separator depends on the platform.
        link = "/".join([
            "filter_derived_motifs",
            "singletask_profile_finetune",
            "%s_singletask_profile_finetune_task%d_fold%d_filter_motifs.html" % (tf_name, task_index, tf_st_folds[tf_name][task_index]),
        ])
        exp_id, cell_type = task_defs[tf_name][task_index]
        rows.append(
            vdomh.tr(
                vdomh.td(tf_name),
                vdomh.td("Task %d" % task_index),
                vdomh.td(exp_id),
                vdomh.td(cell_type),
                vdomh.td(
                    vdomh.a("Fine-tuned model", href=link)
                )
            )
        )

    # Final row for the TF: the multi-task model's report
    link = "/".join([
        "filter_derived_motifs",
        "multitask_profile_finetune",
        "%s_multitask_profile_finetune_fold%d_filter_motifs.html" % (tf_name, tf_mt_folds[tf_name]),
    ])
    rows.append(
        vdomh.tr(
            vdomh.td(tf_name),
            vdomh.td("All tasks"),
            vdomh.td("N/A"),
            vdomh.td("N/A"),
            vdomh.td(
                vdomh.a("Fine-tuned model", href=link)
            )
        )
    )

display(vdomh.table(header, vdomh.tbody(*rows)))