In [1]:
import os
import sys
sys.path.append(os.path.abspath("/users/amtseng/tfmodisco/src/"))
sys.path.append(os.path.abspath("/users/amtseng/tfmodisco/notebooks/reports/"))
from util import figure_to_vdom_image
import plot.viz_sequence as viz_sequence
import numpy as np
import h5py
import matplotlib.pyplot as plt
import vdom.helpers as vdomh
from IPython.display import display

### Define constants and paths

In [2]:
# Report inputs: the TF-MoDISco motif HDF5 file(s) to show, and one group
# name per file (the group name is used as a prefix for each motif's name)
motif_files = [
    os.path.join(*path_parts)
    for path_parts in [
        (
            "/users/amtseng/tfmodisco/results/reports/tfmodisco_results/cache/",
            "singletask_profile_finetune",
            "MAX_singletask_profile_finetune_fold1",
            "task_0",
            "MAX_singletask_profile_finetune_task0_fold1_count",
            "all_motifs.h5",
        ),
    ]
]
group_names = [""]

# Echo the configuration so that it is recorded alongside the outputs
print("Motif files: %s" % (motif_files,))
print("Group names: %s" % (group_names,))

Motif files: ['/users/amtseng/tfmodisco/results/reports/tfmodisco_results/cache/singletask_profile_finetune/MAX_singletask_profile_finetune_fold1/task_0/MAX_singletask_profile_finetune_task0_fold1_count/all_motifs.h5']
Group names: ['']


### Helper functions
Helpers for loading motifs from the saved HDF5 files and organizing their names by group

In [3]:
def import_motifs(motif_files, group_names):
    """
    Imports a set of motifs from the saved HDF5 files.
    Arguments:
        `motif_files`: list of paths to HDF5 files; every top-level key
            of a file is treated as one motif, and its "cwm_trimmed"
            dataset is read in full
        `group_names`: list of group names, one for each motif file
    Returns a list of motifs as L x 4 arrays, a parallel list of
    motif names (each of the form "<group name>:<key>"), and a
    dictionary mapping group names to lists of motif names.
    Raises a ValueError if `motif_files` and `group_names` do not have
    the same length.
    """
    # Materialize first so that the lengths can be compared even if
    # generators were passed in
    motif_files, group_names = list(motif_files), list(group_names)
    if len(motif_files) != len(group_names):
        # zip() below would otherwise silently drop the unmatched tail
        raise ValueError(
            "Got %d motif files but %d group names" % (
                len(motif_files), len(group_names)
            )
        )

    motifs, motif_names = [], []
    groups = {}
    for motif_file, stem in zip(motif_files, group_names):
        # setdefault (not assignment) so that a group name shared by
        # several files accumulates names instead of being reset
        group_members = groups.setdefault(stem, [])
        with h5py.File(motif_file, "r") as f:
            for key in f.keys():
                motif_name = "%s:%s" % (stem, key)
                motif_names.append(motif_name)
                motifs.append(f[key]["cwm_trimmed"][:])
                group_members.append(motif_name)
    return motifs, motif_names, groups

### Show motifs

In [4]:
# Load every motif from the configured files, along with the parallel list
# of motif names and the mapping from group name to motif names
motifs, motif_names, motif_groups = import_motifs(motif_files, group_names)

In [5]:
# Orient every motif so that columns 0 and 2 carry at least half of its
# total weight (the "purine-rich" orientation; presumably the columns are
# ordered A, C, G, T, so 0 and 2 are A and G -- TODO confirm)
for idx in range(len(motifs)):
    cwm = motifs[idx]
    purine_total = np.sum(cwm[:, [0, 2]])
    if purine_total < 0.5 * np.sum(cwm):
        # np.flip without an axis reverses every axis, i.e. both the
        # position order and the column order of the 2D array
        motifs[idx] = np.flip(cwm)

In [6]:
# Show a table with one row per motif: the motif's key, and the figure that
# viz_sequence.plot_weights draws for its CWM
colgroup = vdomh.colgroup(
    vdomh.col(style={"width": "5%"}),
    vdomh.col(style={"width": "95%"})
)

header = vdomh.thead(
    vdomh.tr(
        vdomh.th("Motif key", style={"text-align": "center"}),
        vdomh.th("CWM", style={"text-align": "center"})
    )
)

rows = []
for motif_key, motif in zip(motif_names, motifs):
    fig = viz_sequence.plot_weights(motif, figsize=(20, 4), return_fig=True)
    fig.tight_layout()
    rows.append(
        vdomh.tr(
            vdomh.td(motif_key),
            vdomh.td(figure_to_vdom_image(fig))
        )
    )
    # The figure has been handed to figure_to_vdom_image (assumed to render
    # it right away -- TODO confirm), so release it now rather than keeping
    # every figure open until the loop ends; pyplot warns once more than 20
    # figures are open at the same time
    plt.close(fig)

display(vdomh.table(colgroup, header, vdomh.tbody(*rows)))
# Close anything else that is still registered with pyplot
plt.close("all")

Motif key,CWM
:0_0,
:0_1,
:0_2,
:0_3,
:0_4,
:0_5,
:0_6,
:0_7,
