In [1]:
from IPython.display import display, clear_output
import ipywidgets as widgets
from ipysheet import from_dataframe
from src.post_processing import PathWrangler
import pandas as pd
import svgutils
import re

In [3]:
# Relative locations of the processed-expansion artifacts this notebook reads.
path_filepath = '../artifacts/processed_expansions/found_paths.json'
predicted_reactions_filepath = "../artifacts/processed_expansions/predicted_reactions.json"
known_reactions_filepath = "../artifacts/processed_expansions/known_reactions.json"

# Collect the constructor arguments in one mapping, then build the wrangler
# that every later cell queries through `pw`.
wrangler_files = {
    "path_filepath": path_filepath,
    "pr_filepath": predicted_reactions_filepath,
    "kr_filepath": known_reactions_filepath,
}
pw = PathWrangler(**wrangler_files)

In [4]:
# Options
# Each element of `pw.starter_targets` is unpacked as a (starter, target) pair.
# Guard the unpacking so an empty collection yields empty option tuples
# instead of a "not enough values to unpack" ValueError.
starter_target_pairs = list(pw.starter_targets)
starter_options, target_options = zip(*starter_target_pairs) if starter_target_pairs else ((), ())

# De-duplicate while keeping first-seen order. A plain set() orders strings
# differently on every kernel start, which made the default `[:3]` target
# selection (and therefore the initial results) irreproducible. Starters get
# the same treatment so a starter paired with several targets is listed once.
starter_options = tuple(dict.fromkeys(starter_options))
target_options = tuple(dict.fromkeys(target_options))

# (label, value) pairs for the widgets below: the enum member's value is the
# label shown to the user, its name is what the widget reports as selected.
evidence_options = [(elt.value, elt.name) for elt in pw.enzyme_existence]
sort_options = [
    ("Mean RCMCS", 'mean_rcmcs'),
    ("Min RCMCS", 'min_rcmcs'),
    ("Max-min driving force", 'mdf')
]

# Define filter / sort widgets
# NOTE: `continuous_update` is a slider/text trait; SelectMultiple does not
# define it, so it is no longer passed (unknown constructor kwargs are only
# tolerated by traitlets through a deprecated code path).
starter_selector = widgets.SelectMultiple(
    options=starter_options,
    value=starter_options[:3],  # pre-select (up to) the first three starters
    rows=len(starter_options),  # show every option without scrolling
    description='Starters:',
    disabled=False,
)

target_selector = widgets.SelectMultiple(
    options=target_options,
    value=target_options[:3],  # pre-select (up to) the first three targets
    rows=len(target_options),
    description='Targets:',
    disabled=False
)

evidence_selector = widgets.SelectMultiple(
    options=evidence_options,
    # option values are enum member *names*, so the default is given by name
    value=[pw.enzyme_existence.PROTEIN.name],
    rows=len(evidence_options),
    description='Enzyme level of evidence:',
    disabled=False
)

sort_by_radio_buttons = widgets.RadioButtons(
    options=sort_options,
    value='mean_rcmcs',
    description='Sort paths by:',
    disabled=False
)

# Lay the four controls out side by side; the listener loop further down
# iterates over `selectors.children` to observe each of them.
selectors = widgets.HBox(
    [
        starter_selector,
        target_selector,
        evidence_selector,
        sort_by_radio_buttons
    ]
)

# Output contexts
gen_paths_out = widgets.Output()    # receives the "<n> paths meet your criteria" line
path_viewer_out = widgets.Output()  # receives the path dropdown and the stack of rendered paths

# Path Viewer
# The dropdown starts without options; update_path_selector() fills them in
# each time the paths are regenerated.
# `continuous_update`, `orientation` and `readout` are slider traits that
# Dropdown does not define, so they are no longer passed (unknown constructor
# kwargs are only tolerated by traitlets through a deprecated code path).
path_selector = widgets.Dropdown(
    description='Path #',
    disabled=False,
)


# Define callbacks
# Handle on the jslink that ties `path_selector` to the stack currently shown.
# Kept at module level so the next refresh can tear it down.
_active_path_link = None


def generate_paths(_event: "dict | None" = None):
    """Query paths for the current widget selections and rebuild the viewer.

    Used both as the `observe` callback of the filter / sort widgets and as a
    direct call to populate the UI initially.

    Args:
        _event: Change notification supplied by `observe`; ignored. May be
            omitted when calling the function directly.
    """
    global _active_path_link

    paths = pw.get_paths(
        starters=starter_selector.value,
        targets=target_selector.value,
        sort_by=sort_by_radio_buttons.value,
        # filter on the evidence level(s) currently selected in the UI
        filter_by_enzymes={'existence': evidence_selector.value},
    )

    with gen_paths_out:
        clear_output()
        print(f"{len(paths)} paths meet your criteria")

    with path_viewer_out:
        clear_output()
        print("Loading paths...")

    # Tear down the link to the previous stack before building a new one.
    # Otherwise every refresh leaves another live link that keeps pushing
    # dropdown changes into a stack that is no longer displayed.
    if _active_path_link is not None:
        _active_path_link.unlink()
        _active_path_link = None

    update_path_selector(paths)
    path_stack, _active_path_link = update_path_viewer(paths)
    redisplay_paths(path_stack)


def update_path_selector(paths):
    """Repopulate the path dropdown so it offers one entry per path.

    Entries are (label, value) pairs: a 1-based label with the matching
    0-based index as the value. The first entry is selected when there are
    paths; otherwise the selection is cleared.
    """
    n_paths = len(paths)
    path_selector.options = list(zip(range(1, n_paths + 1), range(n_paths)))
    if paths:
        path_selector.value = 0
    else:
        path_selector.value = None


def update_path_viewer(paths):
    """Render every path and stack the results behind the path dropdown.

    Returns:
        (stack, link): the Stack holding one rendered layer per path, and the
        jslink that copies the dropdown's `index` into the stack's
        `selected_index`.
    """
    rendered = []
    for idx in range(len(paths)):
        rendered.append(display_path(idx, paths))

    # Start with nothing selected; the link below drives the visible layer.
    stack = widgets.Stack(rendered, selected_index=None)
    link = widgets.jslink(
        (path_selector, 'index'),
        (stack, 'selected_index'),
    )
    return stack, link


def display_enzymes(enzymes):
    """Build an ipysheet table listing the given enzymes.

    Args:
        enzymes: Iterable of objects exposing `to_dict()`; each dict must
            contain the columns listed below.

    Returns:
        Whatever `from_dataframe` returns for the assembled DataFrame.

    Raises:
        KeyError: If enzymes are given but a required column is missing.
    """
    columns = ['uniprot_id', 'ec', 'organism', 'name', 'existence', 'reviewed', 'sequence']
    records = [e.to_dict() for e in enzymes]
    if records:
        df = pd.DataFrame(data=records).loc[:, columns]
    else:
        # A frame built from no records has no columns at all, so selecting
        # them with .loc would raise KeyError. Build an empty table that
        # still carries the expected headers instead.
        df = pd.DataFrame(columns=columns)
    return from_dataframe(df)


def scl_svg_one_dim(filepath: str, width=None, height=None):
    """Load an SVG file and scale it uniformly to hit one target dimension.

    Args:
        filepath: Path of the SVG file to load.
        width: Desired width; give this or `height`, not both.
        height: Desired height; give this or `width`, not both.

    Returns:
        The scaled figure as a string that starts at its `<svg` tag.

    Raises:
        ValueError: If neither or both of `width` / `height` are given.
    """
    source_svg = svgutils.compose.SVG(filepath)

    # Exactly one of the two target dimensions may be set.
    if bool(width) == bool(height):
        raise ValueError("Pick exactly one desired dimension, width or height")

    factor = width / source_svg.width if width else height / source_svg.height

    source_svg.scale(factor)
    figure = svgutils.compose.Figure(
        source_svg.width * factor,
        source_svg.height * factor,
        source_svg,
    )
    markup = figure.tostr().decode("utf-8")

    # Discard whatever the serializer emits ahead of the opening <svg tag.
    svg_tag = re.search('<svg', markup)
    return markup[svg_tag.start():]


def display_predicted_reaction(rxn_step, img):
    """Wrap a predicted reaction's image in an HTML widget with a step heading.

    Args:
        rxn_step: 0-based position of the reaction in its path; shown 1-based.
        img: Path of the reaction's SVG file, scaled here to width 650.
    """
    svg_markup = scl_svg_one_dim(img, width=650)
    step_number = rxn_step + 1
    # Assembled line by line; the whitespace is part of the emitted markup.
    html = (
        "\n"
        f"    <b><u>Step #{step_number}</u></b>\n"
        "    <div>\n"
        f"        {svg_markup}\n"
        "    </div>\n"
        "    "
    )
    return widgets.HTML(value=html)


def display_analogue(img, rcmcs):
    """Wrap a known analogue's image in an HTML widget with a similarity heading.

    Args:
        img: Path of the analogue's SVG file, scaled here to width 600.
        rcmcs: Similarity score; multiplied by 100 and shown with two decimals
            (presumably a fraction in [0, 1] -- confirm against the caller).
    """
    svg_markup = scl_svg_one_dim(img, width=600)
    percent_similar = rcmcs * 100
    # Assembled line by line; the whitespace is part of the emitted markup.
    html = (
        "\n"
        f"    <b><u>{percent_similar:.2f}% similar to predicted reaction</u></b>\n"
        "    <div>\n"
        f"        {svg_markup}\n"
        "    </div>\n"
        "    "
    )
    return widgets.HTML(value=html)


def display_analogues_enzymes(krs, rcmcses):
    """Build a two-tab widget for browsing a reaction's known analogues.

    The first tab holds a dropdown plus the selected analogue's image; the
    second tab holds the enzyme table of that same analogue. Both stacks are
    linked to the dropdown so they always show the same analogue.

    Args:
        krs: Known reactions; each must expose `.image` and `.enzymes`.
        rcmcses: Similarity scores, paired positionally with `krs`.

    Returns:
        A `widgets.Tab` with the 'Known Analogue' and 'Enzymes' tabs.
    """
    kr_elts = []
    enzyme_elts = []
    for kr, rcmcs in zip(krs, rcmcses):
        kr_elts.append(display_analogue(kr.image, rcmcs))
        enzyme_elts.append(display_enzymes(kr.enzymes))

    # With no analogues there is nothing to select. Asking the dropdown for
    # value 0 would then be an invalid selection, so fall back to None, the
    # same way update_path_selector() handles an empty path list.
    initial_index = 0 if kr_elts else None

    kr_selector = widgets.Dropdown(
        options=[(i + 1, i) for i in range(len(kr_elts))],  # (1-based label, 0-based value)
        value=initial_index,
        description="Known analogue: "
    )
    kr_stack = widgets.Stack(kr_elts, selected_index=initial_index)
    kr_sel_disp = widgets.VBox([kr_selector, kr_stack])
    enzyme_stack = widgets.Stack(enzyme_elts, selected_index=initial_index)

    # One dropdown drives both stacks so image and enzyme table stay in sync.
    _link_kr = widgets.jslink((kr_selector, 'index'), (kr_stack, 'selected_index'))
    _link_enz = widgets.jslink((kr_selector, 'index'), (enzyme_stack, 'selected_index'))

    tab_titles = ['Known Analogue', 'Enzymes']
    tab_children = [kr_sel_disp, enzyme_stack]
    tab = widgets.Tab(
        children=tab_children,
        titles=tab_titles,
        layout=widgets.Layout(width="950px")
    )
    return tab


def display_path(path_idx, paths, k=1000):
    """Render one path as a column of widgets: a header, then one row per reaction.

    Each reaction row shows the predicted reaction on the left and a tabbed
    view of its top analogues / their enzymes on the right.

    Args:
        path_idx: Index into `paths` of the path to render.
        paths: Sequence of path objects exposing `reactions`, `starter`,
            `target` and `mdf`.
        k: Passed as `k` to each reaction's `top_analogues` / `top_rcmcs`.
            Defaults to 1000, the value that used to be hard-coded here.

    Returns:
        A `widgets.VBox` holding the header and the per-reaction rows.
    """
    path = paths[path_idx]
    header = f"""
    <h3>{len(path.reactions)}-step path from {path.starter.upper()} to {path.target.upper()}<br>
    Max-min driving force = {path.mdf:.2f} kJ/mol<br>
    </h3>
    """
    rows = [widgets.HTML(value=header)]
    for i, reaction in enumerate(path.reactions):
        pr_image = reaction.image
        # Same `k` for both calls so analogues and scores can be paired up downstream.
        analogues = reaction.top_analogues(k=k)
        rcmcses = reaction.top_rcmcs(k=k)
        pr_elt = display_predicted_reaction(i, pr_image)
        kr_elt = display_analogues_enzymes(analogues, rcmcses)
        rows.append(widgets.HBox([pr_elt, kr_elt], layout=widgets.Layout(border='1px solid black')))
    return widgets.VBox(rows)


def redisplay_paths(test_stack):
    """Swap the path viewer's contents for the path dropdown plus `test_stack`."""
    to_show = (path_selector, test_stack)
    with path_viewer_out:
        # Clear whatever was shown before so only the fresh widgets remain.
        clear_output()
        display(*to_show)


# Attach listeners to UI widgets
# Any change to a filter / sort control's `value` regenerates the paths.
for selector in selectors.children:
    selector.observe(generate_paths, names='value')

# Display the UI
# Controls first, then the result count, then the path viewer.
display(
    selectors,
    gen_paths_out,
    path_viewer_out,
)

HBox(children=(SelectMultiple(description='Starters:', index=(0, 1, 2), options=('malate', 'oxaloacetate', 'py…

Output()

Output()

In [5]:
# Initialize paths
# Run the callback once by hand (no change event) so the UI shows results for
# the default widget selections before the user touches anything.
generate_paths()