In [21]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
from pathlib import Path
from src.chem_draw import draw_molecule
from src.post_processing import PathWrangler
from hydra import compose, initialize
import polars as pl
import pandas as pd
from functools import partial
import os
import pathlib
import ipywidgets as widgets
import traitlets
from IPython.display import display, clear_output
from ipyaggrid import Grid

# Compose the "filepaths" config found under ../conf/filepaths into `cfg`.
with initialize(config_path="../conf/filepaths", version_base=None):
    cfg = compose(config_name="filepaths")

In [23]:
# Directories taken from the config: the "test" study under the results root,
# and the `known` directory. The trailing tuple is displayed as the cell output.
known = Path(cfg.known)
study = Path(cfg.results, "test")
study, known

(PosixPath('/home/stef/quest_data/bottle/results/test'),
 PosixPath('/home/stef/bottle/artifacts/known'))

In [24]:
# Build the wrangler for this study and display its `targets` attribute.
pw = PathWrangler(known=known, study=study)
pw.targets

('L-glutamine',)

In [25]:
# Query the wrangler for paths between the chosen starter and target.
# Sorting/filtering semantics are defined by PathWrangler.get_paths.
batch = pw.get_paths(
    starters=["alpha_ketoglutarate"],
    targets=["L-glutamine"],
    sort_by="min_max_rxn_sim",
    lower_bounds=dict(feasibility_frac=0.5),
    filter_by_enzymes=dict(
        existence=[
            "Evidence at protein level",
            "Evidence at transcript level",
            "Inferred from homology",
        ],
    ),
)

In [26]:
# Display the batch returned by get_paths (a dict of frames, e.g. 'paths').
batch

{'paths': shape: (9, 14)
 ┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
 │ id        ┆ starter   ┆ target    ┆ reactions ┆ … ┆ mean_mean ┆ min_max_r ┆ min_mean_ ┆ feasibil │
 │ ---       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ _rxn_sim  ┆ xn_sim    ┆ rxn_sim   ┆ ity_frac │
 │ str       ┆ str       ┆ str       ┆ list[str] ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │
 │           ┆           ┆           ┆           ┆   ┆ f32       ┆ f32       ┆ f32       ┆ f32      │
 ╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
 │ P28e72cda ┆ alpha_ket ┆ L-glutami ┆ ["R86f73d ┆ … ┆ 0.789172  ┆ 0.976744  ┆ 0.724315  ┆ 1.0      │
 │ e9a14d0db ┆ oglutarat ┆ ne        ┆ 66a5e7ccc ┆   ┆           ┆           ┆           ┆          │
 │ 307ab59e6 ┆ e         ┆           ┆ 343a1c898 ┆   ┆           ┆           ┆           ┆          │
 │ f77…      ┆           ┆           ┆ 8fa…      ┆   ┆   

In [None]:
# JavaScript cell renderer, handed to the grid as source text (see the
# 'cellRenderer' entry in build_enzymes). For a defined, non-null cell value
# it returns an anchor linking to that value's UniProt entry page, opened in
# a new tab; otherwise it returns an empty string.
js_uniprot_id_renderer = '''
    function(params){
        if (params.value !== undefined && params.value !== null){
            return `<a href="https://www.uniprot.org/uniprotkb/${params.value}/entry" target="_blank" rel="noopener noreferrer">${params.value}</a>`;
        }
        return ""
    }
'''

def build_enzymes(enzymes: pl.DataFrame) -> Grid:
    """Build a grid widget that tabulates the given enzymes.

    Args:
        enzymes: Frame providing at least the columns 'id', 'ec', 'organism',
            'name', 'existence', 'reviewed' and 'sequence'.

    Returns:
        A ``Grid`` showing those columns plus a 1-based row counter, with the
        'id' column rendered through ``js_uniprot_id_renderer``.
    """
    shown_fields = ['id', 'ec', 'organism', 'name', 'existence', 'reviewed', 'sequence']
    table = enzymes.to_pandas().loc[:, shown_fields]
    # Replace the default index with a 1-based counter named 'idx'; the grid
    # shows it as the first (header-less) column.
    table = table.set_index(pd.Index(range(1, len(table) + 1), name='idx'))

    column_defs = [
        {'headerName': '', 'field': table.index.name, 'width': 40},
        {'headerName': 'UniProt ID ⤴', 'field': 'id', 'cellRenderer': js_uniprot_id_renderer, 'width': 120},
    ]
    # The remaining columns are plain text and differ only in label and width.
    column_defs.extend(
        {'headerName': header, 'field': field, 'width': width}
        for header, field, width in (
            ('EC', 'ec', 100),
            ('Organism', 'organism', 250),
            ('Name', 'name', 300),
            ('Existence', 'existence', 200),
            ('Reviewed', 'reviewed', 100),
            ('Sequence', 'sequence', 500),
        )
    )

    options = {
        'columnDefs': column_defs,
        'enableSorting': True,
        'enableFilter': True,
        'enableColResize': True,
        'enableRangeSelection': True,
    }
    grid = Grid(
        grid_data=table,
        grid_options=options,
        index=True,
        theme='ag-theme-balham',
        quick_filter=True,
        height=190,
        width=900,
    )
    # NOTE(review): the effect of `unsync` is not visible in this notebook —
    # confirm against ipyaggrid before relying on it.
    grid.unsync = True
    return grid

def display_predicted_reaction(rxn_step: int, img: Path, feasibility: int):
    """Render one predicted reaction as a caption stacked above its image.

    Args:
        rxn_step: Zero-based step index; shown to the user as ``rxn_step + 1``.
        img: Path of the reaction image file to load.
        feasibility: Feasibility label; displayed as ``bool(feasibility)``.

    Returns:
        A ``widgets.VBox`` containing the caption and the image.
    """
    # Caption typo fixed: "feasibilty" -> "feasibility".
    caption = widgets.HTML(
        f'<b><u>Step #{rxn_step + 1} | Reaction feasibility: {bool(feasibility)}</u></b>'
    )
    picture = widgets.Image.from_file(img)
    return widgets.VBox([caption, picture])

def widget_path_view(batch: dict[str, pl.DataFrame], idx: int, top_k_analogues: int = 10):
    """Assemble the widget view for one path of a batch.

    The view is a header summarizing the path, followed by one bordered row
    per predicted reaction: the reaction image on the left and, on the right,
    a tabbed panel with its most similar known analogues and their enzymes.

    Args:
        batch: Frames keyed by name; this function reads "paths",
            "predicted_reactions", "known_reactions" and "enzymes".
        idx: Row index into ``batch["paths"]`` of the path to render.
        top_k_analogues: Maximum number of analogues shown per reaction,
            taken in order of decreasing reaction similarity.

    Returns:
        A ``widgets.VBox`` holding the header and one ``GridBox`` per step.
    """
    path = batch["paths"].row(idx, named=True)

    # Test against None explicitly: a driving force of exactly 0.0 is a real
    # value and must not be reported as 'N/A'.
    mdf_text = round(path["mdf"], 2) if path["mdf"] is not None else 'N/A'
    header = widgets.HTML(f"""
    <h3>{len(path["reactions"])}-step path from {path["starter"].upper()} to {path["target"].upper()}<br>
    Max-min driving force: {mdf_text} kJ/mol<br>
    Path feasibility: {path["feasibility_frac"]:.2f}<br>
    ID: {path["id"]}
    </h3>
    """)
    rows = [header]

    # Position of each reaction within the path (first occurrence wins).
    step_of = {}
    for position, rxn_id in enumerate(path["reactions"]):
        step_of.setdefault(rxn_id, position)

    pred_rxns = batch["predicted_reactions"].filter(
        pl.col("id").is_in(path["reactions"])
    ).select(
        pl.col("id"),
        pl.col("dxgb_label"),
        pl.col("rxn_sims"),
        pl.col("analogue_ids"),
        pl.col("image"),
    )
    # filter() keeps the frame's own row order, which need not match the order
    # of the reactions along the path; re-order so step labels are correct.
    ordered_rxns = sorted(
        pred_rxns.iter_rows(named=True),
        key=lambda rxn: step_of[rxn["id"]],
    )

    for rxn in ordered_rxns:
        pr_elt = display_predicted_reaction(
            rxn_step=step_of[rxn["id"]],
            img=Path(rxn["image"]),
            feasibility=rxn["dxgb_label"]
        )

        # Map each analogue id to its similarity with this predicted reaction.
        krid_to_sim = dict(zip(rxn['analogue_ids'], rxn["rxn_sims"]))
        analogues = batch["known_reactions"].filter(
            pl.col("id").is_in(rxn["analogue_ids"])
        ).with_columns(
            pl.col("id").replace(krid_to_sim).alias("rxn_sim").cast(pl.Float32),
        ).sort(
            pl.col("rxn_sim"),
            descending=True
        ).slice(0, top_k_analogues)

        # Only the enzymes referenced by the analogues that are kept.
        enzymes = batch['enzymes'].filter(
            pl.col("id").is_in(set(analogues["enzymes"].explode()))
        )

        kr_elt = widget_analogues_enzymes(analogues, enzymes)

        # Distinct name: the original rebound the loop variable `row` here.
        step_box = widgets.GridBox(
            children=[pr_elt,  kr_elt],
            layout=widgets.Layout(
                border='1px solid black',
                height='280px',
                grid_template_rows='auto',
                grid_template_columns='55% 45%',
            )
        )
        rows.append(step_box)

    return widgets.VBox(rows)

def display_analogue(img: str, rxn_sim: float):
    """Render a known analogue as a similarity caption above its image.

    Args:
        img: Path of the analogue's image file.
        rxn_sim: Similarity to the predicted reaction; shown as a percentage
            (``rxn_sim * 100``) with two decimals.

    Returns:
        A ``widgets.VBox`` containing the caption and the image.
    """
    caption = f'<b><u>{rxn_sim * 100:.2f}% similar to predicted reaction</u></b>'
    return widgets.VBox([widgets.HTML(caption), widgets.Image.from_file(img)])


def widget_analogues_enzymes(analogues: pl.DataFrame, enzymes: pl.DataFrame):
    """Build the two-tab panel for a reaction's known analogues and enzymes.

    A dropdown in the first tab picks which analogue is shown; the same
    selection drives which enzyme table is shown in the second tab.

    Args:
        analogues: Known reactions to show; reads the "image", "rxn_sim" and
            "enzymes" columns of each row.
        enzymes: Enzyme records; for each analogue the rows whose "id" is in
            that analogue's "enzymes" list are tabulated.

    Returns:
        A ``widgets.Tab`` titled 'Known Analogues' / 'Enzymes'. When
        ``analogues`` has no rows, both tabs hold a short placeholder message.
    """
    tab_titles = ['Known Analogues', 'Enzymes']

    if len(analogues) == 0:
        # Dropdown(value=0) needs a matching option, so an empty frame would
        # raise below; show placeholders instead.
        return widgets.Tab(
            titles=tab_titles,
            children=[
                widgets.HTML('No known analogues found.'),
                widgets.HTML('No enzymes to display.'),
            ],
        )

    kr_elts = []
    enzyme_elts = []
    for analogue in analogues.iter_rows(named=True):
        kr_elts.append(display_analogue(analogue["image"], analogue["rxn_sim"]))
        own_enzymes = enzymes.filter(pl.col("id").is_in(analogue["enzymes"]))
        enzyme_elts.append(build_enzymes(own_enzymes))

    # Labels are 1-based for display; values are the 0-based stack positions.
    kr_selector = widgets.Dropdown(
        options=[(i + 1, i) for i in range(len(kr_elts))],
        value=0,
        description="Analogue: "
    )
    kr_stack = widgets.Stack(kr_elts, selected_index=0)
    kr_sel_disp = widgets.VBox([kr_selector, kr_stack])
    enzyme_stack = widgets.Stack(enzyme_elts, selected_index=0)
    # Front-end links: the dropdown's index selects the visible child of both
    # stacks without a round trip through the kernel.
    _link_kr = widgets.jslink((kr_selector, 'index'), (kr_stack, 'selected_index'))
    _link_enz = widgets.jslink((kr_selector, 'index'), (enzyme_stack, 'selected_index'))

    return widgets.Tab(
        titles=tab_titles,
        children=[kr_sel_disp, enzyme_stack],
    )

# One rendered path view per row of batch["paths"], in row order.
wstack = [widget_path_view(batch, i) for i in range(len(batch["paths"]))]

In [37]:
# Inspect the known-reactions frame of the batch.
batch['known_reactions']

id,smarts,enzymes,reverse,db_ids,image
str,str,list[str],str,list[str],str
"""35""","""CSCCC(N)C(=O)O.NC(=O)CCC(=O)C(…","[""O31665""]","""16541""","[""rhea:30393""]","""/home/stef/quest_data/bottle/r…"
"""79""","""NC(CCC(=O)O)C(=O)O.N>>NC(=O)CC…","[""Q8AA75"", ""B0C9Y4"", … ""Q04ZG6""]","""164""","[""rhea:15891""]","""/home/stef/quest_data/bottle/r…"
"""241""","""NC(CCC(=O)O)C(=O)O.O=C(COP(=O)…","[""Q5WX92"", ""B9LZ53"", … ""A6LUF3""]","""643""","[""rhea:23746""]","""/home/stef/quest_data/bottle/r…"
"""401""","""CC(C)CC(=O)C(=O)O.NC(CCC(=O)O)…","[""P0AB81"", ""O35854"", … ""P54691""]","""1023""","[""rhea:18323""]","""/home/stef/quest_data/bottle/r…"
"""438""","""NC(CCC(=O)O)C(=O)O.O=C(O)C(=O)…","[""P57007"", ""Q1D2L9"", … ""A4JCH1""]","""1617""","[""rhea:14331""]","""/home/stef/quest_data/bottle/r…"
…,…,…,…,…,…
"""20371""","""NC(CCC(=O)O)C(=O)O.CC(=O)CC(=O…","[""B0VH76""]","""18666""","[""rhea:50938""]","""/home/stef/quest_data/bottle/r…"
"""20386""","""O=C(O)C=CC(=O)O.N>>NC(=O)C=CC(…","[""Q88FY5""]","""19201""","[""rhea:27387""]","""/home/stef/quest_data/bottle/r…"
"""20405""","""CSCCC(N)C(=O)O.O=C(O)C(=O)Cc1c…","[""O85746""]","""10998""","[""rhea:47086""]","""/home/stef/quest_data/bottle/r…"
"""20433""","""NC(=O)C1=CN(C2OC(COP(=O)(O)OP(…","[""A0A0L8M630""]","""12577""","[""rhea:76861""]","""/home/stef/quest_data/bottle/r…"


In [36]:
# Show the widget view built for the first path.
wstack[0]

VBox(children=(HTML(value='\n    <h3>2-step path from ALPHA_KETOGLUTARATE to L-GLUTAMINE<br>\n    Max-min driv…

In [None]:
import time

# Search button
search_button = widgets.Button(
    description='Load paths',
    disabled=False,
    button_style='success', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Load paths',
    icon='flask' # (FontAwesome names without the `fa-` prefix)
)

# Status line shown under the button; callbacks overwrite its text.
paths_label = widgets.Label(
    value='Make sorting & filtering selections above',
)

# Path selector. This was a Label, but the observer attached to it reads
# `change.owner.options`, which only selection widgets provide; a Dropdown
# (initially empty) gives that callback what it expects.
paths_selector = widgets.Dropdown(
    options=[],
    description='Select a path to view:',
    # Let the description take the width it needs instead of being truncated.
    style={'description_width': 'initial'},
)

# UI overall
ui = widgets.VBox(
    children=[
        search_button,
        paths_label,
    ],
    layout=widgets.Layout(
        width='99%',
        justify_content='flex-start',
    ),
)

In [None]:


# on paths selection change :: update paths label & render the paths viewer

def loading_paths(change: widgets.Button):
    """Click handler: show a loading message in ``paths_label``, then pause.

    Registered via ``search_button.on_click``, which calls the handler with
    the clicked button instance (not a traitlets change bunch). The argument
    is not used.
    """
    paths_label.value = 'Loading paths...'
    # NOTE(review): fixed 5 s blocking pause — looks like a stand-in for the
    # actual path loading; confirm and replace.
    time.sleep(5)


def update_paths_label(change: traitlets.Bunch):
    """Observer: report how many options the changed widget currently offers.

    Reads ``change.owner.options``, so the observed widget must expose an
    ``options`` attribute.
    """
    n_options = len(change.owner.options)
    paths_label.value = f'Fetched {n_options} paths'

# Wire the callbacks: refresh the label when the selector's value changes,
# and show the loading message when the search button is clicked.
paths_selector.observe(update_paths_label, names='value')
search_button.on_click(loading_paths)