In [1]:
from hydra import initialize, compose
from pathlib import Path
import pandas as pd
from ergochemics.draw import draw_reaction, draw_molecule
from ergochemics.mapping import rc_to_nest

# Load the project's file-path configuration through Hydra's compose API.
# `cfg` supplies the data directories read by the cells below.
with initialize(
    version_base=None,
    config_path="./conf/filepaths",
):
    cfg = compose(config_name="filepaths")

In [2]:
# Known-reactions table (pandas); its row count is the denominator for the
# coverage percentages computed further down.
# NOTE: `krs` is rebound to a polars DataFrame in a later cell.
known_reactions_path = Path(cfg.processed_data) / "pathway" / "known_reactions.parquet"
krs = pd.read_parquet(known_reactions_path)
tot_krs = krs.shape[0]
print(f"Total known reactions: {tot_krs}")

Total known reactions: 28142


In [3]:
# Names of the rule sets whose mapping results are compared below.
rules = [
    "mechinformed",
    *(f"mechinferred_dt_{tag}" for tag in ("01", "02", "04", "13", "91")),
    *(f"rc_plus_{k}" for k in range(5)),
]

# All per-rule mapping files live in the same directory as the known reactions.
pathway_dir = Path(cfg.processed_data) / "pathway"

# Report, for each rule set, how many rows its mapping file holds relative to
# the number of known reactions.
# NOTE(review): coverage is computed from the row count, which presumably
# assumes one row per known reaction in each mapping file — confirm.
for rule in rules:
    fn = f"mapped_known_reactions_x_{rule}_rules.parquet"
    df = pd.read_parquet(pathway_dir / fn)
    # Convert the stored template atom indices with rc_to_nest; within this
    # loop the frame is otherwise only used for its length.
    df["template_aidxs"] = df["template_aidxs"].apply(rc_to_nest)
    n_mapped = len(df)
    coverage_pct = n_mapped * 100 / tot_krs
    print(f"{rule} kr coverage: {coverage_pct:.1f}%, {n_mapped} total reactions")

mechinformed kr coverage: 43.1%, 12137 total reactions
mechinferred_dt_01 kr coverage: 69.1%, 19433 total reactions
mechinferred_dt_02 kr coverage: 69.1%, 19433 total reactions
mechinferred_dt_04 kr coverage: 69.1%, 19439 total reactions
mechinferred_dt_13 kr coverage: 69.1%, 19439 total reactions
mechinferred_dt_91 kr coverage: 69.1%, 19439 total reactions
rc_plus_0 kr coverage: 69.1%, 19433 total reactions
rc_plus_1 kr coverage: 69.1%, 19437 total reactions
rc_plus_2 kr coverage: 69.1%, 19437 total reactions
rc_plus_3 kr coverage: 69.1%, 19435 total reactions
rc_plus_4 kr coverage: 69.1%, 19437 total reactions


In [4]:
import polars as pl

In [5]:
# Reload the known-reactions table with polars.
# The path is built from the Hydra config (as in the pandas load above) instead
# of a hard-coded absolute home-directory path, so the notebook is not tied to
# one machine.
# NOTE(review): assumes cfg.processed_data points at the directory the previous
# hard-coded path referred to (".../data/processed") — confirm.
# NOTE: this rebinds `krs` from the pandas DataFrame to a polars DataFrame.
krs = pl.read_parquet(Path(cfg.processed_data) / "pathway" / "known_reactions.parquet")
krs.head()

id,smarts,enzymes,reverse,db_ids
str,str,list[str],str,list[str]
"""e908a824c912d1e39c46de92d1f738…","""**.NC(CCC(=O)NC(CS)C(=O)NCC(=O…","[""P10649"", ""P50472"", … ""Q08863""]","""3bbe8e6dca0da1c745e7678c8efebe…","[""RHEA:16438""]"
"""d86d99a8143d3be8fc861a5de5e625…","""*.*.*.*.*.*.*.*.*.*.CC(C)(COP(…","[""A0A0C6E0I7"", ""A0A0C6DWS6"", … ""Q0UK50""]","""e99fb0b75e799eba72aee21c1d04ee…","[""RHEA:51350""]"
"""2972b2db66715ba6f3d6aeaf07f31b…","""*.*.*.*.*.*.*.*.*NC(COP(=O)(O)…","[""G0REX6"", ""A0A482N9V7"", ""P0DO30""]","""5a7fffef224c65b7cd431ddc668eb2…","[""RHEA:64546""]"
"""572e4a84a946af07c9ab6fd1c0347b…","""*.*.*.*.*.*.*.*.*OP(=O)(O)OCC(…","[""A5UNX8"", ""Q57952"", … ""Q9V2B0""]","""c4c936d8d985a457a128601b8d2769…","[""RHEA:64377""]"
"""dfe7fc761de3c213bf2b18fae4f8a5…","""*.*.*.*.*.*.*.*.CC(C)CCCC(C)CC…","[""A5UNX8"", ""Q57952"", … ""Q9V2B0""]","""2e026380a550baa6b343604e6f5500…","[""RHEA:64369""]"


In [6]:
# Look up the known reaction(s) whose `db_ids` list includes RHEA:24814.
has_rhea_24814 = pl.col("db_ids").list.contains("RHEA:24814")
krs.filter(has_rhea_24814)

id,smarts,enzymes,reverse,db_ids
str,str,list[str],str,list[str]
"""be524f61739a1a072c6f4a7100dd55…","""CC(C)C(N)C(=O)O.O=C(O)CCC(=O)C…","[""Q5HIC1"", ""Q9ZJF1"", … ""P54691""]","""3ca7439df427de8a3930ebbd6bffe3…","[""RHEA:24814""]"


In [7]:
# Tab-separated UniProtKB export, read into a polars DataFrame.
# NOTE(review): this is a hard-coded absolute local path; consider deriving it
# from the Hydra config if a raw-data directory is defined there.
uniprot_tsv = "/home/stef/enz_rxn_data/data/raw/pathway/uniprotkb_reviewed_true_2025_04_01.tsv"
up = pl.read_csv(uniprot_tsv, separator="\t")

In [8]:
# Preview the first five rows of the UniProt table.
up.head(5)

Entry,Reviewed,Protein names,Protein existence,Length,Sequence,EC number,Organism,Gene Names,Catalytic activity
str,str,str,str,i64,str,str,str,str,str
"""A0A009IHW8""","""reviewed""","""2' cyclic ADP-D-ribose synthas…","""Evidence at protein level""",269,"""MSLEQKKGADIISKILQIQNSIGKTTSPST…","""3.2.2.-; 3.2.2.6""","""Acinetobacter baumannii (strai…","""J512_3302""","""CATALYTIC ACTIVITY: Reaction=N…"
"""A0A023I7E1""","""reviewed""","""Glucan endo-1,3-beta-D-glucosi…","""Evidence at protein level""",796,"""MRFQVIVAAATITMITSYIPGVASQSTSDG…","""3.2.1.39""","""Rhizomucor miehei""","""ENG1 LAM81A""","""CATALYTIC ACTIVITY: Reaction=H…"
"""A0A024B7W1""","""reviewed""","""Genome polyprotein [Cleaved in…","""Evidence at protein level""",3423,"""MKNPKKKSGGFRIVNMLKRGVARVSPFGGL…","""2.1.1.56; 2.1.1.57; 2.7.7.48; …","""Zika virus (isolate ZIKV/Human…",,"""CATALYTIC ACTIVITY: [RNA-direc…"
"""A0A024RXP8""","""reviewed""","""Exoglucanase 1 (EC 3.2.1.91) (…","""Evidence at protein level""",514,"""MYRKLAVISAFLATARAQSACTLQSETHPP…","""3.2.1.91""","""Hypocrea jecorina (strain ATCC…","""cbh1 M419DRAFT_125125""","""CATALYTIC ACTIVITY: Reaction=H…"
"""A0A024SC78""","""reviewed""","""Cutinase (EC 3.1.1.74)""","""Evidence at protein level""",248,"""MRSLAILTTLLAGHAFAYPKPAPQSVNRRD…","""3.1.1.74""","""Hypocrea jecorina (strain ATCC…","""M419DRAFT_76732""","""CATALYTIC ACTIVITY: Reaction=c…"
