In [1]:
from hydra import initialize, compose
from pathlib import Path
import pandas as pd
from ergochemics.draw import draw_reaction, draw_molecule
from ergochemics.mapping import rc_to_nest

# Compose the "filepaths" config found under ./conf/filepaths.
# `cfg` is kept at notebook scope; later cells read data locations from it
# (e.g. cfg.processed_data).
with initialize(
    config_path="./conf/filepaths",
    version_base=None,
):
    cfg = compose(config_name="filepaths")

In [2]:
# Load the table of known reactions and remember how many rows it has;
# `tot_krs` serves as the denominator for the coverage percentages below.
known_reactions_path = (
    Path(cfg.processed_data) / "pathway" / "known_reactions.parquet"
)
krs = pd.read_parquet(known_reactions_path)
tot_krs = krs.shape[0]
print(f"Total known reactions: {tot_krs}")

Total known reactions: 28142


In [4]:
# Names of the rule sets to compare, grouped by family:
#   - "mechinformed"
#   - "mechinferred_dt_<suffix>" for each listed suffix
#   - "rc_plus_<k>" for k = 0..4
rules = (
    ["mechinformed"]
    + [f"mechinferred_dt_{sfx}" for sfx in ("01", "02", "04", "13", "91")]
    + [f"rc_plus_{k}" for k in range(5)]
)


# For every rule set, load its mapped-known-reactions table and report how
# many rows it holds relative to the total number of known reactions.
pathway_dir = Path(cfg.processed_data) / "pathway"  # same directory for all rule sets

for rule in rules:
    fn = f"mapped_known_reactions_x_{rule}_rules.parquet"
    df = pd.read_parquet(pathway_dir / fn)
    # Convert the stored template atom indices with rc_to_nest.
    # NOTE(review): after the loop `df` holds only the last rule set's table.
    df = df.assign(template_aidxs=df["template_aidxs"].apply(rc_to_nest))
    print(f"{rule} kr coverage: {len(df) * 100 / tot_krs:.1f}%, {len(df)} total reactions")

mechinformed kr coverage: 43.1%, 12137 total reactions
mechinferred_dt_01 kr coverage: 69.1%, 19433 total reactions
mechinferred_dt_02 kr coverage: 69.1%, 19433 total reactions
mechinferred_dt_04 kr coverage: 69.1%, 19439 total reactions
mechinferred_dt_13 kr coverage: 69.1%, 19439 total reactions
mechinferred_dt_91 kr coverage: 69.1%, 19439 total reactions
rc_plus_0 kr coverage: 69.1%, 19433 total reactions
rc_plus_1 kr coverage: 69.1%, 19437 total reactions
rc_plus_2 kr coverage: 69.1%, 19437 total reactions
rc_plus_3 kr coverage: 69.1%, 19435 total reactions
rc_plus_4 kr coverage: 69.1%, 19437 total reactions
