In [5]:
import json
from pathlib import Path

import pandas as pd
from hydra import initialize, compose
from IPython.display import SVG

from ergochemics.draw import draw_reaction, draw_molecule
from ergochemics.mapping import rc_to_nest

# Compose the "filepaths" Hydra config found under ./conf/filepaths.
# The resulting `cfg` supplies the `raw_data` and `processed_data` roots
# that the data-loading cells below read from.
with initialize(
    config_path="./conf/filepaths",
    version_base=None,
):
    cfg = compose(config_name="filepaths")

In [6]:
# Load the raw reaction collection (a JSON object, parsed into a dict) and
# report two counts: the total number of entries, and the number of entries
# whose "min_rule" field is not null.
rhea_path = (
    Path(cfg.raw_data) / "pathway" / "sprhea_240310_v3_mapped_no_subunits.json"
)

# Read explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding.
with open(rhea_path, "r", encoding="utf-8") as f:
    rhea = json.load(f)

print(len(rhea))
# Count matching entries directly instead of materialising a filtered dict.
print(sum(entry["min_rule"] is not None for entry in rhea.values()))

18954
13114


In [7]:
# Read the table of reactions mapped against the minimal rules and convert
# each stored reaction center with `rc_to_nest` as part of the load, so that
# re-running this cell always starts again from the file on disk.
fn = "mapped_sprhea_240310_v3_mapped_no_subunits_x_min_rules.parquet"
df = pd.read_parquet(Path(cfg.processed_data) / "pathway" / fn).assign(
    reaction_center=lambda frame: frame["reaction_center"].apply(rc_to_nest)
)

# Row count, then that count as a percentage of the entries in `rhea`.
# NOTE(review): this is rows-in-df over entries-in-rhea; it equals the share
# of reactions mapped only if each rxn_id appears at most once in df -- confirm.
n_rows = len(df)
print(n_rows)
print(f"{n_rows * 100 / len(rhea):.1f}%")
df.head()

13851
73.1%


Unnamed: 0,rxn_id,smarts,am_smarts,rule,reaction_center,rule_id
0,0,NC(CCC=O)C(=O)O.NC(=O)c1ccc[n+](C2OC(COP(=O)(O...,[NH2:7][CH:6]([CH2:5][CH2:3][CH:1]=[O:4])[C:8]...,[#6:1].[#6:2]1:[#6:3]:[#6:4]:[#6:5]:[#7+:6]:[#...,"(((4,), (3, 4, 5, 6, 7, 47), (2,)), ((0, 1), (...",819
1,1,CC(O)C(O)C(O)C(O)C(=O)O>>CC(O)C(O)CC(=O)C(=O)O.O,[CH3:7][CH:5]([OH:8])[CH:4]([OH:6])[CH:1]([OH:...,([#6:1]-[#8:2].[#6:3]-[#8:4])>>([#6:1].[#6:3]=...,"(((5, 6, 7, 8),), ((0, 1, 2), (0,)))",402
2,10,CCCCCCCCCCCC(=O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc...,[CH3:14][CH2:13][CH2:12][CH2:11][CH2:10][CH2:9...,[#6:1]-[#8:2].[#8:3]-[#15:4]>>[#6:1]-[#8:3].[#...,"(((11, 13), (2, 1)), ((0, 1), (0, 1)))",994
3,100,*C(=O)OCC(COP(=O)(O)OC1C(O)C(OP(=O)(O)O)C(O)C(...,[*:30][C:27](=[O:31])[O:25][CH2:23][CH:22]([CH...,[#6:1]-[#8:2].[#8:3]>>[#6:1]-[#8:3].[#8:2],"(((15, 16), (0,)), ((0, 1), (0,)))",517
4,1000,CCC=O.NC(=O)c1ccc[n+](C2OC(COP(=O)(O)OP(=O)(O)...,[CH3:5][CH2:3][CH:1]=[O:4].[NH2:13][C:12](=[O:...,[#6:1].[#6:2]1:[#6:3]:[#6:4]:[#6:5]:[#7+:6]:[#...,"(((2,), (3, 4, 5, 6, 7, 47), (0,)), ((0, 1), (...",819


In [9]:
# Rule whose mapped reactions are inspected below.
rule_of_i = 919

# For at most the first ten rows assigned to that rule: print the reaction id,
# draw the atom-mapped reaction, then draw each left-hand-side molecule with
# the atoms listed in the corresponding entry of reaction_center[0] highlighted.
is_rule = df["rule_id"] == rule_of_i
for _, row in df[is_rule].head(10).iterrows():
    print(f"Reaction: {row['rxn_id']}")
    display(SVG(draw_reaction(row["am_smarts"])))

    # Left-hand side of ">>", split into its "."-separated molecules.
    lhs_molecules = row["am_smarts"].partition(">>")[0].split(".")
    lhs_centers = row["reaction_center"][0]

    # NOTE(review): zip stops at the shorter sequence, so a length mismatch
    # between molecules and centers would be silently truncated -- confirm the
    # two are always aligned one-to-one.
    for molecule, center_atoms in zip(lhs_molecules, lhs_centers):
        display(
            SVG(draw_molecule(molecule, size=(300, 150), highlight_atoms=center_atoms))
        )