# Prepare Production reaction structure pictures
> author: Shizhenkun   
> email: zhenkun.shi@tib.cas.cn   
> date: 2024-09-11  



## 1. Import packages

In [73]:
import sys,os
sys.path.insert(0, os.path.dirname(os.path.realpath('__file__')))
sys.path.insert(1,'../')
from config import conf as cfg
from modules.rxn.Reaction import Reaction
from modules.rxn.Molecule import Molecule
from tqdm import tqdm
from IPython.display import SVG, HTML, display
import pandas as pd
tqdm.pandas()  # Register the pandas integration with tqdm
FIRST_TIME_RUN = False # For the initial run, please set this flag to True. This will allow the program to download data from UniProt and RHEA, which may take longer depending on your internet speed.

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 2. Load data and Make reaction structure picture

In [74]:
ds_rxn = pd.read_feather(cfg.FILE_DS_RHEA_REACTIONS)
ds_rxn.head(2)

Unnamed: 0,reaction_id,equation,chebi_id,ec_number,equation_chebi,equation_smiles,equation_chebi_balanced
0,RHEA:22636,dCTP + H2O = dCMP + diphosphate + H(+),CHEBI:61481;CHEBI:15377;CHEBI:57566;CHEBI:3301...,EC:3.6.1.9;EC:3.6.1.12;EC:3.6.1.65,CHEBI:61481 + CHEBI:15377 = CHEBI:57566 + CHEB...,Nc1ccn([C@H]2C[C@H](O)[C@@H](COP([O-])(=O)OP([...,CHEBI:61481 + CHEBI:15377 = CHEBI:57566 + CHEB...
1,RHEA:22640,NADP(+) + sphinganine = 3-oxosphinganine + H(+...,CHEBI:58349;CHEBI:57817;CHEBI:58299;CHEBI:1537...,EC:1.1.1.102,CHEBI:58349 + CHEBI:57817 = CHEBI:58299 + CHEB...,NC(=O)c1ccc[n+](c1)[C@@H]1O[C@H](COP([O-])(=O)...,CHEBI:58349 + CHEBI:57817 = CHEBI:58299 + CHEB...


## 3.Make JSON file for reaction structure visualization

In [68]:
def make_rxn_structure_json(row):
    
    rxn_id = row.reaction_id
    rxn_smiles = row.equation_smiles
    rxn_equation = row.equation
    rxn_equation_ref_chebi = row.equation_chebi
    file_path = f'{cfg.DIR_RXN_JSON}{rxn_id}.json'.replace(':','_')
    write_path = f'{cfg.DIR_PROJECT_ROOT}/{file_path}'

    if not os.path.exists(write_path):
        rxn = Reaction( rxn_id=rxn_id,
                        rxn_smiles=rxn_smiles,
                        rxn_equation=rxn_equation,
                        rxn_equation_ref_chebi=rxn_equation_ref_chebi
                        )

        rxn.save_json_file(file_path=write_path)
        
    return file_path

In [70]:
ds_rxn.progress_apply(lambda x: make_rxn_structure_json(x), axis=1)

  0%|          | 0/12198 [00:00<?, ?it/s]

100%|██████████| 12198/12198 [03:55<00:00, 51.90it/s]


0        files/rxn_json/RHEA_22636.json
1        files/rxn_json/RHEA_22640.json
2        files/rxn_json/RHEA_22644.json
3        files/rxn_json/RHEA_22648.json
4        files/rxn_json/RHEA_22652.json
                      ...              
12193    files/rxn_json/RHEA_22604.json
12194    files/rxn_json/RHEA_22608.json
12195    files/rxn_json/RHEA_22612.json
12196    files/rxn_json/RHEA_22624.json
12197    files/rxn_json/RHEA_22628.json
Length: 12198, dtype: object

In [71]:
!mc mirror /hpcfs/fhome/shizhenkun/codebase/RXNRECer/files/ hpcs3/bucket-rxnrecer-web/files/

 0 B / ? ┃░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░▓┃ 5s[0;22m[0m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[0m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m

### 反应作图示例

In [72]:
rxn_id = 'RHEA:75967'
rxn1 = Reaction(rxn_smiles=ds_rxn[ds_rxn.reaction_id == rxn_id].equation_smiles.values[0],
                rxn_equation=ds_rxn[ds_rxn.reaction_id == rxn_id].equation.values[0],
                rxn_equation_ref_chebi=ds_rxn[ds_rxn.reaction_id == rxn_id].equation_chebi.values[0]
                )
HTML(rxn1.to_html())

In [62]:
print(rxn1.to_json())

{
    "reaction_id": "",
    "reaction_smiles": "[H]OCCCCCC(=O)[O-].[H]O[H]>>[H]OCCCCCC(=O)[O-].OCCCCCC([O-])=O.[H+]",
    "reaction_equation": "(6-hydroxyhexanoyl)n + H2O = (6-hydroxyhexanoyl)n-1 + 6-hydroxyhexanoate + H(+)",
    "reaction_equation_ref_chebi": "CHEBI:195201 + CHEBI:15377 = CHEBI:32383 + CHEBI:195201 + CHEBI:15378",
    "reaction_ec": "",
    "reactants": [
        {
            "cpd_id": "",
            "cpd_smiles": "[H]OCCCCCC(=O)[O-]",
            "cpd_name": "(6-hydroxyhexanoyl)n",
            "cpd_ref_chebi": "CHEBI:195201",
            "cpd_link": "https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:195201",
            "mol_svg": "files/cpd_svg/947935dc9f835f5ab69912d3c755f769.svg",
            "cpd_num": 1
        },
        {
            "cpd_id": "",
            "cpd_smiles": "[H]O[H]",
            "cpd_name": "H2O",
            "cpd_ref_chebi": "CHEBI:15377",
            "cpd_link": "https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:15377",
          

In [43]:
print(rxn1.to_html())

<div style='display: flex; align-items: center; font-size:40px;'><h2 style='font-sze:100px;'></h2> + <h2 style='font-sze:100px;'></h2> = <h2 style='font-sze:100px;'></h2> + <h2 style='font-sze:100px;'></h2> + <h2 style='font-sze:100px;'></h2>>')[0], 
                 cpd_name=ds_rxn[ds_rxn.reaction_id == 'RHEA:22648'].equation.values[0].split('+')[0],
                 cpd_ref_chebi='CHEBI:15377'
                 )
mol1.write_mol_svg(cfg.TEMP_DIR + "mol1.svg")       
HTML(mol1.to_html())

In [51]:
mol1.mol_svg

'files/cpd_svg/6050fdaa8c8af28b425eaf4332c46f3f.svg'