# Add representations to DB
For every experiment, add representations to a separate table.
We use the following representations:
- Identifier of reactants I, M, T
- SMILES of product A (expected product)
- SMILES of reactants I, M, T
- reactionSMILES (plain, unmapped)
- atom-mapped, unbalanced reactionSMILES

In [None]:
import pathlib
import sys

sys.path.append(str(pathlib.Path().resolve().parents[1]))

from IPython.display import SVG, display
from rdkit import Chem
from rdkit.Chem import (
    Draw,
    rdChemReactions,
)

from src.util.db_utils import SynFermDatabaseConnection
from src.util.rdkit_util import map_reactions, desalt_building_block

In [None]:
# connect to DB
# `con` is the project's connection wrapper; the cells below run their SQL
# through its `con.con` attribute.
con = SynFermDatabaseConnection()

In [None]:
# import experiments
# One row per experiment: the experiment's own fields followed by the SMILES
# of its three building blocks. LEFT JOINs keep experiments even when a
# building block has no match (the corresponding SMILES is then NULL).
experiment_query = (
    "SELECT e.id, e.initiator_long, e.monomer_long, e.terminator_long, "
    "e.product_A_smiles, bb1.SMILES, bb2.SMILES, bb3.SMILES "
    "FROM experiments as e "
    "LEFT JOIN building_blocks as bb1 on e.initiator_long = bb1.long "
    "LEFT JOIN building_blocks as bb2 on e.monomer_long = bb2.long "
    "LEFT JOIN building_blocks as bb3 on e.terminator_long = bb3.long;"
)
res = con.con.execute(experiment_query).fetchall()

# names for the positions of each row in `res`, in SELECT order
columns = [
    'experiment_id',
    'initiator_long',
    'monomer_long',
    'terminator_long',
    'product_A_smiles',
    'initiator_smiles',
    'monomer_smiles',
    'terminator_smiles',
]


In [None]:
def make_reaction_smiles(initiator, monomer, terminator, product) -> str:
    """Build the plain (not atom-mapped) reactionSMILES for one experiment.

    The three reactants are joined with "." in the order initiator, monomer,
    terminator, and separated from the product by ">>".
    """
    template = "{}.{}.{}>>{}"
    return template.format(initiator, monomer, terminator, product)

In [None]:
# generate reactionSMILES for all records
# Row layout (see the SELECT above): index 4 is the product A SMILES,
# indices 5 onwards are the initiator, monomer and terminator SMILES.
reaction_smiles = [
    make_reaction_smiles(*record[5:], *record[4:5])
    for record in res
]
len(reaction_smiles)

In [None]:
# prepare reactions for atom-mapping
# Both templates are three-component reaction SMARTS (reactant patterns are
# separated by '.', the product follows '>>'). They share the first two
# reactant patterns:
#   1. [F][B-](F)(F)-C(=O)-*  (atomic numbers 9/5/6/8), mapped atoms 1-3
#   2. a spirocyclic pattern carrying mapped atoms 4-7
# and differ in the third pattern and the product ring that is formed.
# NOTE(review): presumably patterns 1-3 are matched against initiator, monomer
# and terminator in that order -- confirm against map_reactions.

# third reactant pattern: C-C(=S)-N-N (mapped atoms 8-12)
rxn_TH = rdChemReactions.ReactionFromSmarts(
        '[#9]-[#5-](-[#9])(-[#9])-[#6:2](-[*:1])=[#8:3].[#8]=[#6]-1-[#8]-C-2(-[#6]-[#6]-[#6]-[#6]-[#6]2)-[#8]-[#6:7]11-[#6:6]-[#6:5]-[#7:4]-[#8]-1.[#6:11]-[#6:10](=[#16:12])-[#7:9]-[#7:8]>>[#6:11]-[#6:10]:1:[#7:9]:[#7:8]:[#6:7](-[#6:6]-[#6:5]-[#7:4]-[#6:2](-[*:1])=[#8:3]):[#16:12]-1'
    )

# third reactant pattern: aromatic six-membered carbon ring with N and S
# substituents on neighbouring ring atoms (mapped atoms 8-15)
rxn_ABT = rdChemReactions.ReactionFromSmarts(
    "[#9]-[#5-](-[#9])(-[#9])-[#6:2](-[*:1])=[#8:3].[#8]=[#6]-1-[#8]-C-2(-[#6]-[#6]-[#6]-[#6]-[#6]2)-[#8]-[#6:7]11-[#6:6]-[#6:5]-[#7:4]-[#8]-1.[#7:8]-[#6:9]1:[#6:10]:[#6:11]:[#6:12]:[#6:13]:[#6:14]:1-[#16:15]>>[*:1]-[#6:2](=[#8:3])-[#7:4]-[#6:5]-[#6:6]-[#6:7]1:[#7:8]:[#6:9]2:[#6:10]:[#6:11]:[#6:12]:[#6:13]:[#6:14]:2:[#16:15]:1"
)
# prepare for visualization
rdChemReactions.Compute2DCoordsForReaction(rxn_TH)
rdChemReactions.Compute2DCoordsForReaction(rxn_ABT)
# prepare for enumeration
rxn_TH.Initialize()
rxn_ABT.Initialize()
# validate reactions
# Validate() is unpacked as (number of warnings, number of errors); only
# errors abort the notebook, warnings are ignored here.
n_warn_TH, n_err_TH = rxn_TH.Validate(silent=True)
n_warn_ABT, n_err_ABT = rxn_ABT.Validate(silent=True)
# NOTE(review): both messages below are identical, so the text alone does not
# say which template failed -- check the traceback line.
if n_err_TH > 0:
    raise ValueError(f'Invalid reaction gave {n_err_TH} errors in validation')
if n_err_ABT > 0:
    raise ValueError(f'Invalid reaction gave {n_err_ABT} errors in validation')

In [None]:
# draw the rxn_TH template for visual inspection (image is the cell output)
Draw.ReactionToImage(rxn_TH)

In [None]:
# draw the rxn_ABT template for visual inspection (image is the cell output)
Draw.ReactionToImage(rxn_ABT)

In [None]:
# peek at the first three experiment rows fetched from the DB
res[0:3]

In [None]:
# test the reactions on the first three records
# NOTE: It is expected that map_reactions(rxn_TH, ...) prints an error for ABTs and vice versa
# For each record, the SMILES from index 5 onwards (the building blocks) are
# parsed, desalted and wrapped in Chem.Mol before being handed to map_reactions.
rxn = map_reactions(
    rxn_TH,
    [
        [
            Chem.Mol(desalt_building_block(Chem.MolFromSmiles(building_block_smiles)))
            for building_block_smiles in record[5:]
        ]
        for record in res[0:3]
    ],
)

In [None]:
# check enumerated reactions: show the first outcome for the first test record
rxn[0][0]

In [None]:
# enumerate ABT reactions
# Every record's building-block SMILES (index 5 onwards) are parsed, desalted
# and wrapped in Chem.Mol, then run through the rxn_ABT template.
rxns_abt = map_reactions(
    rxn_ABT,
    [
        [
            Chem.Mol(desalt_building_block(Chem.MolFromSmiles(building_block_smiles)))
            for building_block_smiles in record[5:]
        ]
        for record in res
    ],
)

In [None]:
# enumerate TH reactions
# Every record's building-block SMILES (index 5 onwards) are parsed, desalted
# and wrapped in Chem.Mol, then run through the rxn_TH template.
rxns_th = map_reactions(
    rxn_TH,
    [
        [
            Chem.Mol(desalt_building_block(Chem.MolFromSmiles(building_block_smiles)))
            for building_block_smiles in record[5:]
        ]
        for record in res
    ],
)

In [None]:
# combine ABT and TH reactions
# Per record, keep the ABT result when it is truthy and fall back to the
# TH result otherwise.
rxns = [
    abt_result or th_result
    for abt_result, th_result in zip(rxns_abt, rxns_th)
]

len(rxns)

In [None]:
# verify this worked
# Every record must have produced a result, and that result must hold exactly
# one reaction. The messages name the offending position in `rxns` (which
# follows the order of `res`) so a failure can be traced back to its record.
for idx, r in enumerate(rxns):
    assert r is not None, f"No reaction was enumerated for record at index {idx}"
    assert len(r) == 1, (
        f"Expected exactly 1 enumerated reaction for record at index {idx}, got {len(r)}"
    )

In [None]:
# unwrap the single reaction from each per-record result (the verification
# cell asserts that every entry has length 1)
# NOTE: this rebinds `rxns`, so the cell is not meant to be run twice in a row.
rxns = [single_result[0] for single_result in rxns]

In [None]:
# serialise each enumerated reaction to a reactionSMILES string
reaction_smiles_atom_mapped = list(map(rdChemReactions.ReactionToSmiles, rxns))

In [None]:
# assemble results for committing to DB
# Each output row is the original DB row followed by its plain and its
# atom-mapped reactionSMILES.
data = [
    (*record, plain_smiles, mapped_smiles)
    for record, plain_smiles, mapped_smiles in zip(
        res, reaction_smiles, reaction_smiles_atom_mapped
    )
]

In [None]:
# write all assembled rows to the representations table
# NOTE(review): `with con.con:` presumably commits on success and rolls back
# on error (sqlite3-style connection context manager) -- confirm for the
# connection type wrapped by SynFermDatabaseConnection.
insert_sql = (
    "INSERT INTO representations "
    "(experiment_id, I_long, M_long, T_long, product_A_smiles, "
    "I_smiles, M_smiles, T_smiles, reaction_smiles, reaction_smiles_atom_mapped) "
    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"
)
with con.con:
    con.con.executemany(insert_sql, data)