# Add reactionSMILES to VL
We add atom-mapped reaction SMILES to the virtual library (VL).
We only do this for VL members where `type == 'A'`; entries whose initiator is `4-Pyrazole002` are excluded by the query below.

In [6]:
import pathlib
import sys

# Add the directory two levels above the current working directory to sys.path
# (presumably the repository root, so that the `src.*` imports below resolve — verify
# if the notebook is moved or started from a different directory).
sys.path.append(str(pathlib.Path().resolve().parents[1]))

from IPython.display import SVG, display
import pandas as pd

from src.util.db_utils import SynFermDatabaseConnection
from src.util.rdkit_util import map_reactions, desalt_building_block
from src.library_design.reaction_generator import SFReactionGenerator

In [9]:
# Reaction generator; its get_reaction_smiles() method is called on product SMILES
# in the cells below.
gen = SFReactionGenerator()

In [4]:
# Connect to the DB through the project's connection wrapper.
# Later cells access the underlying connection object as `con.con`.
con = SynFermDatabaseConnection()

In [22]:
# Load every type-'A' VL entry (except those with initiator '4-Pyrazole002').
# n.b. no further filtering happens here: entries that the 0D model cannot handle are
# filtered out later, and whether a reaction has been seen before does not matter at
# this point (the result can be merged with the reaction data later).
vl_query = "SELECT id, long_name, SMILES FROM virtuallibrary WHERE type = 'A' AND initiator_long != '4-Pyrazole002';"
vl_columns = ["vl_id", "long_name", "product_A_smiles"]

res = con.con.execute(vl_query).fetchall()
df = pd.DataFrame(res, columns=vl_columns)
df.head()

Unnamed: 0,vl_id,long_name,product_A_smiles
0,1,2-Pyr002 + Fused002 + TerABT001,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...
1,2,2-Pyr002 + Fused002 + TerABT004,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...
2,3,2-Pyr002 + Fused002 + TerABT005,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...
3,4,2-Pyr002 + Fused002 + TerABT006,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...
4,5,2-Pyr002 + Fused002 + TerABT007,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...


In [23]:
%%time
# Time reaction SMILES generation on the first 1000 rows to estimate the full runtime.
df.iloc[:1000]["product_A_smiles"].apply(gen.get_reaction_smiles)

CPU times: user 19.4 s, sys: 148 ms, total: 19.5 s
Wall time: 15min 26s


0      F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
1      F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
2      F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
3      F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
4      F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
                             ...                        
995    F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
996    F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
997    F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
998    F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
999    F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
Name: product_A_smiles, Length: 1000, dtype: object

In [24]:
# Expected time in minutes to process the entire VL, extrapolated from the
# 1000-row timing sample.
# NOTE(review): 19 s corresponds to the reported CPU time of the sample; the reported
# wall time for the same sample was 15min 26s — confirm which figure is representative.
sample_rows = 1000
sample_seconds = 19
len(df) / sample_rows * sample_seconds / 60

84.54746666666666

In [25]:
# we wrap the generator to catch errors
def get_reaction_smiles(x):
    """Return the reaction SMILES for product SMILES `x`, or None on failure.

    Delegates to the module-level generator `gen`. If the generator raises a
    ValueError, the error message and the offending input are printed and
    None is returned so that a bulk run is not aborted by a single entry.
    Any other exception type propagates unchanged.
    """
    try:
        reaction_smiles = gen.get_reaction_smiles(x)
    except ValueError as err:
        # Report the error and the input that caused it, one per line
        print(err, x, sep="\n")
        reaction_smiles = None
    return reaction_smiles
        

In [26]:
# Generate the atom-mapped reaction SMILES for every VL entry.
# This is slow: expect roughly 85 min on a Mac M1.
gen = SFReactionGenerator()  # re-instantiate the generator used by get_reaction_smiles
product_smiles = df["product_A_smiles"]
df["reaction_smiles_atom_mapped"] = product_smiles.apply(get_reaction_smiles)
df.head()

 4 3 
[17:34:25] product atom-mapping number 14 found multiple times.
[17:34:25] product atom-mapping number 13 found multiple times.
[17:34:25] product atom-mapping number 12 found multiple times.
[17:34:25] product atom-mapping number 11 found multiple times.
[17:34:25] product atom-mapping number 10 found multiple times.
[17:34:25] product atom-mapping number 9 found multiple times.
[17:34:25] product atom-mapping number 8 found multiple times.
[17:34:25] product atom-mapping number 7 found multiple times.
[17:34:25] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 1 2 6 5 4 3 
[17:34:25] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 7 8 9 10 11 12 13 14 
[17:34:25] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 7 8 9 10 11 12 13 14 
[17:34:25] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 1 2 3 
e reactants were not mapped i

Unnamed: 0,vl_id,long_name,product_A_smiles,reaction_smiles_atom_mapped
0,1,2-Pyr002 + Fused002 + TerABT001,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...,F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
1,2,2-Pyr002 + Fused002 + TerABT004,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...,F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
2,3,2-Pyr002 + Fused002 + TerABT005,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...,F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
3,4,2-Pyr002 + Fused002 + TerABT006,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...,F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...
4,5,2-Pyr002 + Fused002 + TerABT007,COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C...,F[B-](F)(F)[C:1](=[O:2])[c:15]1[cH:16][c:18]([...


In [27]:
# Write the generated reaction SMILES back to the virtuallibrary table, matching rows by id.
# Each parameter row is (reaction_smiles_atom_mapped, vl_id), in the order of the placeholders.
update_sql = "UPDATE virtuallibrary SET reaction_smiles_atom_mapped = ? WHERE id = ?;"
with con.con:
    update_params = df[["reaction_smiles_atom_mapped", "vl_id"]].values
    con.con.executemany(update_sql, update_params)