In [ ]:
# Develop SMARTS

A playground notebook for trying out substructure SMARTS and reaction SMARTS used for pattern matching and enumeration in SynFerm libraries.

In [None]:
import pathlib
import sys

import pandas as pd
from rdkit import Chem
from rdkit.Chem import ChiralType
from rdkit.Chem import Draw
from rdkit.Chem.rdChemReactions import ReactionFromSmarts
from rdkit.Chem.rdChemReactions import SanitizeRxn

sys.path.append(str(pathlib.Path().resolve().parents[1]))
from src.library_design.reaction_generator import SFReactionGenerator
from src.util.db_utils import SynFermDatabaseConnection
from src.util.rdkit_util import desalt_building_block

In [None]:
# Load the building-block table and split it by category (I / M / T)
con = SynFermDatabaseConnection()
# column names are taken from field 1 of each PRAGMA table_info row
header = [column[1] for column in con.con.execute("PRAGMA table_info(building_blocks)").fetchall()]
res = con.con.execute("SELECT * FROM building_blocks").fetchall()
df = pd.DataFrame(data=res, columns=header)
initiators = df[df["category"].eq("I")]
monomers = df[df["category"].eq("M")]
terminators = df[df["category"].eq("T")]

In [None]:
# Example molecule containing a benzothiazole ring and an exocyclic amide NH;
# it is the target for the substructure query built in the next cells.
mol = Chem.MolFromSmiles("COc1ccc(CCOC(=O)N2C[C@H](NC(=O)c3cc(Cl)ccn3)[C@H](c3nc4ccccc4s3)C2)cc1OC")
mol

In [None]:
# Substructure query: carbon (:1) - C(=O) - N that is in no ring ([NR0]) - carbon (:2) - carbon (:3) - benzothiazole.
# The four benzo-ring carbons that are not at the ring fusion carry map numbers :4-:7.
pat = Chem.MolFromSmarts("[#6:1]-[#6](=O)-[NR0]-[#6:2]-[#6:3]-c1nc2[c:4][c:5][c:6][c:7]c2s1")
pat

In [None]:
mol.GetSubstructMatches(pat)

In [None]:
mol

In [None]:
Chem.MolFromSmiles("COc1ccc(CCOC(=O)N2C[C@@H]3NOC4(OC5(CCCCC5)OC4=O)[C@@H]3C2)cc1OC")

In [None]:
Chem.MolFromSmiles("COc1ccc(CCOC(=O)N2C[C@@H]3NO[C@]4(OC5(CCCCC5)OC4=O)[C@@H]3C2)cc1OC.Cl")

In [None]:
# Rebind `mol` to a monomer-like molecule whose spiro carbon is written with an explicit
# stereo tag ([C@]4) and without the ".Cl" counter-ion used in the previous cell.
mol = Chem.MolFromSmiles("COc1ccc(CCOC(=O)N2C[C@@H]3NO[C@]4(OC5(CCCCC5)OC4=O)[C@@H]3C2)cc1OC")
mol

In [None]:
# Rebinds `pat` (previously the amide/benzothiazole query).
# The whole environment is wrapped in a recursive SMARTS `$(...)`, so the query consists of a
# single atom and every match tuple contains only the index of the spiro carbon.
pat = Chem.MolFromSmarts("[$([CR2](O1)(ONC2)(C2)C(=O)OC1)]")  # hits a spiro carbon atom between an isoxazolidine and a 5-membered lactone-ether ring
pat

In [None]:
mol.GetSubstructMatches(pat)

In [None]:
# Index 16 is the spiro carbon ([C@]4) when counting the atoms of the SMILES parsed above from 0.
# NOTE(review): hard-coded index; it is only valid for that exact SMILES.
a = mol.GetAtomWithIdx(16)

In [None]:
a.SetChiralTag(ChiralType.CHI_UNSPECIFIED)

In [None]:
# Sanity check: after clearing the chiral tag, the SMILES should equal the variant in which
# the spiro carbon is written without stereo (plain C4) while both [C@@H] centres are kept.
Chem.MolToSmiles(mol) == "COc1ccc(CCOC(=O)N2C[C@@H]3NOC4(OC5(CCCCC5)OC4=O)[C@@H]3C2)cc1OC"

In [None]:
# SMARTS for the spiro carbon shared by the isoxazolidine and the 5-membered
# lactone-ether ring of the monomer protecting group.
_MONOMER_PG_SPIRO_SMARTS = "[$([CR2](O1)(ONC2)(C2)C(=O)OC1)]"


def remove_monomer_pg_chirality(mol, pattern=None):
    """
    Given a monomer building block, remove only the chiral information
    for the spiro carbon of the protecting group, leaving the rest unchanged.

    Args:
        mol: SMILES string or RDKit Mol. A Mol is copied, so the input is never modified.
        pattern: Optional RDKit query Mol whose first matched atom is the spiro carbon.
            Defaults to the protecting-group SMARTS defined next to this function, so the
            result does not depend on whatever the notebook-level `pat` currently holds.

    Returns:
        A new RDKit Mol with the chiral tag of the spiro carbon set to CHI_UNSPECIFIED.

    Raises:
        ValueError: If a SMILES string cannot be parsed, or if the pattern does not
            match exactly once.
    """
    if pattern is None:
        pattern = Chem.MolFromSmarts(_MONOMER_PG_SPIRO_SMARTS)

    if isinstance(mol, str):
        smiles = mol
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None
            raise ValueError(f"Could not parse SMILES: {smiles!r}")
    else:
        # work on a copy so the caller's molecule keeps its stereo information
        mol = Chem.Mol(mol)

    matches = mol.GetSubstructMatches(pattern)
    if len(matches) != 1:
        raise ValueError(
            f"Expected exactly one protecting-group spiro carbon, found {len(matches)} "
            f"in {Chem.MolToSmiles(mol)}"
        )

    spiro_atom = mol.GetAtomWithIdx(matches[0][0])
    spiro_atom.SetChiralTag(ChiralType.CHI_UNSPECIFIED)
    return mol

In [None]:
# Draw every monomer after clearing the stereo tag on its protecting-group spiro carbon
Draw.MolsToGridImage(list(map(remove_monomer_pg_chirality, monomers["SMILES"])))

In [None]:
con.get_vl_member(19)

In [None]:
# Retro-template for benzothiazole products: one input matching
# carbon(:1)-C(=O)-N-C(:2)-C(:3)-benzothiazole is split into three fragments:
# an acyl trifluoroborate, a spiro isoxazolidine with a cyclohexyl ketal/lactone, and an aminothiophenol.
# Only the mapped atoms (:1-:7) are carried over from the input; everything else is created from the template.
_backwards_rxn_abt = "[#6:1]-C(=O)-N-[C:2]-[C:3]-c1nc2[c:4][c:5][c:6][c:7]c2s1>>F-[B-](-F)(-F)-C(-[#6:1])=O.O=C1-O-C2(-C-C-C-C-C-2)-O-C-1-1-[C:3]-[C:2]-N-O-1.N-c1[c:4][c:5][c:6][c:7]c1-S"
rxn = ReactionFromSmarts(_backwards_rxn_abt)
rxn

In [None]:
# Apply the current `rxn` to virtual-library member 18 and keep the first product set.
# Rebinds `res`, which earlier held the raw rows of the building_blocks query.
res = rxn.RunReactants((con.get_vl_member(18),))[0]
# Sanitize each fragment in place; the list shown as cell output holds SanitizeMol's
# return values, not the molecules themselves.
[Chem.SanitizeMol(m) for m in res]

In [None]:
Draw.MolsToGridImage(res)

In [None]:
Chem.MolFromSmiles("NNC(=S)/C=C/c1ccccc1")

In [None]:
Chem.MolFromSmiles("NNC(=S)C=Cc1ccccc1")

In [None]:
Chem.MolToSmiles(con.get_vl_member(25))

In [None]:
pat

In [None]:
# Fetch virtual-library member 288 and display it.
mol = con.get_vl_member(288)
# The match result is not the last expression, so it is neither stored nor displayed.
# NOTE(review): presumably called so that RDKit's notebook integration highlights the
# matched atoms when `mol` is rendered below - confirm that this is the intent.
mol.GetSubstructMatches(pat)
mol

In [None]:
con.get_vl_member(54051)

In [None]:
# Forward three-component template (benzothiazole product):
#   reactant 1: acyl trifluoroborate - the BF3 boron is a single recursive-SMARTS atom, unmapped
#   reactant 2: spiro isoxazolidine monomer - only atoms :4-:7 are mapped
#   reactant 3: ortho-aminothiophenol (:8-:15)
# Product: amide (:1-:4) linked through C:5-C:6 to a benzothiazole whose C2 is the former spiro carbon :7.
# NOTE(review): the product side ends with a trailing "." - confirm that this is intentional.
rxn = ReactionFromSmarts(
    "[$(B(-F)(-F)-F)]-[C:2](-[#6:1])=[O:3].O=C1-O-[$(C2CCCCC2)]-O-[C:7]-1-1-[C:6]-[C:5]-[N:4]-O-1.[N:8]-[c:9]1:[c:10]:[c:11]:[c:12]:[c:13]:[c:14]:1-[S:15]>>[#6:1]-[C:2](=[O:3])-[N:4]-[C:5]-[C:6]-[c:7]1:[n:8]:[c:9]2:[c:10]:[c:11]:[c:12]:[c:13]:[c:14]:2:[s:15]:1."
)
rxn

In [None]:
# Forward three-component template (thiadiazole product). Reactants 1 and 2 are the same as in the
# benzothiazole template; reactant 3 is a thiohydrazide, carbon(:8)-C(:9)(=S:10)-N(:11)-N(:12).
# Product: a five-membered aromatic c-n-n-c-s ring built from :9, :11, :12, :7 and :10,
# carrying the C:6-C:5-N:4-C:2(=O:3) amide chain on the former spiro carbon :7.
# NOTE(review): the product side ends with a trailing "." - confirm that this is intentional.
rxn = ReactionFromSmarts(
"[$(B(-F)(-F)-F)]-[C:2](-[#6:1])=[O:3].O=C1-O-[$(C2CCCCC2)]-O-[C:7]-1-1-[C:6]-[C:5]-[N:4]-O-1.[#6:8]-[C:9](=[S:10])-[N:11]-[N:12]>>[#6:8]-[c:9]1:[n:11]:[n:12]:[c:7](-[C:6]-[C:5]-[N:4]-[C:2](-[#6:1])=[O:3]):[s:10]:1."
)
rxn

In [None]:
i = Chem.MolFromSmiles(con.get_smiles("I2"))
i

In [None]:
m = Chem.MolFromSmiles(con.get_smiles("M2"))
m

In [None]:
t = Chem.MolFromSmiles(con.get_smiles("T20"))
t

In [None]:
rxn

In [None]:
# Run the current `rxn` on initiator, monomer and terminator, in the order of the reactant templates.
# RunReactants returns a tuple of product tuples (one inner tuple per way the templates matched).
prods = rxn.RunReactants((i,m,t))

In [None]:
prods[0][0]

In [None]:
prods

In [None]:
# Rebinds the notebook-level `pat` to the second reactant template (index 1) of the current `rxn`.
# Code that reads `pat` afterwards no longer sees the spiro-carbon query defined earlier.
pat = rxn.GetReactantTemplate(1)

# The match result is discarded; only `m` is displayed.
# NOTE(review): presumably called so that RDKit's notebook integration highlights the
# matched atoms when `m` is rendered - confirm that this is the intent.
m.GetSubstructMatches(pat)
m

In [None]:
# A fully atom-mapped reaction SMILES for one concrete reaction
# (acyl trifluoroborate + spiro monomer + thiohydrazide >> amide-linked thiadiazole).
smi = "F[B-](F)(F)[C:2]([c:1]1[cH:13][cH:15][cH:18][cH:17][c:14]1[CH3:16])=[O:3].O=C1OC2(CCCCC2)O[C:7]12O[NH:4][C:5]1([CH2:6]2)[CH2:19][S:21][CH2:20]1.[c:8]1([C:9](=[S:10])[NH:11][NH2:12])[cH:22][cH:24][c:26]2[c:25]([cH:23]1)[O:27][CH2:29][O:28]2>>[c:1]1([C:2](=[O:3])[NH:4][C:5]2([CH2:6][c:7]3[s:10][c:9](-[c:8]4[cH:22][cH:24][c:26]5[c:25]([cH:23]4)[O:27][CH2:29][O:28]5)[n:11][n:12]3)[CH2:19][S:21][CH2:20]2)[cH:13][cH:15][cH:18][cH:17][c:14]1[CH3:16]"

# useSmiles=True: the text is parsed as reaction SMILES rather than as SMARTS
rxn = ReactionFromSmarts(smi, useSmiles=True)
rxn

In [None]:
# Load three columns of the curated reaction dataset.
# This rebinds `df`, which previously held the building-block table; `initiators`, `monomers`
# and `terminators` were derived from the old frame beforehand and keep their contents.
df = pd.read_csv("../../data/curated_data/synferm_dataset_2023-09-05_40018records.csv")[["product_A_smiles", "reaction_smiles_atom_mapped", "experiment_id"]]

In [None]:
# Show product A of a single experiment; `.item()` requires exactly one matching row.
# NOTE(review): the id is compared as the string "56113" - if read_csv parsed experiment_id
# as integers nothing matches and `.item()` raises; confirm the column dtype.
Chem.MolFromSmiles(df.loc[df.experiment_id == "56113", "product_A_smiles"].item())

In [None]:
Chem.MolToSmiles(con.get_vl_member(35080))

In [None]:
# reactants to product H
# Both templates use the same first two reactants as the regular forward templates, but the
# monomer pattern demands an explicit hydrogen atom on C:5 (`-[H]`), so the monomer passed in
# needs hydrogens as real atoms (e.g. from Chem.AddHs) for the pattern to match.
# The product side contains only the heterocycle (benzothiazole / thiadiazole) attached to a
# C:5=C:4 double bond; atoms :1-:3 of the acyl/amide part do not appear in the product.
_rxn_abt_H = "[$(B(-F)(-F)-F)]-[$(C-[#6])X3:1]=[O:2].O=C1-O-[$(C2CCCCC2)]-O-[C:6]-1-1-[C:5](-[H])-[C:4]-[NH1:3]-O-1.[NH2:7]-[c:8]1:[c:9]:[c:10]:[c:11]:[c:12]:[c:13]:1-[SH1:14]>>[C:4]=[C:5]-[c:6]:1:[n:7]:[c:8]2:[c:9]:[c:10]:[c:11]:[c:12]:[c:13]:2:[s:14]:1"
_rxn_th_H = "[$(B(-F)(-F)-F)]-[$(C-[#6])X3:1]=[O:2].O=C1-O-[$(C2CCCCC2)]-O-[C:6]-1-1-[C:5](-[H])-[C:4]-[NH1:3]-O-1.[C:7](=[S:8])-[NH1:9]-[NH2:10]>>[s:8]:1:[c:7]:[n:9]:[n:10]:[c:6]:1-[C:5]=[C:4]"


In [None]:
rxn = ReactionFromSmarts(_rxn_abt_H)
rxn

In [None]:
rxn = ReactionFromSmarts(_rxn_th_H)
rxn

In [None]:
initiators.loc[initiators.long == "2-Pyr002"]

In [None]:
monomers.loc[monomers.long == "Fused002"]

In [None]:
terminators.loc[terminators.long == "TerABT001"]

In [None]:
Chem.AddHs(desalt_building_block(Chem.MolFromSmiles(monomers.at[134, "SMILES"])))

In [None]:
terminators.at[78, "SMILES"]

In [None]:
from rdkit.Chem.rdChemReactions import SanitizeRxn

In [None]:
# Explicitly initialize the current reaction object before running it.
rxn.Initialize()
# Sanitizing the reaction templates was tried here and is currently switched off.
#SanitizeRxn(rxn)

In [None]:
rxn

In [None]:
# Three-component run with desalted building blocks, selected by DataFrame index label:
# initiator 3, monomer 78, terminator 152.
# Only the monomer gets explicit hydrogens (Chem.AddHs); the "product H" templates above
# contain an explicit `[H]` atom in the monomer pattern.
prods = rxn.RunReactants(
    [desalt_building_block(Chem.MolFromSmiles(initiators.at[3, "SMILES"])), 
     Chem.AddHs(desalt_building_block(Chem.MolFromSmiles(monomers.at[78, "SMILES"]))), 
     desalt_building_block(Chem.MolFromSmiles(terminators.at[152, "SMILES"]))
    ]
)

In [None]:
prods

In [None]:
p = prods[0][0]
p

In [None]:
# Display a hydrogen-stripped version of `p`; the result is not assigned, so `p` keeps its hydrogens.
Chem.rdmolops.RemoveAllHs(p)

In [None]:
p = Chem.rdmolops.RemoveHs(p)

In [None]:
Chem.SanitizeMol(p)

In [None]:
p

In [None]:
# Parse a product SMILES with all hydrogens written out, without sanitization.
# The `[C@]` atom has one double bond plus three single bonds (to [H], the benzothiazole carbon
# and a ring CH2), i.e. five bonds in total, so sanitization has to be skipped to load it.
Chem.MolFromSmiles('[H]C1=[C@]([H])(c2nc3ccccc3s2)C([H])([H])N(C(=O)OC([H])([H])C([H])([H])c2c([H])c([H])c(OC([H])([H])[H])c(OC([H])([H])[H])c2[H])C1([H])[H]', sanitize=False)

In [None]:
Chem.MolToSmiles(p)

In [None]:
p.UpdatePropertyCache()

In [None]:
p.GetAtomWithIdx(5).GetTotalValence()

In [None]:
p.GetAtomWithIdx(5).GetNumExplicitHs()

In [None]:
p = Chem.rdmolops.AddHs(p)

In [None]:
p

In [None]:
# Strip hydrogens without sanitizing; updateExplicitCount=True makes RDKit add each removed H
# to the explicit-H count of the heavy atom it was bonded to.
p = Chem.rdmolops.RemoveHs(p, sanitize=False, updateExplicitCount=True)

In [None]:
Chem.rdmolops.Cleanup(p)

In [None]:
Chem.SanitizeMol(p)

In [None]:
# `a` was bound much earlier via mol.GetAtomWithIdx(16); it belongs to that earlier molecule,
# not to the product `p` inspected in the surrounding cells.
a.GetNeighbors()

In [None]:
# Round-trip through RDKit to get the canonical SMILES (alkene written pyridine-first);
# compare the output with the next cell, which writes the same atoms in the opposite order.
Chem.MolToSmiles(Chem.MolFromSmiles("C(=Cc1cccnc1)c1nnc(-c2cccc3[nH]ncc23)s1"))

In [None]:
# Same round-trip with the alkene written thiadiazole-first; compare with the previous cell's output.
Chem.MolToSmiles(Chem.MolFromSmiles("C(=Cc1nnc(-c2cccc3[nH]ncc23)s1)c1cccnc1"))

In [None]:
from src.library_design.reaction_generator import SFReactionGenerator

In [None]:
rxn_generator = SFReactionGenerator()

In [None]:
# Decompose virtual-library member 35080 into its reactants and list them as SMILES
[
    Chem.MolToSmiles(reactant)
    for reactant in rxn_generator.generate_reactants(Chem.MolToSmiles(con.get_vl_member(35080)))
]

In [None]:
rxn_generator.backwards_reactions["th"]

In [None]:
# Redefines `_backwards_rxn_abt` (first defined near the top of the notebook).
# Here the mapped atom :1 is the carbonyl carbon itself (a recursive SMARTS requires a carbon
# neighbour), the amide nitrogen must be outside any ring ([NR0]), and the aminothiophenol
# fragment is written with explicit aromatic bonds.
_backwards_rxn_abt = "[$(C-[#6]):1](=O)-[NR0]-[C:2]-[C:3]-c1nc2[c:4][c:5][c:6][c:7]c2s1>>F-[B-](-F)(-F)-[C:1]=O.O=C1-O-C2(-C-C-C-C-C-2)-O-C-1-1-[C:3]-[C:2]-N-O1.N-c1:[c:4]:[c:5]:[c:6]:[c:7]:c:1-S"


In [None]:
# Retro-template for thiadiazole products: ring carbon :4 becomes the thiocarbonyl carbon of
# a C(=S)-N-N fragment, the C:3-C:2 chain goes into the spiro isoxazolidine fragment, and the
# amide carbonyl carbon :1 becomes the acyl carbon of the trifluoroborate.
_backwards_rxn_th = "[c:4]1nnc(-[C:3]-[C:2]-[NR0]-[$(C-[#6]):1]=O)s1>>F-[B-](-F)(-F)-[C:1]=O.O=C1-O-C2(-C-C-C-C-C-2)-O-C-1-1-[C:3]-[C:2]-N-O1.[C:4](=S)-N-N"


In [None]:
rxn = ReactionFromSmarts(_backwards_rxn_abt)
rxn

In [None]:
prods = rxn.RunReactants((con.get_vl_member(35080),))

In [None]:
prods[0][0]

In [None]:
# Forward benzothiazole template with tighter atom constraints: the acyl carbon must have
# three connections and a carbon neighbour ([$(C-[#6])X3:1]), the isoxazolidine nitrogen
# carries exactly one H ([NH1:3]), and the terminator needs a free NH2 and SH.
# NOTE(review): the product side ends with a trailing "." - confirm that this is intentional.
_rxn_abt = "[$(B(-F)(-F)-F)]-[$(C-[#6])X3:1]=[O:2].O=C1-O-[$(C2CCCCC2)]-O-[C:6]-1-1-[C:5]-[C:4]-[NH1:3]-O-1.[NH2:7]-[c:8]1:[c:9]:[c:10]:[c:11]:[c:12]:[c:13]:1-[SH1:14]>>[C:1](=[O:2])-[N:3]-[C:4]-[C:5]-[c:6]1:[n:7]:[c:8]2:[c:9]:[c:10]:[c:11]:[c:12]:[c:13]:2:[s:14]:1."
rxn = ReactionFromSmarts(_rxn_abt)

In [None]:
# Forward thiadiazole template with the same reactant 1/2 constraints as `_rxn_abt`;
# reactant 3 is a thiohydrazide C(:7)(=S:8)-NH(:9)-NH2(:10).
# Rebinds `rxn`, replacing the benzothiazole reaction built in the previous cell.
# NOTE(review): the product side ends with a trailing "." - confirm that this is intentional.
_rxn_th = "[$(B(-F)(-F)-F)]-[$(C-[#6])X3:1]=[O:2].O=C1-O-[$(C2CCCCC2)]-O-[C:6]-1-1-[C:5]-[C:4]-[NH1:3]-O-1.[C:7](=[S:8])-[NH1:9]-[NH2:10]>>[c:7]1:[n:9]:[n:10]:[c:6](-[C:5]-[C:4]-[N:3]-[C:1]=[O:2]):[s:8]:1."
rxn = ReactionFromSmarts(_rxn_th)

In [None]:
rxn

In [None]:
from src.util.rdkit_util import desalt_building_block

In [None]:
# Print the element symbol RDKit reports for each query atom of the first reactant template.
# The loop variable `atom` stays bound in the notebook namespace afterwards.
for atom in rxn.GetReactantTemplate(0).GetAtoms():
    print(atom.GetSymbol())

In [None]:
rxn.Initialize()

In [None]:
Chem.rdChemReactions.SanitizeRxn(rxn)

In [None]:
# Prepare atom indices for drawing: store each atom's index as its atom-map number so the
# index is rendered next to the atom. This modifies `mol` in place.
# Only the atom is needed, so no counter is bound - the notebook-level name `i`
# (the initiator molecule) is left untouched.
for atom in mol.GetAtoms():
    atom.SetProp("molAtomMapNumber", str(atom.GetIdx()))
mol

In [None]:
# Three-component run with desalted building blocks, selected by DataFrame index label:
# initiator 0, monomer 78, terminator 172. No explicit hydrogens are added here.
prods = rxn.RunReactants(
    [desalt_building_block(Chem.MolFromSmiles(initiators.at[0, "SMILES"])), 
     desalt_building_block(Chem.MolFromSmiles(monomers.at[78, "SMILES"])), 
     desalt_building_block(Chem.MolFromSmiles(terminators.at[172, "SMILES"]))
    ]
)

In [None]:
# Take the first product of the first product set (rebinding `mol`) and sanitize it in place;
# an IndexError here means the reaction produced no products.
mol = prods[0][0]
Chem.SanitizeMol(mol)

In [None]:
mol

In [None]:
Chem.MolFromSmiles(initiators.at[0, "SMILES"])

In [None]:
terminators

In [None]:
# Render one concrete, fully atom-mapped reaction (parsed as SMILES via useSmiles=True).
# In this variant the aryl carbon attached to the acyl group is map number 1 and the carbonyl is :2/:3.
ReactionFromSmarts(
    "F[B-](F)(F)[C:2]([c:1]1[cH:13][cH:15][cH:17][c:16]([O:18][CH3:19])[n:14]1)=[O:3].O=C1OC2(CCCCC2)O[C:7]12O[NH:4][CH2:5][C:6]21[CH2:20][CH2:22][CH2:21]1.[c:8]1([C:9](=[S:10])[NH:11][NH2:12])[cH:23][n:25][nH:26][cH:24]1>>[c:1]1([C:2](=[O:3])[NH:4][CH2:5][C:6]2([c:7]3[s:10][c:9](-[c:8]4[cH:23][n:25][nH:26][cH:24]4)[n:11][n:12]3)[CH2:20][CH2:22][CH2:21]2)[cH:13][cH:15][cH:17][c:16]([O:18][CH3:19])[n:14]1",
    useSmiles=True
)

In [None]:
# NOTE(review): appears to be the same reaction as in the previous cell with a different
# atom-map numbering (carbonyl is :1/:2 here, matching the numbering used in `_rxn_th`) - confirm.
ReactionFromSmarts(
    "F[B-](F)(F)[C:1](=[O:2])[c:11]1[cH:12][cH:14][cH:16][c:15]([O:17][CH3:18])[n:13]1.O=C1OC2(CCCCC2)O[C:6]12O[NH:3][CH2:4][C:5]21[CH2:19][CH2:21][CH2:20]1.[C:7](=[S:8])([NH:9][NH2:10])[c:22]1[cH:23][n:25][nH:26][cH:24]1>>[C:1](=[O:2])([NH:3][CH2:4][C:5]1([c:6]2[s:8][c:7](-[c:22]3[cH:23][n:25][nH:26][cH:24]3)[n:9][n:10]2)[CH2:19][CH2:21][CH2:20]1)[c:11]1[cH:12][cH:14][cH:16][c:15]([O:17][CH3:18])[n:13]1",
    useSmiles=True
)