# VL frontend

Enter the SMILES of a compound in the SLAP virtual library (VL) to see its predicted properties and synthesizability.

In [None]:
# Input: replace this with the SMILES of the compound you are interested in.
# Do not include stereochemistry descriptors on the central heterocycle.
# The string is canonicalized before it is looked up in the virtual library.

query = "Cc1cnn(C)c1C1COC(C)C(c2ccc3cc(F)c(F)cc3n2)N1"


In [None]:
import pathlib
import sys
sys.path.append(str(pathlib.Path().resolve().parent))

import IPython.display
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw, rdChemReactions

from src.util import remove_mapno_from_reaction, canonicalize_smiles

In [None]:
# Load the SMILES table of every VL chunk (1749 files) and stack them into
# one DataFrame.
vl_dir = pathlib.Path().resolve().parent / "data" / "Data S5"
chunk_frames = [
    pd.read_csv(vl_dir / f"VL_chunk_{chunk_no:04}_smiles.csv.bz2")
    for chunk_no in range(1749)
]
df_allsmiles = pd.concat(chunk_frames)

In [None]:
# Check if the queried SMILES is in the VL.
# The query is canonicalized first so that it is compared in the same form as
# the "smiles" column (assumes that column holds canonical SMILES - TODO confirm).
canonical_query = canonicalize_smiles(query)
matches = df_allsmiles.loc[df_allsmiles["smiles"] == canonical_query, "mol_number"]
if matches.empty:
    # Stop here instead of only printing: continuing would either fail with a
    # NameError or silently reuse the mol_number of a previous query.
    raise ValueError("The input SMILES was not found in the virtual library")
# Take the value positionally; the concatenated frame has repeated index labels.
mol_number = int(matches.iloc[0])

In [None]:
# Work out which chunk file holds the queried compound: integer division of
# the molecule number by the number of molecules assigned to each chunk.
mols_per_chunk = 10000
chunk = mol_number // mols_per_chunk

In [None]:
# Load the property table of the relevant chunk only (not the whole VL).
props_path = vl_dir / f"VL_chunk_{chunk:04}.csv.bz2"
df_props = pd.read_csv(props_path)

In [None]:
# Extract the queried compound from the chunk.
# The index is reset so the matching row sits at label 0 regardless of its
# position in the chunk file; label-based lookups with 0 on this frame would
# otherwise raise a KeyError for every compound that is not the first row.
df_compound = df_props.loc[df_props["mol_number"] == mol_number].reset_index(drop=True)
if df_compound.empty:
    # Fail with a clear message rather than an IndexError/KeyError further down.
    raise ValueError(f"mol_number {mol_number} was not found in chunk {chunk}")

In [None]:
# Draw the structure as a visual check that the right compound was found.
# Positional access (.iloc[0]) so the lookup does not depend on the row label
# of df_compound.
mol = Chem.MolFromSmiles(df_compound["smiles"].iloc[0])
Draw.MolToImage(mol)

In [None]:
# Print the predicted properties.
# Values are read positionally (.iloc[0]) so the lookup does not depend on the
# row label of df_compound.
predicted_logd = df_compound["predicted_logD_pH7.4"].iloc[0]
predicted_log_solubility = df_compound["predicted_logSolubility_pH6.8_(mM)"].iloc[0]
predicted_pka = df_compound["center1_pKa"].iloc[0]

print(f"Predicted properties for {query}:")
print(f"Predicted logD at pH 7.4: {predicted_logd:.1f}")
# The solubility column holds a log10 value; convert back to mM for display.
print(f"Predicted aq solubility at pH 6.8: {10 ** predicted_log_solubility:.3f} mM")
print(f"Predicted pKa: {predicted_pka:.1f}")

In [None]:
# Show the full pKa information (there may be multiple ionizable centers):
# keep every "center*" column, drop the ones with missing values, and print
# the remaining values of the first row.
center_columns = [col for col in df_compound.columns if col.startswith("center")]
center_info = df_compound[center_columns].dropna(axis=1)
print(center_info.iloc[0])

In [None]:
# Human-readable labels for the integer codes stored in the prediction columns.
outcomes = {0: "failure", 1: "success"}
confidences = {0: "known reaction", 1: "very high", 2: "high", 3: "moderate", 4: "low"}

print("Reactions leading to this compound:\n")
# Two reaction slots are read per compound (columns rxn1_* and rxn2_*).
for rxn_no in (1, 2):
    # Positional access (.iloc[0]) so the lookup does not depend on the row
    # label of df_compound.
    rxn_smiles = df_compound[f"rxn{rxn_no}_smiles"].iloc[0]
    if not isinstance(rxn_smiles, str):
        # Non-string entry (e.g. NaN): no reaction stored in this slot.
        continue

    reaction_outcome = int(df_compound[f"rxn{rxn_no}_predictions"].iloc[0])
    prediction_confidence = int(df_compound[f"rxn{rxn_no}_confidence"].iloc[0])
    print(rxn_smiles)

    # Render the reaction and show it inline.
    rxn = rdChemReactions.ReactionFromSmarts(rxn_smiles, useSmiles=True)
    # NOTE(review): presumably strips atom-map numbers in place - confirm in src.util
    remove_mapno_from_reaction(rxn)
    d2d = Draw.MolDraw2DCairo(800, 300)
    d2d.DrawReaction(rxn)
    d2d.FinishDrawing()
    image_data = d2d.GetDrawingText()
    # Explicit IPython.display.display so this does not rely on the injected
    # `display` builtin, and without reusing the loop variable for the image.
    IPython.display.display(IPython.display.Image(image_data))

    print(f"Predicted outcome: {reaction_outcome} ({outcomes[reaction_outcome]})")
    print(f"Prediction confidence: {prediction_confidence} ({confidences[prediction_confidence]})")