In [2]:
import pandas as pd
import mols2grid
# import numpy
# import matplotlib
from rdkit import Chem
from rdkit.Chem import AllChem, Draw, PandasTools
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.Descriptors import MolWt, MolLogP, TPSA, NumRotatableBonds, NumHDonors, NumHAcceptors
from rdkit import DataStructs, RDLogger
# Ask PandasTools to render molecule columns as images in every DataFrame shown in this notebook.
PandasTools.RenderImagesInAllDataFrames(images=True)

# Raise the RDKit logger threshold to CRITICAL so lower-severity messages stay out of the cell outputs.
lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL)

def contains_vina_unsupported_atoms(mol):
    """Report whether ``mol`` has any atom whose element is outside the allow-list below.

    Args:
        mol: Object exposing ``GetAtoms()``, whose items expose ``GetAtomicNum()``
            (an RDKit molecule in this notebook).

    Returns:
        bool: ``True`` if at least one atom has an atomic number that is not in the
        allow-list, ``False`` otherwise (including for a molecule with no atoms).
    """
    # Element allow-list, taken from the AutoDock Vina atom table:
    # https://github.com/ccsb-scripps/AutoDock-Vina/blob/6af928b9d5f772d26f7f02f7f219e1ea7d97ed05/src/lib/atom_constants.h
    vina_supported = {
        1, 6, 7, 8,       # H, C, N, O
        15, 16,           # P, S
        9, 17, 35, 53,    # F, Cl, Br, I
        14, 85,           # Si, At
    }
    # any() stops at the first offending atom, like an early return would.
    return any(atom.GetAtomicNum() not in vina_supported for atom in mol.GetAtoms())

# Load downloaded ZINC *.sdf file

In [58]:
# Read ZINC15_fda.sdf into a DataFrame; the cells below read the molecules from its "ROMol" column.
# NOTE(review): embedProps=True presumably copies the SDF data fields onto each molecule object,
# and removeHs=False presumably keeps the hydrogens present in the file — confirm against the RDKit docs.
df = PandasTools.LoadSDF("ZINC15_fda.sdf", embedProps=True, removeHs=False)

In [59]:
# Browse the freshly loaded molecules in an interactive grid (structures come from the "ROMol" column).
mols2grid.display(df, mol_col='ROMol')

MolGridWidget()

In [60]:
# Descriptor functions and the column names they are stored under (kept in matching order).
descs       = [ MolWt,  MolLogP,  NumHDonors,  NumHAcceptors,  NumRotatableBonds,  TPSA]
descs_names = ["MolWt","MolLogP","NumHDonors","NumHAcceptors","NumRotatableBonds","TPSA"]

# Evaluate every descriptor on each molecule, attach the results as columns,
# and order the table by ascending molecular weight.
df = df.assign(
    **{
        column: df["ROMol"].apply(descriptor)
        for descriptor, column in zip(descs, descs_names)
    }
).sort_values(by="MolWt")

In [61]:
# Filter molecules (rows) by descriptor windows, then by element support.
df = (
    df
    # Molecular weight window: strictly above 150, at most 500.
    .loc[lambda d: d["MolWt"].gt(150) & d["MolWt"].le(500)]
    # Lipophilicity and hydrogen-bonding caps.
    .loc[lambda d: d["MolLogP"].le(5)]
    .loc[lambda d: d["NumHAcceptors"].le(10)]
    .loc[lambda d: d["NumHDonors"].le(5)]
    # Flexibility cap: fewer than 10 rotatable bonds.
    .loc[lambda d: d["NumRotatableBonds"].lt(10)]
    # Discard anything containing an element outside the Vina allow-list.
    .loc[lambda d: ~d["ROMol"].apply(contains_vina_unsupported_atoms)]
)
print(len(df))

1027


In [62]:
# Acid pattern: an aliphatic C or S that is double-bonded to O, S or P and single-bonded
# to an oxygen that either carries one hydrogen or has a -1 charge.
acids = Chem.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")

# One boolean per row, in row order: does the molecule contain the acid pattern?
arr_bool = df["ROMol"].apply(lambda mol: mol.HasSubstructMatch(acids)).tolist()

# Keep only the matching molecules.
df = df[arr_bool]

In [63]:
# Report how many molecules survived the acid filter, then browse them in the grid.
print(df.shape[0])
mols2grid.display(df, mol_col="ROMol")

168


MolGridWidget()

In [64]:
# Write the acid subset to disk, using the "zinc_id" column as the record identifier.
# Alternative kept for reference: pass every DataFrame column explicitly.
# properties=list(df.columns)
# NOTE(review): with properties=None no DataFrame columns are requested explicitly;
# confirm against the RDKit docs which data fields end up in the file.
properties = None
PandasTools.WriteSDF(
    df,
    "ZINC15_fda_acids.sdf",
    molColName="ROMol",
    idName="zinc_id",
    properties=properties,
    allNumeric=False,
)

In [65]:
# Re-process ZINC15_fda_acids.sdf with Open Babel and write ZINC15_fda_Ro5_acids_7.4.sdf.
# NOTE(review): "-p 7.4" is presumably protonation for pH 7.4 and "--unique inchi" presumably
# InChI-based de-duplication — confirm against the Open Babel command-line docs.
!obabel -isdf ZINC15_fda_acids.sdf -O ZINC15_fda_Ro5_acids_7.4.sdf -p 7.4 --unique inchi
# Disabled alternative: do the property/acid filtering inside Open Babel instead of pandas.
# NOTE(review): the expression below has an unclosed "(" before s= — fix it before re-enabling.
#--filter 'MW<=500 logP<=5 HBD<=5 HBA2<=10 abonds>1 rotors<10 (s="[$([C,S](=[O,S,P])-[O;H1,-1])]"'


*** Open Babel Error  in TetStereoToWedgeHash
  Failed to set stereochemistry as unable to find an available bond
*** Open Babel Error  in TetStereoToWedgeHash
  Failed to set stereochemistry as unable to find an available bond
*** Open Babel Error  in TetStereoToWedgeHash
  Failed to set stereochemistry as unable to find an available bond
168 molecules converted


In [68]:
# Reload the Open Babel output so the rest of the notebook works on the processed molecules.
df = PandasTools.LoadSDF(
    "ZINC15_fda_Ro5_acids_7.4.sdf",
    embedProps=True,
    removeHs=False,
)
print(df.shape[0])

# Show the structures in the grid, limited to the listed fields.
mols2grid.display(
    df,
    mol_col="ROMol",
    removeHs=True,
    subset=["mols2grid-id", "img", "zinc_id"],
)

166


MolGridWidget()

In [69]:
# Run mk_prepare_ligand.py on the processed SDF; --multimol_outdir names the directory ("Ligands")
# for the per-molecule output (the captured log reports PDBQT files being written).
# NOTE(review): in the captured run two records failed sanitization (N atom with explicit valence 4)
# and were skipped — check whether those ligands matter downstream.
!mk_prepare_ligand.py -i ZINC15_fda_Ro5_acids_7.4.sdf --multimol_outdir Ligands

[11:21:09] Explicit valence for atom # 27 N, 4, is greater than permitted
[11:21:09] ERROR: Could not sanitize molecule ending on line 14402
[11:21:09] ERROR: Explicit valence for atom # 27 N, 4, is greater than permitted
[11:21:09] Explicit valence for atom # 27 N, 4, is greater than permitted
[11:21:09] ERROR: Could not sanitize molecule ending on line 14756
[11:21:09] ERROR: Explicit valence for atom # 27 N, 4, is greater than permitted
Input molecules processed: 166, skipped: 2
PDBQT files written: 166
No duplicate molecule filenames were found


In [9]:
# Display a separate file, ZINC15_fda_h.sdf, straight from disk; the loaded DataFrame is not kept.
# NOTE(review): this is an absolute path under /home/ubuntu/Downloads, so the cell only runs on a
# machine with that layout — consider a path relative to the notebook.
# NOTE(review): the execution count (In [9]) is out of sequence with the cells above — confirm
# this cell still belongs in a fresh top-to-bottom run.
mols2grid.display(PandasTools.LoadSDF("/home/ubuntu/Downloads/ZINC15_fda_h.sdf", embedProps=True, removeHs=False),
                  mol_col='ROMol')

MolGridWidget()