In [None]:
from rdkit import Chem
from rdkit.Chem import PandasTools
from rdkit.Chem.Descriptors import MolWt, MolLogP, TPSA, NumRotatableBonds, NumHDonors, NumHAcceptors
from rdkit import RDLogger
import mols2grid

# Raise the RDKit logger threshold to CRITICAL so lower-severity messages
# do not flood the cell outputs while molecules are being parsed.
lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL)

# Ask PandasTools to render molecule images in every DataFrame shown below.
PandasTools.RenderImagesInAllDataFrames(images=True)

def contains_vina_unsupported_atoms(mol):
    """Report whether a molecule has an atom outside the accepted element list.

    Parameters
    ----------
    mol
        Object exposing ``GetAtoms()``; each returned atom must expose
        ``GetAtomicNum()`` (an RDKit molecule in this notebook).

    Returns
    -------
    bool
        True as soon as one atom has an atomic number that is not in the
        accepted list, False otherwise (including for a molecule with no atoms).
    """
    # Accepted atomic numbers, taken from AutoDock Vina's atom table:
    # https://github.com/ccsb-scripps/AutoDock-Vina/blob/6af928b9d5f772d26f7f02f7f219e1ea7d97ed05/src/lib/atom_constants.h
    supported = frozenset({1, 6, 7, 8, 9, 14, 15, 16, 17, 35, 53, 85})  # H,C,N,O,F,Si,P,S,Cl,Br,I,At
    return any(atom.GetAtomicNum() not in supported for atom in mol.GetAtoms())

# Load downloaded ZINC *.sdf file

In [None]:
# Read the downloaded ZINC SDF into a DataFrame. Hydrogens are not stripped
# (removeHs=False); embedProps=True presumably keeps the SDF properties
# attached to each molecule object -- confirm against the PandasTools docs.
df = PandasTools.LoadSDF(
    "ZINC15_fda.sdf",
    removeHs=False,
    embedProps=True,
)

In [None]:
# Show the loaded molecules as a grid; the call must stay the last expression
# of the cell so the notebook renders its return value.
mols2grid.display(df, mol_col='ROMol')

In [None]:
# One table drives both the column names and the descriptor callables, so the
# two can never drift out of step. Insertion order is the column order.
descriptor_table = {
    "MolWt": MolWt,
    "MolLogP": MolLogP,
    "NumHDonors": NumHDonors,
    "NumHAcceptors": NumHAcceptors,
    "NumRotatableBonds": NumRotatableBonds,
    "TPSA": TPSA,
}
descs_names = list(descriptor_table)
descs = list(descriptor_table.values())

# Add one column per descriptor, computed from the molecule column.
for column, compute in descriptor_table.items():
    df[column] = df["ROMol"].apply(compute)

# Order the table by ascending molecular weight.
df = df.sort_values(by="MolWt")

In [None]:
# Filter compounds, modified Ro5: every condition below must hold for a row
# to be kept. Rows with a missing descriptor value fail the comparison and
# are dropped.
ro5_mask = (
    (df["MolWt"] > 150)
    & (df["MolWt"] <= 500)
    & (df["MolLogP"] <= 5)
    & (df["NumHAcceptors"] <= 10)
    & (df["NumHDonors"] <= 5)
    & (df["NumRotatableBonds"] < 10)
)
df = df[ro5_mask]

# Then discard molecules flagged by contains_vina_unsupported_atoms.
has_unsupported = df["ROMol"].apply(contains_vina_unsupported_atoms)
df = df[~has_unsupported]
print(len(df))

In [None]:
# SMARTS for an acid-like group: a C or S atom double-bonded to O, S or P and
# single-bonded to an oxygen that carries one hydrogen or a -1 charge.
acids = Chem.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")

# One boolean per row, in row order: does the molecule contain the pattern?
arr_bool = list(map(lambda mol: mol.HasSubstructMatch(acids), df["ROMol"]))

# Keep only the matching rows.
df = df[arr_bool]

In [None]:
# Report how many rows remain in df, then show them as a grid. The display
# call stays last so the notebook renders its return value.
print(len(df))
mols2grid.display(df, mol_col='ROMol')

In [None]:
# No explicit property list is passed to the writer (properties=None); the
# commented alternative would pass every DataFrame column instead.
properties = None  # list(df.columns)

# Write the filtered molecules to a new SDF, using the "zinc_id" column as
# the identifier column (idName).
PandasTools.WriteSDF(
    df,
    "ZINC15_fda_acids.sdf",
    molColName="ROMol",
    idName="zinc_id",
    properties=properties,
    allNumeric=False,
)

In [None]:
#Protonate at pH=7.4
# Shell escape: runs Open Babel on the SDF written by the previous cell and
# writes ZINC15_fda_Ro5_acids_7.4.sdf. "-p 7.4" requests hydrogens appropriate
# for pH 7.4; "--unique inchi" drops duplicate structures keyed by InChI
# (per the Open Babel command-line documentation).
!obabel -isdf ZINC15_fda_acids.sdf -O ZINC15_fda_Ro5_acids_7.4.sdf -p 7.4 --unique inchi
# Unused alternative kept for reference: let obabel apply the property filter
# itself. NOTE(review): the expression has an unmatched "(" before s= and would
# need fixing before being enabled.
#--filter 'MW<=500 logP<=5 HBD<=5 HBA2<=10 abonds>1 rotors<10 (s="[$([C,S](=[O,S,P])-[O;H1,-1])]"'


In [None]:
# Load the SDF produced by the obabel step, with the same loader options as
# the original input file.
df_ph74 = PandasTools.LoadSDF(
    "ZINC15_fda_Ro5_acids_7.4.sdf",
    removeHs=False,
    embedProps=True,
)
print(len(df_ph74))

# Show the result as a grid restricted to the id, image and zinc_id fields,
# with removeHs=True passed to the grid. Kept as the last expression so the
# notebook renders it.
mols2grid.display(
    df_ph74,
    mol_col="ROMol",
    subset=["mols2grid-id", "img", "zinc_id"],
    removeHs=True,
)

In [None]:
#Convert ligands to PDBQT using meeko
# Shell escape: feeds the protonated multi-molecule SDF to Meeko's
# mk_prepare_ligand.py; "--multimol_outdir Ligands" names the directory that
# receives the per-molecule output files.
!mk_prepare_ligand.py -i ZINC15_fda_Ro5_acids_7.4.sdf --multimol_outdir Ligands