# Docking

In [2]:
from rdkit import Chem
from rdkit.Chem.SaltRemover import SaltRemover
from rdkit.Chem import Lipinski
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import Descriptors

import dockstring

import re
import csv
import pandas as pd
import numpy as np
import gzip


from rdkit import RDLogger
RDLogger.DisableLog('rdApp.error')

In [3]:
# function for chembl standardization pipeline

from chembl_structure_pipeline import standardizer

def standardize_smiles(smiles):
    try:
        mol = Chem.MolFromSmiles(smiles)
        mol = standardizer.standardize(mol)
        smiles = Chem.MolToSmiles(mol)
    except:
        pass
    return smiles

[12:34:30] Initializing Normalizer


In [4]:
# load dataset
suppl = Chem.SDMolSupplier("../data/drugbank.sdf")
drug_bank = [[mol, 'drugbank'] for mol in suppl if mol]

with gzip.open("../data/actives_final.sdf.gz") as sdf:
    supp_actives = Chem.ForwardSDMolSupplier(sdf)
    actives = [[mol, 'actives'] for mol in supp_actives if mol]

with gzip.open("../data/decoys_final.sdf.gz") as sdf:
    supp_decoys = Chem.ForwardSDMolSupplier(sdf)
    decoys = [[mol, 'decoys'] for mol in supp_decoys if mol]

In [5]:
df_db = pd.DataFrame(drug_bank, columns=['Mol', 'Source'])
df_ac = pd.DataFrame(actives, columns=['Mol', 'Source'])
df_dc = pd.DataFrame(decoys, columns=['Mol', 'Source'])

df = pd.concat([df_db, df_ac, df_dc], ignore_index=True)
df

Unnamed: 0,Mol,Source
0,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc0b0>,drugbank
1,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc120>,drugbank
2,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc190>,drugbank
3,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc200>,drugbank
4,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc270>,drugbank
...,...,...
42177,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a38b0>,decoys
42178,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3920>,decoys
42179,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3990>,decoys
42180,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3a00>,decoys


In [6]:
# standardize df 
df['Mol_standard'] = df['Mol'].apply(standardize_smiles)


In [7]:
df

Unnamed: 0,Mol,Source,Mol_standard
0,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc0b0>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc0b0>
1,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc120>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc120>
2,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc190>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc190>
3,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc200>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc200>
4,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc270>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc270>
...,...,...,...
42177,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a38b0>,decoys,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a38b0>
42178,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3920>,decoys,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3920>
42179,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3990>,decoys,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3990>
42180,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3a00>,decoys,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3a00>


In [8]:
# new column standard smiles
df['smiles_standard'] = df['Mol_standard'].apply(Chem.MolToSmiles)

In [9]:
df

Unnamed: 0,Mol,Source,Mol_standard,smiles_standard
0,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc0b0>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc0b0>,CC[C@H](C)[C@H](NC(=O)[C@H](CCC(=O)O)NC(=O)[C@...
1,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc120>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc120>,CC(C)C[C@H](NC(=O)[C@@H](COC(C)(C)C)NC(=O)[C@H...
2,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc190>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc190>,N=C(N)NCCC[C@H](NC(=O)[C@@H]1CCCN1C(=O)[C@@H]1...
3,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc200>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc200>,CC(=O)N[C@H](Cc1ccc2ccccc2c1)C(=O)N[C@H](Cc1cc...
4,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc270>,drugbank,<rdkit.Chem.rdchem.Mol object at 0x7fd5998dc270>,C/C=C/C[C@@H](C)[C@@H](O)[C@H]1C(=O)N[C@@H](CC...
...,...,...,...,...
42177,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a38b0>,decoys,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a38b0>,CCN(C)c1ccc(C(=O)Nc2ccc(CC(=O)NC(C)C)cc2)cc1
42178,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3920>,decoys,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3920>,Cc1ccccc1CN(C)C(=O)CCCC[C@H]1SC[C@H]2NC(=O)N[C...
42179,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3990>,decoys,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3990>,CSc1ccc(CNC(=O)C(=O)Nc2ccc(OC(F)F)cc2)cc1
42180,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3a00>,decoys,<rdkit.Chem.rdchem.Mol object at 0x7fd5992a3a00>,C[C@H](Cc1ccco1)NC(=O)C(=O)Nc1cccc(SC(F)F)c1


# Docking prep

In [10]:
def run_dockstring(target, ligand_smiles):
    """wrapper around Dockstring to return None if the process fails"""
    try:
        docking = target.dock(ligand_smiles)
        return docking
    except:
        return None, None

In [11]:
target = dockstring.load_target('DRD3')
run_dockstring(target, 'CCO')



(-2.6,
 {'ligand': <rdkit.Chem.rdchem.Mol at 0x7fd598d90ae0>,
  'affinities': [-2.6, -2.6, -2.5, -2.5, -2.4, -2.3, -2.2, -2.0, -2.0]})

(-2.6,
 * {'ligand': <rdkit.Chem.rdchem.Mol at 0x7f2944656a70>,
 * 'affinities': [-2.6, -2.6, -2.5, -2.5, -2.4, -2.3, -2.2, -2.0, -2.0]})

In [12]:
# stratified sampling, 10 from each category
df_sample = df.groupby('Source').apply(lambda x: x.sample(n=10, random_state=42)).reset_index(drop=True)
df_sample

Unnamed: 0,Mol,Source,Mol_standard,smiles_standard
0,<rdkit.Chem.rdchem.Mol object at 0x7fd599854200>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599854200>,O=C(NCCCCCCN1CCc2c(c3cc(F)ccc3n2-c2ccc(F)cc2)C...
1,<rdkit.Chem.rdchem.Mol object at 0x7fd599854c80>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599854c80>,COc1ccccc1N1CCN(Cc2ccn(-c3ccccc3)c2)CC1
2,<rdkit.Chem.rdchem.Mol object at 0x7fd59983b3e0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd59983b3e0>,COc1ccccc1N1CC[NH+](CCCCNC(=O)c2ccc(-c3cccc(F)...
3,<rdkit.Chem.rdchem.Mol object at 0x7fd59983ae30>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd59983ae30>,CC(C)(C)c1nc(N2CC[NH+](CCCSc3nc(=O)cc[nH]3)CC2...
4,<rdkit.Chem.rdchem.Mol object at 0x7fd599840900>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599840900>,COc1ccccc1N1CCN(CCCOc2ccc(-c3cn4ccccc4[nH+]3)c...
5,<rdkit.Chem.rdchem.Mol object at 0x7fd599841540>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599841540>,CCC[N@H+](CCN1CCN(c2[nH+]ccc3ccccc23)CC1)[C@H]...
6,<rdkit.Chem.rdchem.Mol object at 0x7fd599842500>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599842500>,COc1cc(NC(=O)C2CC2)c(Cl)cc1C(=O)N[C@H]1CC[N@@H...
7,<rdkit.Chem.rdchem.Mol object at 0x7fd599856dc0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599856dc0>,O=S(=O)(N[C@H]1CC[C@H](CC[NH+]2CCC(Cc3cccc(C(F...
8,<rdkit.Chem.rdchem.Mol object at 0x7fd5998404a0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd5998404a0>,COc1ccccc1N1CCN(CCCCOc2ccc([C@H]3C[n+]4ccsc4N3...
9,<rdkit.Chem.rdchem.Mol object at 0x7fd5998359a0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd5998359a0>,CC(C)(C)c1nc(N2CC[NH+](CCCCn3ccc(N4CCCCC4)nc3=...


In [14]:
df_sample['Docking'] = df_sample.apply(lambda x: run_dockstring(target, x['smiles_standard']), axis=1)
df_sample

[13:13:12] Explicit valence for atom # 19 N, 4, is greater than permitted


Unnamed: 0,Mol,Source,Mol_standard,smiles_standard,Docking
0,<rdkit.Chem.rdchem.Mol object at 0x7fd599854200>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599854200>,O=C(NCCCCCCN1CCc2c(c3cc(F)ccc3n2-c2ccc(F)cc2)C...,"(-9.5, {'ligand': <rdkit.Chem.rdchem.Mol objec..."
1,<rdkit.Chem.rdchem.Mol object at 0x7fd599854c80>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599854c80>,COc1ccccc1N1CCN(Cc2ccn(-c3ccccc3)c2)CC1,"(-9.1, {'ligand': <rdkit.Chem.rdchem.Mol objec..."
2,<rdkit.Chem.rdchem.Mol object at 0x7fd59983b3e0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd59983b3e0>,COc1ccccc1N1CC[NH+](CCCCNC(=O)c2ccc(-c3cccc(F)...,"(-9.6, {'ligand': <rdkit.Chem.rdchem.Mol objec..."
3,<rdkit.Chem.rdchem.Mol object at 0x7fd59983ae30>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd59983ae30>,CC(C)(C)c1nc(N2CC[NH+](CCCSc3nc(=O)cc[nH]3)CC2...,"(-9.3, {'ligand': <rdkit.Chem.rdchem.Mol objec..."
4,<rdkit.Chem.rdchem.Mol object at 0x7fd599840900>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599840900>,COc1ccccc1N1CCN(CCCOc2ccc(-c3cn4ccccc4[nH+]3)c...,"(-9.5, {'ligand': <rdkit.Chem.rdchem.Mol objec..."
5,<rdkit.Chem.rdchem.Mol object at 0x7fd599841540>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599841540>,CCC[N@H+](CCN1CCN(c2[nH+]ccc3ccccc23)CC1)[C@H]...,"(-10.6, {'ligand': <rdkit.Chem.rdchem.Mol obje..."
6,<rdkit.Chem.rdchem.Mol object at 0x7fd599842500>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599842500>,COc1cc(NC(=O)C2CC2)c(Cl)cc1C(=O)N[C@H]1CC[N@@H...,"(-9.8, {'ligand': <rdkit.Chem.rdchem.Mol objec..."
7,<rdkit.Chem.rdchem.Mol object at 0x7fd599856dc0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599856dc0>,O=S(=O)(N[C@H]1CC[C@H](CC[NH+]2CCC(Cc3cccc(C(F...,"(-11.7, {'ligand': <rdkit.Chem.rdchem.Mol obje..."
8,<rdkit.Chem.rdchem.Mol object at 0x7fd5998404a0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd5998404a0>,COc1ccccc1N1CCN(CCCCOc2ccc([C@H]3C[n+]4ccsc4N3...,"(-8.5, {'ligand': <rdkit.Chem.rdchem.Mol objec..."
9,<rdkit.Chem.rdchem.Mol object at 0x7fd5998359a0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd5998359a0>,CC(C)(C)c1nc(N2CC[NH+](CCCCn3ccc(N4CCCCC4)nc3=...,"(-10.3, {'ligand': <rdkit.Chem.rdchem.Mol obje..."


In [23]:
# in Docking column is a tuple, first element is the docking score, second is the docking object
# create column with docking score
df_sample['Docking_score'] = df_sample['Docking'].apply(lambda x: x[0])
df_sample

Unnamed: 0,Mol,Source,Mol_standard,smiles_standard,Docking,Docking_score
0,<rdkit.Chem.rdchem.Mol object at 0x7fd599854200>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599854200>,O=C(NCCCCCCN1CCc2c(c3cc(F)ccc3n2-c2ccc(F)cc2)C...,"(-9.5, {'ligand': <rdkit.Chem.rdchem.Mol objec...",-9.5
1,<rdkit.Chem.rdchem.Mol object at 0x7fd599854c80>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599854c80>,COc1ccccc1N1CCN(Cc2ccn(-c3ccccc3)c2)CC1,"(-9.1, {'ligand': <rdkit.Chem.rdchem.Mol objec...",-9.1
2,<rdkit.Chem.rdchem.Mol object at 0x7fd59983b3e0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd59983b3e0>,COc1ccccc1N1CC[NH+](CCCCNC(=O)c2ccc(-c3cccc(F)...,"(-9.6, {'ligand': <rdkit.Chem.rdchem.Mol objec...",-9.6
3,<rdkit.Chem.rdchem.Mol object at 0x7fd59983ae30>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd59983ae30>,CC(C)(C)c1nc(N2CC[NH+](CCCSc3nc(=O)cc[nH]3)CC2...,"(-9.3, {'ligand': <rdkit.Chem.rdchem.Mol objec...",-9.3
4,<rdkit.Chem.rdchem.Mol object at 0x7fd599840900>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599840900>,COc1ccccc1N1CCN(CCCOc2ccc(-c3cn4ccccc4[nH+]3)c...,"(-9.5, {'ligand': <rdkit.Chem.rdchem.Mol objec...",-9.5
5,<rdkit.Chem.rdchem.Mol object at 0x7fd599841540>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599841540>,CCC[N@H+](CCN1CCN(c2[nH+]ccc3ccccc23)CC1)[C@H]...,"(-10.6, {'ligand': <rdkit.Chem.rdchem.Mol obje...",-10.6
6,<rdkit.Chem.rdchem.Mol object at 0x7fd599842500>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599842500>,COc1cc(NC(=O)C2CC2)c(Cl)cc1C(=O)N[C@H]1CC[N@@H...,"(-9.8, {'ligand': <rdkit.Chem.rdchem.Mol objec...",-9.8
7,<rdkit.Chem.rdchem.Mol object at 0x7fd599856dc0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd599856dc0>,O=S(=O)(N[C@H]1CC[C@H](CC[NH+]2CCC(Cc3cccc(C(F...,"(-11.7, {'ligand': <rdkit.Chem.rdchem.Mol obje...",-11.7
8,<rdkit.Chem.rdchem.Mol object at 0x7fd5998404a0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd5998404a0>,COc1ccccc1N1CCN(CCCCOc2ccc([C@H]3C[n+]4ccsc4N3...,"(-8.5, {'ligand': <rdkit.Chem.rdchem.Mol objec...",-8.5
9,<rdkit.Chem.rdchem.Mol object at 0x7fd5998359a0>,actives,<rdkit.Chem.rdchem.Mol object at 0x7fd5998359a0>,CC(C)(C)c1nc(N2CC[NH+](CCCCn3ccc(N4CCCCC4)nc3=...,"(-10.3, {'ligand': <rdkit.Chem.rdchem.Mol obje...",-10.3
