In [1]:
import numpy as np
import pandas as pd
import deepchem as dc
import os
import json

from deepchem.utils.docking_utils import prepare_inputs
from openmm.app                   import PDBFile
from pdbfixer                     import PDBFixer
from rdkit                        import Chem
from rdkit.Chem                   import AllChem

from p2rank_pocket_finder import P2RankPocketFinder

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading some PyTorch models, missing a dependency. No module named 'torch'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'torch'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


In [2]:
chembl = pd.read_table('data/chembl/chembl_33_chemreps.txt', sep='\t')

In [16]:
chembl

Unnamed: 0,chembl_id,canonical_smiles,standard_inchi,standard_inchi_key
0,CHEMBL153534,Cc1cc(-c2csc(N=C(N)N)n2)cn1C,InChI=1S/C10H13N5S/c1-6-3-7(4-15(6)2)8-5-16-10...,MFRNFCWYPYSFQQ-UHFFFAOYSA-N
1,CHEMBL440060,CC[C@H](C)[C@H](NC(=O)[C@H](CC(C)C)NC(=O)[C@@H...,InChI=1S/C123H212N44O34S/c1-19-63(12)96(164-11...,RSEQNZQKBMRQNM-VRGFNVLHSA-N
2,CHEMBL440245,CCCC[C@@H]1NC(=O)[C@@H](NC(=O)[C@H](CC(C)C)NC(...,InChI=1S/C160H268N50O41/c1-23-27-41-95-134(228...,FTKBTEIKPOYCEX-OZSLQWTKSA-N
3,CHEMBL440249,CC(C)C[C@@H]1NC(=O)CNC(=O)[C@H](c2ccc(O)cc2)NC...,InChI=1S/C124H154ClN21O39/c1-57(2)48-81-112(17...,UYSXXKGACMHPIM-KFGDMSGDSA-N
4,CHEMBL405398,Brc1cccc(Nc2ncnc3ccncc23)c1NCCN1CCOCC1,InChI=1S/C19H21BrN6O/c20-15-2-1-3-17(18(15)22-...,VDSXZXJEWIWBCG-UHFFFAOYSA-N
...,...,...,...,...
2372669,CHEMBL4298696,CCCCCCCCCCCCCCCCCCPCCCCCCCCCCCCCC.F[PH](F)(F)(...,InChI=1S/C32H67P.F6HP/c1-3-5-7-9-11-13-15-17-1...,ZAKUDCIPPLAGQL-UHFFFAOYSA-N
2372670,CHEMBL4298698,C[n+]1cn([C@@H]2O[C@H](CO[P@@](=O)(S)OP(=O)([O...,InChI=1S/C11H18N5O13P3S/c1-15-3-16(8-5(15)9(19...,OTIKKVINVWNBOQ-LDJOHHLFSA-N
2372671,CHEMBL4298702,c1ccc(C2CC(C3CC(c4ccccc4)OC(c4ccccc4)C3)CC(c3c...,InChI=1S/C34H34O2/c1-5-13-25(14-6-1)31-21-29(2...,NZIGZXNUFVMHNV-UHFFFAOYSA-N
2372672,CHEMBL4298703,CSCC[C@H](NC=O)C(=O)N[C@@H](CCCNC(=N)NS(=O)(=O...,InChI=1S/C78H107N18O21PS2/c1-43-44(2)65(45(3)5...,IIHLOGWTFCCTPB-WTIPWMETSA-N


In [3]:
ligands10 = chembl['canonical_smiles'].iloc[0:10]
protein   = 'proteins/slc6a19.pdb'

In [4]:
complex_mol_array = []
scores_matrix     = []

# Run Autodock Vina

In [5]:
%%time

for count in range(0, 3):
    
    print("Docking ligand "+ str(count + 1))
    ligand = ligands10[count]
    p, m = None, None

    try:
        print('Preparing inputs ...')
        p, m = prepare_inputs(protein, ligand, pH = 7.4)
    except Exception as e:
        print(e)
        print('%s failed PDB fixing' % (protein)) 
        scores_matrix.append(['No scores: failed PDB fixing' + str(e)])
        complex_mol_array.append(['No molecules'])
    
    if p is not None and m is not None:  
        try:
            pocket_finder = P2RankPocketFinder(
                'p2rank_2.4.1/test_output/predict_slc6a19/slc6a19.pdb_predictions.csv',
                ligand_mol = m, threshold = 0.3, padding = 10.0)

            vpg = dc.dock.pose_generation.VinaPoseGenerator(pocket_finder = pocket_finder)
            Chem.rdmolfiles.MolToPDBFile(p, 'data/protein_%s.pdb' % (count))
            Chem.rdmolfiles.MolToPDBFile(m, 'data/ligand_%s.pdb' % (count))

            complexes, scores = vpg.generate_poses(
                molecular_complex=('data/protein_%s.pdb' % (count),'data/ligand_%s.pdb' % (count)), 
                out_dir = 'data', generate_scores = True, num_modes = 10, cpu = 10, seed = 123)

            print(scores)
            scores_matrix.append(scores)
            complex_mol_array.append(complexes)
        except Exception as e:
            print(m)
            print(e)
        
    else:
        scores_matrix.append(['No scores'])
        complex_mol_array.append(['No molecules'])




Docking ligand 1
Preparing inputs ...
Computing Vina grid ... done.
Performing docking (random seed: 1315866677) ... 
0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+------------+----------+----------
   1       -4.774          0          0
   2       -4.764      1.526      1.966
   3       -4.728      3.248      6.807
   4       -4.653      4.141      6.215
   5       -4.639      4.728      6.292
   6       -4.473       1.64      2.106
   7       -4.469      4.068      6.561
   8       -4.448      2.089      2.363
   9       -4.368      3.506      5.673
  10       -4.312      3.814      4.826
Computing Vina grid ... done.
Performing docking (random seed: 1315866677) ... 
0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
*************



Bad Conformer Id
proteins/slc6a19.pdb failed PDB fixing
Docking ligand 3
Preparing inputs ...




Bad Conformer Id
proteins/slc6a19.pdb failed PDB fixing
CPU times: user 2min 32s, sys: 1.07 s, total: 2min 33s
Wall time: 1min 42s


In [6]:
complex_mol_array

[[(<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba5755480>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba57554e0>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba5755540>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba57555a0>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba5755600>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba5755660>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba57556c0>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba5755720>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba5755780>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mol at 0x7f1ba57557e0>),
  (<rdkit.Chem.rdchem.Mol at 0x7f1bb3ba87c0>,
   <rdkit.Chem.rdchem.Mo

In [22]:
with open('input/chembl_split_dir_1/scores_dict.pkl', 'rb') as file:
    scores = pickle.load(file)
    
with open('input/chembl_split_dir_1/ids.pkl', 'rb') as file:
    mols   = pickle.load(file)

In [23]:
scores

{'CHEMBL153534': [[-4.755,
   -4.752,
   -4.695,
   -4.627,
   -4.622,
   -4.457,
   -4.453,
   -4.453,
   -4.387,
   -4.381],
  [-4.407,
   -4.182,
   -4.118,
   -4.068,
   -3.841,
   -3.823,
   -3.752,
   -3.71,
   -3.699,
   -3.671]],
 'CHEMBL440060': 'Error'}

# Binding energies

Next, we can see that all the scores generated from Vina Pose Generator for the respective complexes are negative. This is because protein–ligand binding occurs only when the change in Gibbs free energy (ΔG) of the system is negative and more negative the free energy is the more stable the complex would be as show in Ref. Additionally, molecular docking evaluation based on the paper here showed that the binding affinities of all the derivatives range from (- 3.2 and -18.5 kcal/mol).



used 10 poses because of the paper

In [21]:
scores[0]

[-4.78,
 -4.713,
 -4.67,
 -4.605,
 -4.588,
 -4.495,
 -4.468,
 -4.356,
 -4.303,
 -4.144,
 -4.411,
 -4.08,
 -4.059,
 -4.056,
 -3.869,
 -3.722,
 -3.606,
 -3.605,
 -3.542,
 -3.498]

In [19]:
mols

0       CHEMBL153534
1       CHEMBL440060
2       CHEMBL440245
3       CHEMBL440249
4       CHEMBL405398
           ...      
995     CHEMBL502060
996    CHEMBL4116107
997     CHEMBL502072
998     CHEMBL502296
999     CHEMBL539383
Name: chembl_id, Length: 1000, dtype: object