<a href="https://colab.research.google.com/github/porekhov/drug_design_2024/blob/main/docking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title **Install Conda Colab (ignore possible errors)**

%%capture
!pip install -q condacolab
import condacolab
condacolab.install()

In [None]:
#@title **Install rdkit, Vina (docking) and other dependences (ignore possible errors)**

%%capture
!conda install -c conda-forge rdkit py3dmol pdbfixer openbabel -y
!conda install -c schrodinger pymol -y
!conda install -c bioconda mgltools -y
# install Vina
!wget https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_1.2.5_linux_x86_64 -O vina
!chmod u+x vina
%alias vina /content/vina
# also, install Qvina, a fast and accurate molecular docking tool,
# attained at accurately accelerating AutoDock Vina.
!git clone https://github.com/QVina/qvina
!chmod u+x /content/qvina/bin/qvina2.1
%alias qvina2 /content/qvina/bin/qvina2.1

In [None]:
# !!! this cell may have to be executed twice before it succeeds !!!
from pymol import cmd

# PDB code of the target protein
pdbid = '3E64'

# Download the 'pdb1' variant of the entry: in the RCSB database this file
# contains a single biological assembly as deposited by the authors and may
# differ from the plain pdb file in the number of chains.
cmd.fetch(pdbid, type='pdb1')

# Named selections for the protein and for the bound small molecule(s).
# The selection might need to be changed depending on the ligand/protein
# chains you are interested in (check the corresponding PDB page). The
# selection string must follow the PyMOL selection algebra:
# https://pymolwiki.org/index.php/Selection_Algebra
cmd.select('Prot', 'polymer.protein')
cmd.select('Lig', 'organic')

# Write each selection to its own PDB file, then clear the PyMOL session
cmd.save('receptor_clean.pdb', 'Prot', format='pdb')
cmd.save('pdb_lig.pdb', 'Lig', format='pdb')
cmd.delete('all')

In [None]:
import py3Dmol

# py3Dmol allows quick visualization of 3D structures.
# This cell shows the extracted receptor (white cartoon + translucent surface)
# together with the native ligand (green sticks).

view = py3Dmol.view()
view.removeAllModels()
view.setViewStyle({'style':'outline','color':'black','width':0.1})

# Read the files through context managers so the handles are closed right away.
# The format is passed positionally, as in py3Dmol's documented addModel usage.
with open('receptor_clean.pdb', 'r') as receptor_file:
    view.addModel(receptor_file.read(), 'pdb')
Prot = view.getModel()  # handle of the model that was just added
Prot.setStyle({'cartoon':{'arrows':True, 'tubes':True, 'style':'oval', 'color':'white'}})
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'})

with open('pdb_lig.pdb', 'r') as ligand_file:
    view.addModel(ligand_file.read(), 'pdb')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'greenCarbon','radius':0.2}})

view.zoomTo()
view.show()

In [None]:
# pdbfixer prepares the protein structure for docking,
# e.g., adds the missing atoms and removes/fixes non-standard atoms/residues

from pdbfixer import PDBFixer
from openmm.app import PDBFile

fix = PDBFixer(filename='receptor_clean.pdb')
# find missing residues
fix.findMissingResidues()
# find and replace nonstandard residues
fix.findNonstandardResidues()
fix.replaceNonstandardResidues()
# remove heterogens; NOTE(review): the positional True looks like PDBFixer's
# keepWater flag, i.e. waters would be kept - confirm against the PDBFixer docs
fix.removeHeterogens(True)
# find and add missing atoms
fix.findMissingAtoms()
fix.addMissingAtoms()
# write the output file; the context manager guarantees the file is flushed
# and closed before the following cells read it
with open('receptor_clean_fix.pdb', 'w') as fixed_pdb:
    PDBFile.writeFile(fix.topology, fix.positions, fixed_pdb)

In [None]:
# For the docking run, we must define the center and dimensions of the
# rectangular box around the putative binding site.
# Here the docking box is centered at the position of the native ligand from
# the PDB entry and extends BOX_PADDING angstroms beyond the ligand's edges.
# Relies on `cmd` (pymol) imported in the structure-download cell.

# margin (in angstroms) added on every side of the ligand's bounding box
BOX_PADDING = 5.0

# only the ligand coordinates are needed to place the box
cmd.load(filename='pdb_lig.pdb',format='pdb',object='lig')

# an empty ligand file would silently yield a meaningless box, so fail loudly
if cmd.count_atoms('lig') == 0:
    cmd.delete('all')
    raise ValueError("pdb_lig.pdb contains no atoms: adjust the ligand selection "
                     "used when the structure was split")

([minX, minY, minZ],[maxX, maxY, maxZ]) = cmd.get_extent('lig')
cmd.delete('all')

# grow the ligand bounding box by the padding in every direction
minX = minX - BOX_PADDING
minY = minY - BOX_PADDING
minZ = minZ - BOX_PADDING
maxX = maxX + BOX_PADDING
maxY = maxY + BOX_PADDING
maxZ = maxZ + BOX_PADDING

# box edge lengths and box center, used by the docking cell
SizeX = maxX - minX
SizeY = maxY - minY
SizeZ = maxZ - minZ
CenterX =  (maxX + minX)/2
CenterY =  (maxY + minY)/2
CenterZ =  (maxZ + minZ)/2

center = {'center_x':CenterX,'center_y': CenterY, 'center_z': CenterZ}
size = {'size_x':SizeX,'size_y': SizeY,'size_z': SizeZ}
print(center,'\n',size)

In [None]:
# Finally, the fixed protein structure must additionally be converted to PDBQT format,
# which also includes special Autodock Vina atom types and partial charges.
# We're doing it with the Autodock Tools suite (installed as mgltools via conda).
# Flags used below (per the AutoDockTools usage text - confirm with the script's help output):
#   -A hydrogens   add hydrogens to the receptor
#   -U nphs_lps    clean-up: merge non-polar hydrogens and lone pairs
#   -v             verbose output

!pythonsh /usr/local/bin/prepare_receptor4.py -r receptor_clean_fix.pdb -o receptor_clean_fix.pdbqt -A hydrogens -U nphs_lps -v

# ... and prepare the ligand as a pdbqt file with the Autodock Tools as well.
# No output name is given here; the docking cell expects the result as pdb_lig.pdbqt.
# -A bonds_hydrogens: presumably rebuilds bonds and adds hydrogens - confirm with the script's help output

!pythonsh /usr/local/bin/prepare_ligand4.py -l pdb_lig.pdb -A bonds_hydrogens

In [None]:
# Run docking in qvina2 (a fast implementation of vina docking).
# Apart from the pdbqt files for the target and ligand,
# you should specify the parameters of the docking box (size and center);
# {CenterX}, {SizeX}, ... are interpolated from the Python variables
# computed in the docking-box cell.
#
# The exhaustiveness affects the sampling completeness of the conformational
# space and is roughly proportional to the calculation time.
# The random seed is set to ensure the reproducibility of the docking poses.
# cpu sets the number of available CPUs (2 is the maximum at Colab).
#
# The output is a file containing the docking poses and a text-based description
# of the docking results, including an estimated affinity and the root mean
# square deviation (RMSD) of each pose from the pose with the highest predicted
# affinity.

!/content/qvina/bin/qvina2.1 --receptor 'receptor_clean_fix.pdbqt' --ligand 'pdb_lig.pdbqt' \
--out 'pdb_lig_output.pdbqt' --exhaustiveness 8 --cpu 2 \
--center_x {CenterX} --center_y {CenterY} --center_z {CenterZ} \
--size_x {SizeX} --size_y {SizeY} --size_z {SizeZ} --seed 12345

# or alternatively you can run the original vina
# (the receptor file is the receptor_clean_fix.pdbqt prepared in the previous cell)

# !/content/vina --receptor 'receptor_clean_fix.pdbqt' --ligand 'pdb_lig.pdbqt' \
# --out 'pdb_lig_output.pdbqt' --exhaustiveness 8 --verbosity 2 \
# --center_x {CenterX} --center_y {CenterY} --center_z {CenterZ} \
# --size_x {SizeX} --size_y {SizeY} --size_z {SizeZ} --cpu 2 --seed 12345

In [None]:
from openbabel import pybel

# The vina output file holds the ligand poses found in the binding site:
# at most 9 poses by default (this can be changed with the --num_modes flag).
#
# Each pose is written to its own mol2 file (pdb_lig_output_<index>.mol2)
# for further visualization and analysis.

dock_poses = list(pybel.readfile('pdbqt', 'pdb_lig_output.pdbqt'))
for pose_idx, pose in enumerate(dock_poses):
    pose.write('mol2', f'pdb_lig_output_{pose_idx}.mol2', overwrite=True)

In [None]:
# Show the pose with the highest score (pdb_lig_output_0.mol2) on top of the
# fixed receptor, together with the native ligand for comparison.
# Relies on py3Dmol imported in the first visualization cell.

view = py3Dmol.view()
view.removeAllModels()
view.setViewStyle({'style':'outline','color':'black','width':0.1})

# Files are read through context managers so the handles are closed right away.
# The format is passed positionally, as in py3Dmol's documented addModel usage.
with open('receptor_clean_fix.pdb', 'r') as receptor_file:
    view.addModel(receptor_file.read(), 'pdb')
Prot = view.getModel()
Prot.setStyle({'cartoon':{'arrows':True, 'tubes':True, 'style':'oval', 'color':'white'}})
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'})

# native (crystallographic) ligand in magenta; this is a PDB file, so it must
# be declared as 'pdb' (it was previously labelled 'mol2')
with open('pdb_lig.pdb', 'r') as native_file:
    view.addModel(native_file.read(), 'pdb')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'magentaCarbon','radius':0.2}})

# docked pose in cyan;
# change pdb_lig_output_0.mol2 to pdb_lig_output_X.mol2 X=0,1,2,3...
# to visualize other poses
with open('pdb_lig_output_0.mol2', 'r') as pose_file:
    view.addModel(pose_file.read(), 'mol2')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'cyanCarbon','radius':0.2}})

view.zoomTo()
view.show()

In [None]:
from openbabel import pybel

# for the sdf file with multiple ligands, such as you will get from
# pharmit server after the pharmacophore search, you need a slightly different
# procedure

# read ligands found using the pharmacophore model, saved in query_results.sdf,
#  and save in pdbqt format
#
# DO NOT FORGET TO UPLOAD SDF FILE:
# 1. click Files menu in the left panel;
# 2. Upload to session storage;
# 3. Agree with the warning.
#
for i, mol in enumerate(pybel.readfile("sdf", "query_results.sdf")):
    fname = 'mol_' + str(i) +'.pdb'
    mol.write(format='pdb', filename=fname, overwrite=True)
    !pythonsh /usr/local/bin/prepare_ligand4.py -l {fname} -A bonds_hydrogens

In [None]:
# Exercise: write the code to dock the 10 prepared ligand files
# and visualize the results for the molecule with the highest affinity (i.e., the lowest score)