<a href="https://colab.research.google.com/github/porekhov/drug_design_2024/blob/main/docking_master_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title **Install Conda Colab and dependences (ignore possible errors)**

%%capture
# Environment bootstrap for Google Colab: installs conda through condacolab, then
# the Python packages and docking tools that the later cells of this notebook call.
# NOTE(review): stock IPython only recognises a cell magic on the first line of a
# cell; confirm that %%capture placed below the title behaves as intended in Colab.
!pip install -q condacolab
import condacolab
# NOTE(review): condacolab.install() presumably restarts the Colab runtime (likely
# the reason the title says to ignore errors) - confirm, and re-run this cell if
# the installs below did not execute.
condacolab.install()

# packages imported by the following cells: py3Dmol (3D viewer), pdbfixer
# (structure repair), openbabel (format conversion), MDAnalysis (atom selections)
!conda install -c conda-forge py3dmol pdbfixer openbabel mdanalysis -y
# mgltools is the AutoDock Tools suite; later cells run its prepare_receptor4.py
# and prepare_ligand4.py scripts through pythonsh
!conda install -c bioconda mgltools -y
# install Vina (disabled: the notebook docks with QVina below; enable these three
# lines together with the commented-out vina command in the docking cell)
# !wget https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_1.2.5_linux_x86_64 -O vina
# !chmod u+x vina
# %alias vina /content/vina
# also, install Qvina, a fast and accurate molecular docking tool,
# attained at accurately accelerating AutoDock Vina.
# the repository is cloned into /content/qvina; its bin/qvina2.1 binary is made
# executable and is called by absolute path in the docking cells
!git clone https://github.com/QVina/qvina
!chmod u+x /content/qvina/bin/qvina2.1
%alias qvina2 /content/qvina/bin/qvina2.1

In [None]:
#@title **Download the target structure from PDB**

from pdbfixer import PDBFixer
from openmm.app import PDBFile
import MDAnalysis as mda

# set the PDB code of your protein:
#@markdown Input PDB code of your protein:
pdb_id = "1tbf" #@param {type:"string"}

# first, we will use PDBFixer to download the protein structure
# using its PDB ID and save it as a pdb file using the openmm package.
# The context manager closes (and flushes) input.pdb before it is read back below.
fixer = PDBFixer(pdbid=pdb_id)
with open('input.pdb', 'w') as pdb_out:
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out)

# now, we can select and save in separate pdb files the structure of protein
# and the structure of the native ligand using MDAnalysis
# here, you can read about the selection syntax:
# https://docs.mdanalysis.org/stable/documentation_pages/selections.html
# read the pdb file
u = mda.Universe('input.pdb')

# change the selection strings for other protein-ligand complexes
target_selection = 'protein and segid A'
ligand_selection = 'resname VIA'
target = u.select_atoms(target_selection)
ligand = u.select_atoms(ligand_selection)

# fail early with a readable message instead of an opaque writer error
# when a selection string does not match the downloaded structure
if len(target) == 0:
    raise ValueError("Selection '" + target_selection + "' matched no atoms in " + pdb_id)
if len(ligand) == 0:
    raise ValueError("Selection '" + ligand_selection + "' matched no atoms in " + pdb_id)

# write down two separate files with the target protein and the ligand
target.write("target.pdb")
ligand.write("ligand.pdb")

In [None]:
#@title **Visualize the target-ligand complex**

# show the protein (white cartoon + translucent surface) together with the
# native ligand (magenta sticks) extracted in the previous cell

import py3Dmol

view = py3Dmol.view()
view.removeAllModels()
view.setViewStyle({'style':'outline','color':'black','width':0.1})

# each structure file is read inside a context manager so its handle is closed
with open('target.pdb', 'r') as pdb_in:
    view.addModel(pdb_in.read(), format='pdb')
Prot = view.getModel()
Prot.setStyle({'cartoon':{'arrows':True, 'tubes':True, 'style':'oval', 'color':'white'}})
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'})

with open('ligand.pdb', 'r') as pdb_in:
    view.addModel(pdb_in.read(), format='pdb')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'magentaCarbon','radius':0.2}})

view.zoomTo()
view.show()

In [None]:
#@title **Fix the target structure**

# pdbfixer prepares the protein structure for docking,
# e.g., it adds the missing atoms and removes/fixes non-standard atoms/residues
# (PDBFixer and PDBFile are imported in the download cell)

fix = PDBFixer(filename='target.pdb')
# find missing residues
fix.findMissingResidues()
# find and replace nonstandard residues
fix.findNonstandardResidues()
fix.replaceNonstandardResidues()
# find and add missing atoms
fix.findMissingAtoms()
fix.addMissingAtoms()
# write an output file; the context manager closes it so that the following
# cells never read a partially flushed target_fix.pdb
with open('target_fix.pdb', 'w') as pdb_out:
    PDBFile.writeFile(fix.topology, fix.positions, pdb_out)

In [None]:
#@title **Predefine the docking box**


# this preliminary step is required to define the center and the size of
# the binding site where the docking will be performed
# here, we define it based on the known ligand from the initial structure

# read the ligand structure using MDAnalysis
u = mda.Universe('ligand.pdb')

# margin (in angstrom) added on every side of the ligand's bounding box;
# increase it to let the docking explore a larger pocket
box_padding = 5.0

# geometric center of the ligand, computed once and unpacked into X, Y, and Z
CenterX, CenterY, CenterZ = u.atoms.center_of_geometry()

# per-axis extremes of the ligand coordinates, expanded by the padding
# (positions is fetched once; axis 0 runs over atoms, columns are X, Y, Z)
ligand_xyz = u.atoms.positions
minX, minY, minZ = ligand_xyz.min(axis=0) - box_padding
maxX, maxY, maxZ = ligand_xyz.max(axis=0) + box_padding

# size of the box
SizeX = maxX - minX
SizeY = maxY - minY
SizeZ = maxZ - minZ

# print the results:
center = {'center_x':CenterX,'center_y': CenterY, 'center_z': CenterZ}
size = {'size_x':SizeX,'size_y': SizeY,'size_z': SizeZ}
print('Parameters of the box:\n', center,'\n', size)

In [None]:
#@title **Prepare target and native ligand for docking**

# finally, the fixed protein structure (target_fix.pdb) should be additionally
# converted to PDBQT format, which also includes special Autodock Vina atom types
# and partial charges; the result is written to target_fix.pdbqt (-o)
# we're doing it with the Autodock Tools suite (installed as mgltools via conda),
# whose scripts are run with the pythonsh interpreter
# flags, as described in the script's usage text (TODO confirm with the installed version):
#   -A hydrogens   add hydrogens to the receptor
#   -U nphs_lps    clean up by merging non-polar hydrogens and lone pairs
#   -v             verbose output

!pythonsh /usr/local/bin/prepare_receptor4.py -r target_fix.pdb -o target_fix.pdbqt -A hydrogens -U nphs_lps -v

# ... and prepare the native ligand as a pdbqt file with the Autodock Tools as well
# -A bonds_hydrogens presumably rebuilds bonds and adds hydrogens (TODO confirm)
# no -o is given: the docking cell reads ligand.pdbqt from the working directory,
# so the script is expected to write it there under its default name

!pythonsh /usr/local/bin/prepare_ligand4.py -l ligand.pdb -A bonds_hydrogens

In [None]:
#@title **Run docking and convert results for visualization**

# running docking in qvina2 (fast implementation of vina docking)
# apart from the pdbqt files for the target and ligand,
# you should specify the parameters of the docking box (size and center)
#
# the exhaustiveness affects the sampling completeness of the conformational
# space and is roughly proportional to the calculation time
# the random seed is set to assure the reproducibility of the docking poses
# cpu sets the number of available CPUs (2 is maximum at Colab)
#
# The output is a file containing the docking poses and a text based description
# of the docking results including an estimated affinity and the root mean
# square deviation (RMSD) of each pose from the pose with the highest predicted
# affinity

!/content/qvina/bin/qvina2.1 --receptor 'target_fix.pdbqt' --ligand 'ligand.pdbqt' \
--out 'ligand_output.pdbqt' --exhaustiveness 5 --cpu 2 \
--center_x {CenterX} --center_y {CenterY} --center_z {CenterZ} \
--size_x {SizeX} --size_y {SizeY} --size_z {SizeZ} --seed 12345 \
--num_modes 5

# or alternatively you can run original vina

# !/content/vina --receptor 'target_fix.pdbqt' --ligand 'ligand.pdbqt' \
# --out 'ligand_output.pdbqt' --exhaustiveness 8 --verbosity 2 \
# --center_x {CenterX} --center_y {CenterY} --center_z {CenterZ} \
# --size_x {SizeX} --size_y {SizeY} --size_z {SizeZ} --cpu 2 --seed 12345

from openbabel import pybel

# the output file of vina with ligand poses in the binding site
# containts at most 5 poses (you can change this behaviour
# using --num_modes flag)
#
# we can save each pose as an individual mol2 file for further visualization
# and analysis

dock_poses = list(pybel.readfile('pdbqt', 'ligand_output.pdbqt'))
for i, dock_pose in enumerate(dock_poses):
    dock_pose.write(format='mol2', filename='ligand_output_' + str(i) + '.mol2', overwrite=True)

In [None]:
#@title **Visualize docking results**

# show the receptor (white), the native ligand (magenta) and one docked pose (cyan)

import py3Dmol

# index of the docked pose to display: 0 is the first pose written by the docking
# cell (ligand_output_0.mol2, the pose with the highest score);
# set it to 1, 2, 3... to visualize the other poses
pose_id = 0

view = py3Dmol.view()
view.removeAllModels()
view.setViewStyle({'style':'outline','color':'black','width':0.1})

# each structure file is read inside a context manager so its handle is closed
with open('target_fix.pdb', 'r') as pdb_in:
    view.addModel(pdb_in.read(), format='pdb')
Prot = view.getModel()
Prot.setStyle({'cartoon':{'arrows':True, 'tubes':True, 'style':'oval', 'color':'white'}})
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'})


with open('ligand.pdb', 'r') as pdb_in:
    view.addModel(pdb_in.read(), format='pdb')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'magentaCarbon','radius':0.2}})

# docked pose selected above
with open('ligand_output_' + str(pose_id) + '.mol2', 'r') as mol2_in:
    view.addModel(mol2_in.read(), format='mol2')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'cyanCarbon','radius':0.2}})

view.zoomTo()
view.show()

In [None]:
#@title **Prepare molecules from the SDF file (from pharmacophore search) for docking**

from openbabel import pybel

# for the sdf file with multiple ligands, such as you will get from
# pharmit server after the pharmacophore search, you need a slightly different
# procedure

# read ligands found using the pharmacophore model, saved in query_results.sdf,
#  and save in pdbqt format
#
# DO NOT FORGET TO UPLOAD SDF FILE:
# 1. click Files menu in the left panel;
# 2. Upload to session storage;
# 3. Agree with the warning.
#

#@markdown Choose chain(s), comma separated:
sdf_file = "query_results.sdf" #@param {type:"string"}

for i, mol in enumerate(pybel.readfile("sdf", sdf_file)):
    print('Processed molecule', i)
    fname = 'mol_' + str(i) +'.pdb'
    mol.write(format='pdb', filename=fname, overwrite=True)
    !pythonsh /usr/local/bin/prepare_ligand4.py -l {fname} -A bonds_hydrogens

In [None]:
#@title **Dock molecule 0 (1, 2, 3...) from the pharmacophore search**

#@markdown Select molecule (0, 1, 2, 3, ...):
mol_ID = 0 #@param {type:"integer"}

mol_in = 'mol_'+str(mol_ID)+'.pdbqt'
mol_out = 'mol_'+str(mol_ID)+'_output.pdbqt'

!/content/qvina/bin/qvina2.1 --receptor 'target_fix.pdbqt' \
--ligand {mol_in} --out {mol_out} --exhaustiveness 5 --cpu 2 \
--center_x {CenterX} --center_y {CenterY} --center_z {CenterZ} \
--size_x {SizeX} --size_y {SizeY} --size_z {SizeZ} --seed 12345 \
--num_modes 1

# Convert the results to mol2 format

dock_poses = list(pybel.readfile('pdbqt', mol_out))
for i, dock_pose in enumerate(dock_poses):
    dock_pose.write(format='mol2', filename='mol_'+str(mol_ID)+'_output_' + str(i) + '.mol2', overwrite=True)

# Visualize the results for pose 0 of molecule 0

view = py3Dmol.view()
view.removeAllModels()
view.setViewStyle({'style':'outline','color':'black','width':0.1})

view.addModel(open('target_fix.pdb','r').read(), format='pdb')
Prot=view.getModel()
Prot.setStyle({'cartoon':{'arrows':True, 'tubes':True, 'style':'oval', 'color':'white'}})
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'})

view.addModel(open('ligand.pdb','r').read(),format='pdb')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'magentaCarbon','radius':0.2}})

view.addModel(open('mol_'+str(mol_ID)+'_output_0.mol2','r').read(),format='mol2')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'cyanCarbon','radius':0.2}})

view.zoomTo()
view.show()

In [None]:
#@title **Dock a molecule defined by its SMILES string**

#@markdown now, let's dock a molecule converted from SMILES string, replace the smiles with your own one:
smiles = 'O=C(C)Oc1ccccc1C(=O)O' #@param {type:"string"}

# read the smiles as a molecule
mol = pybel.readstring('smi', smiles)

# create a 3D conformation
mol.make3D()

# write an output file as pdb
mol.write(format='pdb', filename='mol_from_smi.pdb', overwrite=True)

# convert pdb to pdbqt for docking
!pythonsh /usr/local/bin/prepare_ligand4.py -l mol_from_smi.pdb -A bonds_hydrogens

# docking
!/content/qvina/bin/qvina2.1 --receptor 'target_fix.pdbqt' --ligand 'mol_from_smi.pdbqt' \
--out 'mol_from_smi_output.pdbqt' --exhaustiveness 5 --cpu 2 \
--center_x {CenterX} --center_y {CenterY} --center_z {CenterZ} \
--size_x {SizeX} --size_y {SizeY} --size_z {SizeZ} --seed 12345 \
--num_modes 1

# Convert the results to mol2 format
dock_poses = list(pybel.readfile('pdbqt', 'mol_from_smi_output.pdbqt'))
for i, dock_pose in enumerate(dock_poses):
    dock_pose.write(format='mol2', filename='mol_from_smi_output_' + str(i) + '.mol2', overwrite=True)

# Visualize the results for pose 0 of molecule 0

view = py3Dmol.view()
view.removeAllModels()
view.setViewStyle({'style':'outline','color':'black','width':0.1})

view.addModel(open('target_fix.pdb','r').read(), format='pdb')
Prot=view.getModel()
Prot.setStyle({'cartoon':{'arrows':True, 'tubes':True, 'style':'oval', 'color':'white'}})
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'})

view.addModel(open('mol_from_smi_output_0.mol2','r').read(),format='mol2')
ref_m = view.getModel()
ref_m.setStyle({},{'stick':{'colorscheme':'cyanCarbon','radius':0.2}})

view.zoomTo()
view.show()