In [1]:
# @title 1.1 Install Condacolab (< 1min)
%%time

# Install the condacolab helper with pip, then let it set up conda in this runtime.
! pip install -q condacolab
import condacolab
# The install step ends by restarting the kernel (see the form note below), so
# nothing defined in this cell survives; later cells must not rely on it.
condacolab.install()

#@markdown *Kernel will restart automatically after this installation*
#@markdown
#@markdown *Keep connected to the same runtime and proceed to the next code block*


⏬ Downloading https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:16
🔁 Restarting kernel...
CPU times: user 685 ms, sys: 264 ms, total: 949 ms
Wall time: 28.9 s


In [1]:
# @title 1.2 Install Packages and Data (~ 5min)
%%time

# Install Reduce2 (cctbx-base)
! conda install cctbx-base


# Install Prody and py3Dmol
! pip install prody py3Dmol


# Download Phenix Project geostd (restraint) Library
goestd_repo = "https://github.com/phenix-project/geostd.git"
! git clone {goestd_repo}


# Install Vina, Meeko (develop branch) and Dependencies
! conda install numpy scipy rdkit vina
! git clone --single-branch --branch develop https://github.com/forlilab/Meeko.git
! cd Meeko; pip install --use-pep517 -e .; cd ..


# Install Scrubber (develop branch)
! git clone --single-branch --branch develop https://github.com/forlilab/scrubber.git
! cd scrubber; pip install --use-pep517 -e .; cd ..


# Download Vina Executables
! wget https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_1.2.5_linux_x86_64
! mv vina_1.2.5_linux_x86_64 vina; chmod +x vina;
! wget https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_split_1.2.5_linux_x86_64
! mv vina_split_1.2.5_linux_x86_64 vina_split; chmod +x vina_split

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / 

In [2]:
# @title 1.3 Import Modules & Helper Functions (< 1s)
%%time

# Import modules
import sys, platform
from prody import *
from pathlib import Path
from rdkit import Chem
from rdkit.Chem import AllChem
import rdkit, py3Dmol
print("rdkit version:", rdkit.__version__)
print("py3Dmol version:", py3Dmol.__version__)
from ipywidgets import interact, IntSlider
import ipywidgets, copy


# Helper functions
def locate_file(from_path = None, query_path = None, query_name = "query file"):
    """Find a file under a directory by glob pattern and announce which one is used.

    Args:
        from_path: directory to search; must offer a ``glob`` method
            (the callers in this notebook pass ``pathlib.Path`` objects).
        query_path: glob pattern evaluated relative to ``from_path``.
        query_name: label for the file, used only in the printed/raised messages.

    Returns:
        The first entry produced by ``from_path.glob(query_path)``.

    Raises:
        ValueError: when ``from_path`` or ``query_path`` is missing or falsy.
        FileNotFoundError: when the pattern matches nothing.
    """
    if not (from_path and query_path):
        raise ValueError("Must specify from_path and query_path")

    matches = [*from_path.glob(query_path)]
    if len(matches) == 0:
        raise FileNotFoundError(f"Cannot find {query_name} from {from_path} by {query_path}")

    chosen = matches[0]
    # Tell the user which match was picked (the message ends with a blank line)
    print(f"using {query_name} at:\n{chosen}\n")
    return chosen


def confgen_from_smiles(smi = None, outputSDF = "ligand.sdf", numConfs = 50, maxIters = 5000):
    """Embed conformers for a SMILES and write the lowest-energy converged one to SDF.

    Hydrogens are added, ``numConfs`` conformers are embedded with ETKDGv3,
    all of them are MMFF-optimized, and the converged conformer with the
    lowest reported energy is written to ``outputSDF``.

    Args:
        smi: SMILES string of the molecule.
        outputSDF: path of the SDF file to write.
        numConfs: number of conformers requested from the embedding step.
        maxIters: iteration limit passed to the MMFF optimizer.

    Raises:
        ValueError: if ``smi`` is None or RDKit cannot parse it.
        RuntimeError: if no conformer finished optimization with status 0
            (this includes the case where no conformer could be embedded).
    """
    if smi is None:
        raise ValueError("Must specify smi")

    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # RDKit reports an unparsable SMILES by returning None; stop here
        # rather than failing later inside AddHs with an unrelated error.
        raise ValueError(f"Could not parse SMILES: {smi!r}")

    mol_h = Chem.AddHs(mol)

    params = AllChem.ETKDGv3()
    params.numThreads = 0

    # Keep the conformer IDs returned by the embedding so the energy results
    # are mapped to real conformer IDs instead of to list positions.
    conf_ids = list(AllChem.EmbedMultipleConfs(mol_h, numConfs, params))
    results = AllChem.MMFFOptimizeMoleculeConfs(mol_h, numThreads = 0, maxIters = maxIters)

    # Each result is (status, energy); only status 0 is treated as converged.
    converged = [
        (energy, conf_id)
        for conf_id, (status, energy) in zip(conf_ids, results)
        if status == 0
    ]
    if not converged:
        raise RuntimeError("Optimization didn't converge. No conformer/SDF will be written")

    # min() returns the first minimum, so ties resolve to the earliest conformer
    _, best_conf_id = min(converged, key=lambda pair: pair[0])

    with Chem.SDWriter(outputSDF) as w:
        w.write(mol_h, confId=best_conf_id)

# Commandline scripts
# All four entry points are looked up in the same bin directory
bin_dir = Path("/usr/local/bin")
scrub = locate_file(bin_dir, "scrub.py", "scrub.py")
mk_prepare_ligand = locate_file(bin_dir, "mk_prepare_ligand.py", "mk_prepare_ligand.py")
mk_prepare_receptor = locate_file(bin_dir, "mk_prepare_receptor.py", "mk_prepare_receptor.py")
mk_export = locate_file(bin_dir, "mk_export.py", "mk_export.py")


# Locate reduce2 in conda install prefix
# site-packages lives under lib/python<major>.<minor>, so keep only those two parts
full_py_version = platform.python_version()
major_and_minor = ".".join(full_py_version.split(".", 2)[:2])
env_path = Path("/usr/local") # default conda install prefix on Colab
reduce2_path = "lib/python" + major_and_minor + "/site-packages/mmtbx/command_line/reduce2.py"
reduce2 = locate_file(env_path, reduce2_path, "reduce2.py")


# Locate geostd in current path
geostd_path = locate_file(Path.cwd(), "geostd", "geostd")

rdkit version: 2023.09.6
py3Dmol version: 2.4.0
using scrub.py at:
/usr/local/bin/scrub.py

using mk_prepare_ligand.py at:
/usr/local/bin/mk_prepare_ligand.py

using mk_prepare_receptor.py at:
/usr/local/bin/mk_prepare_receptor.py

using mk_export.py at:
/usr/local/bin/mk_export.py

using reduce2.py at:
/usr/local/lib/python3.10/site-packages/mmtbx/command_line/reduce2.py

using geostd at:
/content/geostd

CPU times: user 1.02 s, sys: 165 ms, total: 1.19 s
Wall time: 1.95 s


In [3]:
# @title # 2.1 [Basic Docking] Ligand Preparation (< 5s)
%%time

# @markdown *Input*
ligand_Smiles = "C1[C@H]([C@@H]([C@H]([C@@H]([C@H]1N)O[C@@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CN)O)O)O)O)O[C@@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CO)O)N)O)N" #@param {type:"string"}
# @markdown *Options*
pH = 7.5 #@param {type:"raw"}
args = ""
skip_tautomer = True #@param {type:"boolean"}
if skip_tautomer:
  args += "--skip_tautomer "
skip_acidbase = False #@param {type:"boolean"}
if skip_acidbase:
  args += "--skip_acidbase "
# @markdown *Output*
ligandPDBQT = "KAN.pdbqt" #@param {type:"string"}


# Write scrubbed protomer(s) and conformer(s) to SDF
ligandName = ligandPDBQT.replace(".pdbqt", "")
ligandSDF = f"{ligandName}_scrubbed.sdf"
! python {scrub} "{ligand_Smiles}" -o {ligandSDF} --ph {pH} {args}


# Prepare ligand PDBQT
! python {mk_prepare_ligand} -i {ligandSDF} -o {ligandPDBQT}


# Visualization with py3Dmol
view = py3Dmol.view()
view.addModel(open(ligandSDF, 'r').read(),'sdf')
view.zoomTo()
view.setBackgroundColor('white')
view.addStyle({'stick': {'colorscheme':'yellowCarbon'}})
view.show()

Scrub completed.
Summary of what happened:
Input molecules supplied: 1
mols processed: 1, skipped by rdkit: 0, failed: 0
nr isomers (tautomers and acid/base conjugates): 1 (avg. 1.000 per mol)
nr conformers:  1 (avg. 1.000 per isomer, 1.000 per mol)


CPU times: user 51.9 ms, sys: 10.5 ms, total: 62.4 ms
Wall time: 5.35 s


In [43]:
# @title # 2.2 [Basic Docking] Receptor Preparation (< 30s)
%%time

#@markdown ## (1) Add Hydrogens to Receptor with Reduce2

# @markdown *Input*
# Download PDB file
pdb_token = "5iqb" #@param {type:"string"}
! curl "http://files.rcsb.org/view/{pdb_token}.pdb" -o "{pdb_token}.pdb"


# Export receptor atoms
atoms_from_pdb = parsePDB(pdb_token)
# @markdown *ProDy Options*
receptor_selection = "chain A and not water and not hetero" #@param {type:"string"}
receptor_atoms = atoms_from_pdb.select(receptor_selection)
prody_receptorPDB = f"{pdb_token}_receptor_atoms.pdb"
writePDB(prody_receptorPDB, receptor_atoms)

cofactor_selection = "chain A and resname MG GNP" #@param {type:"string"}
cofactor_atoms = atoms_from_pdb.select(cofactor_selection)
prody_cofactorPDB = f"{pdb_token}_cofactor_atoms.pdb"
writePDB(prody_cofactorPDB, cofactor_atoms)


# Add CRYST1 card (temporarily required for reduce2)
reduce_inputPDB = f"{pdb_token}_receptor.pdb"
! cat <(grep "CRYST1" "{pdb_token}.pdb") {prody_receptorPDB} > {reduce_inputPDB}


# Run reduce2
# @markdown *Reduce Options*
reduce_opts = "approach=add add_flip_movers=True" #@param {type:"string"}
! export MMTBX_CCP4_MONOMER_LIB="{geostd_path}"; python {reduce2} {reduce_inputPDB} {reduce_opts}
# Default name of reduce output...
prepare_inPDB = f"{pdb_token}_receptorFH.pdb"

# Add ions to reduce output (temporarily required for reduce2)
! cat {prody_ionPDB} {prepare_inPDB} > temp.pdb
! grep -v '^END' temp.pdb > {prepare_inPDB}

#@markdown ---
#@markdown ## (2) Receptor Preparation with Meeko

# @markdown *Options*
args = ""
allow_bad_res = True #@param {type:"boolean"}
if allow_bad_res:
  args += "--allow_bad_res "


# Specify Box
# Center at ligand
atoms_from_pdb = parsePDB(pdb_token)
# @markdown *Center the box at...*
ligand_selection = "chain A and resname KAN" #@param {type:"string"}
ligand_atoms = atoms_from_pdb.select(ligand_selection)
center_x, center_y, center_z = calcCenter(ligand_atoms)


# @markdown *For future reference, export ligand's original position to...*
prody_ligandPDB = "LIG.pdb" #@param {type:"string"}
writePDB(prody_ligandPDB, ligand_atoms)


# Size in each dimension
# @markdown *Set size of the box to...*
size_x = 20.0 #@param {type:"raw"}
size_y = 20.0 #@param {type:"raw"}
size_z = 20.0 #@param {type:"raw"}


# Prepare Receptor
# @markdown *Output*
prepare_outPDBQT = "5iqb_receptorFH.pdbqt" #@param {type:"string"}
add_templates = "GNP_chemical_templates.json"
! python {mk_prepare_receptor} --add_templates {add_templates} --macromol {prepare_inPDB} -o {prepare_outPDBQT} --box_center {center_x} {center_y} {center_z} --box_size {size_x} {size_y} {size_z} {args}


# Visualization with py3Dmol
def Receptor3DView(receptorPDB = None, boxPDB = None, ligPDB = None):
    """Build a py3Dmol view of the docking box, the receptor and an optional ligand.

    Models are added in the order box, receptor, ligand. The view is zoomed
    right after the box is added, i.e. while the box is the only model.

    Args:
        receptorPDB: path to the receptor PDB file (required).
        boxPDB: path to the PDB file describing the docking box (required).
        ligPDB: optional path to a ligand PDB file, drawn as sticks.

    Returns:
        The populated ``py3Dmol`` view; the caller decides when to ``show()`` it.

    Raises:
        ValueError: if ``receptorPDB`` or ``boxPDB`` is not given.
    """
    if receptorPDB is None or boxPDB is None:
        raise ValueError("Must specify receptorPDB and boxPDB")

    def _read_text(path):
        # Context manager guarantees the handle is closed once the text is read
        with open(path, 'r') as handle:
            return handle.read()

    view = py3Dmol.view()
    view.setBackgroundColor('white')

    view.addModel(_read_text(boxPDB), 'pdb')
    view.addStyle({'stick': {}})
    view.zoomTo()

    view.addModel(_read_text(receptorPDB), 'pdb')
    view.addStyle({'cartoon': {'color':'spectrum', 'opacity': 0.5}})

    if ligPDB is not None:
        view.addModel(_read_text(ligPDB), 'pdb')
        view.addStyle({'hetflag': True}, {'stick': {}})

    return view

# Show the protonated receptor with its box and the reference ligand.
# The box PDB name is derived from the PDBQT name (.pdbqt -> .box.pdb).
Receptor3DView(
    receptorPDB = prepare_inPDB,
    boxPDB = prepare_outPDBQT.replace('.pdbqt', '.box.pdb'),
    ligPDB = prody_ligandPDB,
).show()

curl: /usr/local/lib/libcurl.so.4: no version information available (required by curl)
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1792k    0 1792k    0     0  8652k      0 --:--:-- --:--:-- --:--:-- 8700k


@> PDB file is found in working directory (5iqb.pdb).
@> 10606 atoms and 1 coordinate set(s) were parsed in 0.18s.


Starting /usr/local/lib/python3.10/site-packages/mmtbx/command_line/reduce2.py
on Tue Oct  1 00:04:57 2024 by root

Processing files:
-------------------------------------------------------------------------------

  Found model, 5iqb_receptor.pdb

Processing PHIL parameters:
-------------------------------------------------------------------------------

  Adding command-line PHIL:
  -------------------------
    approach=add
    add_flip_movers=True

Final processed PHIL parameters:
-------------------------------------------------------------------------------
  data_manager {
    model {
      file = "5iqb_receptor.pdb"
    }
    default_model = "5iqb_receptor.pdb"
  }
  add_flip_movers = True


Starting job
Writing model output to 5iqb_receptorFH.pdb

                       ----------Loading Model----------                       


                      ----------Adding Hydrogens----------                     

Number of hydrogen atoms added to the input model: 2315 


The followi

@> PDB file is found in working directory (5iqb.pdb).
@> 10606 atoms and 1 coordinate set(s) were parsed in 0.16s.


templates=<meeko.linked_rdkit_chorizo.ResidueChemTemplates object at 0x7eb3cca8f310>
@> 4749 atoms and 1 coordinate set(s) were parsed in 0.06s.
No template matched for residue_key='A:234'
tried 6 templates for residue_key='A:234'excess_H_ok=False
LYS        heavy_miss=4 heavy_excess=0 H_excess=0 bond_miss={4} bond_excess=set()
NLYS       heavy_miss=4 heavy_excess=0 H_excess=0 bond_miss=set() bond_excess=set()
CLYS       heavy_miss=5 heavy_excess=0 H_excess=0 bond_miss={5} bond_excess={1}
LYN        heavy_miss=4 heavy_excess=0 H_excess=0 bond_miss={4} bond_excess=set()
NLYN       heavy_miss=4 heavy_excess=0 H_excess=0 bond_miss=set() bond_excess=set()
CLYN       heavy_miss=5 heavy_excess=0 H_excess=0 bond_miss={5} bond_excess={1}

No template matched for residue_key='A:235'
tried 6 templates for residue_key='A:235'excess_H_ok=False
LYS        heavy_miss=4 heavy_excess=0 H_excess=0 bond_miss=set() bond_excess=set()
NLYS       heavy_miss=4 heavy_excess=0 H_excess=0 bond_miss=set() bond_e

CPU times: user 973 ms, sys: 79 ms, total: 1.05 s
Wall time: 39.3 s


In [44]:
# @title # 2.3 [Basic Docking] Docking with Vina Scoring Function (~ 3min)
%%time

# File names are entered by hand here; they are not taken from the variables
# set in the preparation cells, so they must be kept in sync manually.
#@markdown *Input*
receptorPDBQT = "5iqb_receptorFH.pdbqt" #@param {type:"string"}
ligandPDBQT = "KAN.pdbqt" #@param {type:"string"}
# NOTE(review): presumably the box definition written next to the receptor
# PDBQT by the receptor preparation step — confirm.
configTXT = "5iqb_receptorFH.box.txt" #@param {type:"string"}
#@markdown *Options*
exhaustiveness = 8 #@param {type:"raw"}
#@markdown *Output*
outputPDBQT = "5iqb_KAN_vina_out.pdbqt" #@param {type:"string"}

# `vina` is called without a path, so the shell picks whichever vina is on PATH.
# The trailing backslashes continue one shell command; keep the three lines together.
! vina --receptor {receptorPDBQT} --ligand {ligandPDBQT} --config {configTXT} \
       --exhaustiveness {exhaustiveness} \
       --out {outputPDBQT}

AutoDock Vina 36dd023-mod
#################################################################
# If you used AutoDock Vina in your work, please cite:          #
#                                                               #
# J. Eberhardt, D. Santos-Martins, A. F. Tillack, and S. Forli  #
# AutoDock Vina 1.2.0: New Docking Methods, Expanded Force      #
# Field, and Python Bindings, J. Chem. Inf. Model. (2021)       #
# DOI 10.1021/acs.jcim.1c00203                                  #
#                                                               #
# O. Trott, A. J. Olson,                                        #
# AutoDock Vina: improving the speed and accuracy of docking    #
# with a new scoring function, efficient optimization and       #
# multithreading, J. Comp. Chem. (2010)                         #
# DOI 10.1002/jcc.21334                                         #
#                                                               #
# Please see https://github.com/ccsb-scripps/AutoD

In [45]:
# @title # 2.4 [Basic Docking] Export and Visualize Docked Poses (~ 1s)
%%time

# Export Docked Poses
#@markdown *Export docked poses to...*
dock_outSDF = "5iqb_KAN_vina_out.sdf" #@param {type:"string"}
# outputPDBQT is not set in this cell; it comes from the docking cell (2.3),
# which therefore has to be run first.
! python {mk_export} {outputPDBQT} -o {dock_outSDF}

#@markdown *Visualize docked poses with...*
# Previously Generated Receptor Files
# These names are typed in by hand and must match the files written in step 2.2.
receptorPDB = "5iqb_receptorFH.pdb" #@param {type:"string"}
boxPDB = "5iqb_receptorFH.box.pdb" #@param {type:"string"}
# Reference ligand file and the residue name used to select it for styling
refligPDB = 'LIG.pdb' #@param {type:"string"}
reflig_resn = 'KAN' #@param {type:"string"}

# Visualize Docked Poses
def Complex3DView(view, ligmol = None, refligPDB = None, reflig_resn = None):
    """Return a copy of ``view`` with a docked pose (and optional reference ligand) added.

    The incoming view is deep-copied, so it is left unmodified.

    Args:
        view: base view to copy (in this notebook, the result of ``Receptor3DView``).
        ligmol: RDKit molecule of the docked pose; converted to a mol block.
        refligPDB: optional path to a PDB file with the reference ligand.
        reflig_resn: residue name selecting the reference ligand for styling;
            the reference is only drawn when both reference arguments are given.

    Returns:
        The new view with the added models and styles.

    Raises:
        ValueError: if ``ligmol`` is None.
    """
    if ligmol is None:
        # Fail with a readable message instead of an error from inside RDKit
        raise ValueError("Must specify ligmol")

    new_viewer = copy.deepcopy(view)

    mblock = Chem.MolToMolBlock(ligmol)
    new_viewer.addModel(mblock, 'mol')
    new_viewer.addStyle({'hetflag': True}, {"stick": {'colorscheme': 'greenCarbon'}})

    if refligPDB is not None and reflig_resn is not None:
        # Context manager closes the reference file as soon as it has been read
        with open(refligPDB, 'r') as handle:
            reflig_block = handle.read()
        new_viewer.addModel(reflig_block, 'pdb')
        new_viewer.addStyle({'resn': reflig_resn}, {"stick": {'colorscheme': 'magentaCarbon', 'opacity': 0.8}})

    return new_viewer


# Docked poses read from the exported SDF; indexed by pose number in conf_viewer
confs = Chem.SDMolSupplier(dock_outSDF)

def conf_viewer(idx):
    """Display docked pose ``idx`` over the receptor, box and reference ligand.

    Reads the module-level ``confs``, ``receptorPDB``, ``boxPDB``,
    ``refligPDB`` and ``reflig_resn``; returns whatever ``show()`` returns.
    """
    pose = confs[idx]
    receptor_view = Receptor3DView(receptorPDB = receptorPDB, boxPDB = boxPDB)
    complex_view = Complex3DView(
        receptor_view,
        pose,
        refligPDB = refligPDB,
        reflig_resn = reflig_resn,
    )
    return complex_view.show()


# Slider over the pose indices 0 .. len(confs)-1; each move re-renders conf_viewer
interact(conf_viewer, idx = IntSlider(min = 0, max = len(confs) - 1, step = 1))

interactive(children=(IntSlider(value=0, description='idx', max=8), Output()), _dom_classes=('widget-interact'…

CPU times: user 74.1 ms, sys: 9.54 ms, total: 83.6 ms
Wall time: 2.09 s


<function __main__.conf_viewer(idx)>

In [46]:
# Download Zipped Output
! mkdir output; cp * output
! zip -r output.zip output

cp: -r not specified; omitting directory 'geostd'
cp: -r not specified; omitting directory 'Meeko'
cp: -r not specified; omitting directory 'output'
cp: -r not specified; omitting directory 'sample_data'
cp: -r not specified; omitting directory 'scrubber'
  adding: output/ (stored 0%)
  adding: output/5iqb_receptorFH.box.pdb (deflated 79%)
  adding: output/5iqb_receptorFH.gpf (deflated 67%)
  adding: output/5iqb_KAN_vina_out.sdf (deflated 84%)
  adding: output/5iqb_receptorFH.pdb (deflated 74%)
  adding: output/5iqb_receptorFH.box.txt (deflated 39%)
  adding: output/condacolab_install.log (deflated 75%)
  adding: output/vina (deflated 63%)
  adding: output/5iqb.pdb (deflated 75%)
  adding: output/5iqb_receptorFH.pdbqt (deflated 76%)
  adding: output/5iqb_receptor_atoms.pdb (deflated 74%)
  adding: output/GNP_chemical_templates.json (deflated 57%)
  adding: output/temp.pdb (deflated 74%)
  adding: output/5iqb_KAN_vina_out.pdbqt (deflated 85%)
  adding: output/5iqb_receptor.pdb (deflated