In [None]:
# ------------------------------------------------

import os

# Python equivalent of `source /etc/os-release && echo $PRETTY_NAME`:
# copy the KEY=value pairs of /etc/os-release into os.environ, then report the distro.
with open('/etc/os-release') as fh:
  for line in fh:
    line = line.strip()
    # skip blank lines, comments and anything that is not a KEY=value assignment
    # (a blank line used to raise IndexError here)
    if not line or line.startswith('#') or '=' not in line:
      continue
    # split on the first '=' only: a value (e.g. a URL) may itself contain '='
    os_release_key, os_release_value = line.split('=', 1)
    os_release_key = os_release_key.strip()
    if os_release_key:
      # drop the trailing newline and the optional shell quoting around the value
      os.environ[os_release_key] = os_release_value.strip().strip('"\'')
# PRETTY_NAME may be absent from the file; fall back to a generic label instead of a KeyError
print(f'Running {os.environ.get("PRETTY_NAME", "Linux")}') # on {os.environ["HOST"]} as {os.environ["USER"]}')
import os

gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

# ------------------------------------------------

from IPython.display import display, clear_output
import time, sys
# Start of the installation stopwatch (reported at the end of this cell).
tick = time.time()
# Install mamba without resetting the kernel, à la condacolab
# NOTE(review): the Mambaforge installers appear to have been retired upstream in
# favour of Miniforge3 -- confirm that this download URL still resolves.
!wget -qnc https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh
# The installer is pointed at /usr/local, i.e. on top of the system prefix.
!bash Mambaforge-Linux-x86_64.sh -bfp /usr/local
# Make the packages installed under that prefix importable from this kernel.
# NOTE(review): the Python version is hard-coded to 3.10; presumably it has to match
# both the Python shipped by the installer and the running kernel -- confirm.
sys.path.append('/usr/local/lib/python3.10/site-packages')
!mamba config --set auto_update_conda false
# Cheminformatics and docking dependencies: some come from PyPI (pip),
# others from conda channels (mamba). None of the versions are pinned.
!pip -q install rdkit
#!pip -q install pdb2pqr
!pip install -q meeko
# Course helper package, imported below as `dtc`.
!pip install -q git+https://github.com/matteoferla/DTC-compchem-practical.git
!mamba install -y -c conda-forge -c bioconda vina oddt openbabel
!pip install -q prolif MDAnalysis
# Installed from the `hcc` channel; presumably the source of the prepare_receptor /
# prepare_ligand commands used later in the notebook -- confirm.
!mamba install -y -c hcc adfr-suite
tock = time.time()
# Wipe the (long) installation log from the cell output; only the timing remains.
clear_output()
print(f'Installation time: {tock - tick}')

# ------------------------------------------------

import warnings
# Import the course helper package with warnings muted for the duration of the
# import only; the filter is restored when the `with` block exits.
with warnings.catch_warnings():
  warnings.simplefilter(action="ignore")
  # last year used pyrosetta... the warning is distracting.
  import DTC_compchem_practical as dtc

## Preparation

Let's dock QRU into Mac1.
The ligand `QRU` was derived from 6FZ and BHA.
`QRU` is a chemical component identifier, but its Enamine REAL catalogue number is `Z5021668601`.
You can find details of the structure and ligand in the PDB: https://www.rcsb.org/structure/5SQJ and
https://www.rcsb.org/ligand/QRU.
The 6FZ and BHA hits were used as templates for the derivative QRU, which is a merger of the two,
so we will use them to get the centroid around which to dock.

We should not cheat: we should start with the SMILES of QRU. Find it!


In [None]:
# so we will use their centroid for placement
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import numpy.typing as npt

# Load the two template hits, merge them into one molecule and derive the
# docking box from it: centred on the combined centroid, with an edge equal to
# the largest interatomic distance (truncated to an integer) plus 10.
template_hits = [Chem.MolFromMolBlock(dtc.get_data(mol_name)) for mol_name in ('6FZ.mol', 'BHA.mol')]
combo: Chem.Mol = Chem.CombineMols(template_hits[0], template_hits[1])
center = list(AllChem.ComputeCentroid(combo.GetConformer()))
size = int(AllChem.Get3DDistanceMatrix(combo).max()) + 10
print(f'Box size: {size}x{size}x{size}')

# we will use this for reference only!
crystal: Chem.Mol = Chem.MolFromMolBlock(dtc.get_data('QRU.mol'))
# Let's start from a SMILES which you found _online_
ligand_smiles = '👾👾👾'
ligand = Chem.MolFromSmiles(ligand_smiles)
# MolFromSmiles returns None instead of raising when the SMILES cannot be parsed
# (e.g. while the placeholder above has not been replaced), so fail here with a
# readable message rather than with an obscure error further down.
assert ligand is not None, f'Could not parse the SMILES {ligand_smiles!r}: replace the placeholder with the SMILES of QRU'
# EmbedMol generates 3D coordinates in place and returns the new conformer ID,
# or -1 when the embedding failed.
conformer_id = AllChem.EmbedMol(ligand)
assert conformer_id != -1, f'Could not generate 3D coordinates for {ligand_smiles!r}'
ligand_filename = 'QRU.pdb'
Chem.MolToPDBFile(ligand, ligand_filename)
# should we have added hydrogens or not here?

In [None]:
import pkg_resources, os, subprocess
from pathlib import Path

import shlex  # quotes the file names that are interpolated into the shell commands

prepped_template_filaname = 'template.pdbqt'
prepped_ligand_filaname = 'ligand.pdbqt'
template_filename: str = pkg_resources.resource_filename('DTC_compchem_practical', 'data/mac1-stripped.pdb')


def _run_preparation(command: str, expected_file: str, missing_message: str) -> None:
  """Run one preparation command through the shell and check its outcome.

  The captured stdout is printed. An AssertionError carrying the captured
  stderr is raised if the command exits with a non-zero status, and one
  carrying ``missing_message`` if ``expected_file`` does not exist afterwards.

  :param command: full shell command line to execute
  :param expected_file: output file the command is expected to create; a
      relative name is resolved against the current working directory, which
      is also where the command runs
  :param missing_message: assertion message used when the file is missing
  """
  pipe = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  print(pipe.stdout.decode('utf-8'))
  assert pipe.returncode == 0, f"Error: {pipe.stderr.decode('utf-8')}"
  # check next to where the command actually wrote, not in a hard-coded /content
  assert Path(expected_file).exists(), missing_message


# receptor: PDB -> PDBQT
_run_preparation(f'prepare_receptor -r {shlex.quote(template_filename)} '
                 f'-o {shlex.quote(prepped_template_filaname)}',
                 prepped_template_filaname, 'prepare_receptor failed??')
# ligand: PDB -> PDBQT, with the `-A hydrogens` option
_run_preparation(f'prepare_ligand -l {shlex.quote(ligand_filename)} '
                 f'-o {shlex.quote(prepped_ligand_filaname)} -A hydrogens',
                 prepped_ligand_filaname, 'prepare_ligand failed??')
print(f'pdbqt files {prepped_template_filaname} and {prepped_ligand_filaname} made')

In [None]:
# The ligand `QRU` was derived from 6FZ and BHA
# so we will use their centroid for placement
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import numpy.typing as npt

# The 6FZ and BHA hits were the templates for the derivative QRU: merge them
# and use the merged molecule to place and size the docking box.
template_hits = [Chem.MolFromMolBlock(dtc.get_data(hit_file)) for hit_file in ('6FZ.mol', 'BHA.mol')]
combo: Chem.Mol = Chem.CombineMols(template_hits[0], template_hits[1])
# box centre = centroid of the merged hits
center = list(AllChem.ComputeCentroid(combo.GetConformer()))
# box edge = largest interatomic distance (truncated to an integer) plus 10
size = int(AllChem.Get3DDistanceMatrix(combo).max()) + 10
print(f'Box size: {size}x{size}x{size}')
# ------------------------------------------------

from rdkit import Chem
from rdkit.Chem import AllChem
from vina import Vina
# score function to use Vina or another, see help(Vina) for more
vina = Vina(sf_name='vina')
# Load the receptor and ligand PDBQT files written by the preparation cell.
vina.set_receptor(prepped_template_filaname)
vina.set_ligand_from_file(prepped_ligand_filaname)
# Cubic box: `center` and `size` come from the merged template hits above.
vina.compute_vina_maps(center=center, box_size=[size, size, size])
# Score the ligand as loaded, then after a local optimisation; the first element
# of each result is what gets reported (labelled kcal/mol by the prints).
energy = vina.score()
print('Score before minimization: %.3f (kcal/mol)' % energy[0])
energy_minimized = vina.optimize()
print('Score after minimization : %.3f (kcal/mol)' % energy_minimized[0])
vina.write_pose('vina_minimized.pdbqt', overwrite=True)
# Docking proper: up to 20 poses are requested here, but only 10 are written to file.
vina.dock(exhaustiveness=32, n_poses=20)
vina.write_poses('vina_out.pdbqt', n_poses=10, overwrite=True)

> What is the effect of a bigger box size? Why not do blind docking?

👾👾👾

In [None]:
# The format needs converting from PDBQT to RDKit objects
# unfortunately, there is the same song and dance to do...
import io
from openbabel import openbabel as ob
from typing import List

crystal = Chem.MolFromMolBlock(dtc.get_data('QRU.mol'))

obConversion = ob.OBConversion()
obConversion.SetInAndOutFormats("pdbqt", "sdf")
# Open Babel hands back ONE molecule per read: ReadString yields the first pose
# only, and each following Read() the next one from the same input. Loop until
# the input is exhausted so that every docked pose is converted, not just the first.
mol = ob.OBMol()
sdf_entries: List[str] = []
not_at_end: bool = obConversion.ReadString(mol, vina.poses())
# or...
#obConversion.ReadFile(mol, 'vina_out.pdbqt')
while not_at_end:
  if mol.NumAtoms() > 0:  # ignore an empty record at the end of the input
    sdf_entries.append(obConversion.WriteString(mol))
  mol = ob.OBMol()  # fresh container for the next pose
  not_at_end = obConversion.Read(mol)
# one multi-record SDF string holding all the converted poses
sdf_block: str = ''.join(sdf_entries)
with Chem.ForwardSDMolSupplier(io.BytesIO(sdf_block.encode())) as sdfh:
  poses: List[Chem.Mol] = list(sdfh)

# let's have a look
display(poses[0])

> What is going on?

👾👾👾

> Spot on. The hydrogens are now radicals. That is awkward. Were we meant to use a structure with hydrogens?

👾👾👾

In [None]:
# You answered no, Let's fix the chemistry then:
# Bond orders are copied from the crystal ligand onto each pose.
# AssignBondOrdersFromTemplate returns a NEW molecule and leaves its argument
# untouched, so the result has to be stored back into `poses`.
for i, mol in enumerate(poses):
  poses[i] = AllChem.AssignBondOrdersFromTemplate(crystal, mol)
  # RMSD of *this* pose (not always the first one) against the crystal ligand
  print(f'The pose number {i} has an RMSD of {AllChem.CalcRMS(crystal, poses[i])}Aastroem')
  print(mol.GetProp('REMARK'))

# Show the top pose (white carbons) over the crystal ligand (cyan carbons) in the protein.
view = dtc.get_protein_view(dtc.get_data('mac1-stripped.pdb'), resn='HOH')
dtc.add_mols(view, cyanCarbon=crystal, whiteCarbon=poses[0])
view.zoomTo({'model': -1})
view.show()

In [None]:
import MDAnalysis as mda
import prolif as plf

# Load the protein with MDAnalysis and wrap it as a ProLIF molecule.
# NOTE(review): `protein_file` is not defined anywhere in the visible part of this
# notebook, so this cell raises NameError on a fresh kernel. Presumably it should
# point at the receptor structure used for docking -- confirm and define it first.
u = mda.Universe(protein_file)
protein_mol = plf.Molecule.from_mda(u)
# display (remove `slice(260, 263)` to show all residues)
# NOTE(review): the 260-263 slice may not suit this protein -- confirm the range.
plf.display_residues(protein_mol, slice(260, 263))