# Molecular docking is one of the simplest yet most powerful modelling methods in medicinal chemistry. Many licensed and free academic tools exist, but most still require a local installation. This is a cloud pipeline based on AutoDock Vina (Smina) that aims to provide a completely free tool for anyone who wants to use molecular docking in their projects. You do not have to install anything; just provide an SDF file (for example, one exported from ChemDraw) and the four-letter RCSB PDB code of the protein.


# References and contribution elaboration
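# This code was first inspired by [https://www.macinchem.org/reviews/JupyterDocking/jupyterdocking.php](https://www.macinchem.org/reviews/JupyterDocking/jupyterdocking.php) and [https://www.cheminformania.com/ligand-docking-with-smina/](https://www.cheminformania.com/ligand-docking-with-smina/)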


1. The following was changed in this notebook compared to the above two:
1. This notebook adds an option to dock with pocket waters, which the original notebooks did not have: PyMOL is used to keep only the water molecules within 5 Å of the ligand, and the resulting water-containing receptor is then used for docking. Run only one of the two preparation cells (with or without waters), be careful!

1. The docking validation and docking separately existed in two non-related notebooks, but this one integrated them in a single notebook, which will save time and be more convenient.
1. If you use this notebook on AI Studio (https://aistudio.baidu.com/aistudio/index), the installation is "permanent", unlike Google Colab, where you have to reinstall everything at the start of each session before you can do anything.

1. Conda, Pymol and three to four paths have been modified to maintain the compatibility on Colab, but also enable the use of AI Studio. 


# There are two stages of this notebook, docking validation and docking.

In [None]:
#@title Prepare something
# Install a private Miniconda (Python 3.7) under /home/aistudio/external-libraries,
# then install RDKit, Open Babel and py3Dmol into that environment.
# NOTE(review): `mkdir external-libraries` is relative to the current working
# directory, whereas the installer below targets the absolute path
# /home/aistudio/external-libraries — confirm both refer to the same place.
!mkdir external-libraries
! wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh
! chmod +x Miniconda3-py37_4.8.2-Linux-x86_64.sh
! bash ./Miniconda3-py37_4.8.2-Linux-x86_64.sh -b -f -p /home/aistudio/external-libraries
# 3. Use conda to install the cheminformatics toolkit RDKit.
! /home/aistudio/external-libraries/bin/conda install -c rdkit rdkit -y
# 4. Use conda to install the file-format conversion tool Open Babel.
!/home/aistudio/external-libraries/bin/conda install -q -y -c openbabel openbabel
# 5. Use pip to install the visualization tool py3Dmol.
! /home/aistudio/external-libraries/bin/pip install py3Dmol # 3D Molecular Visualizer

In [None]:
#@title Import something
# Append the private Miniconda environment's lib/python3.7 directory to the
# module search path of this kernel.
import sys
sys.path.append('/home/aistudio/external-libraries/lib/python3.7/')
# 7. Also append that environment's site-packages directory.
import sys
sys.path.append('/home/aistudio/external-libraries/lib/python3.7/site-packages/')
# 8. Import all the tools that are needed.
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
import pickle
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Print the RDKit version as a quick check that the import worked.
print(rdBase.rdkitVersion)


In [None]:
#@title Download and install software

# Download the static smina binary into the current working directory and make
# it executable.
!wget --no-check-certificate https://sourceforge.net/projects/smina/files/smina.static/download -O smina.static
!chmod u+x smina.static
# 10. Trust but check: verify that the binary works (it should print its usage text).
!./smina.static -h
# 11. Install PyMOL (open-source build) into the private conda environment.
!yes | /home/aistudio/external-libraries/bin/conda install -c conda-forge pymol-open-source 

# In the download URL below, replace the four-letter PDB code that precedes ".pdb" with your own; keep the ".pdb" extension.

In [None]:
# Download the PDB entry from RCSB into the current working directory.
!wget https://files.rcsb.org/download/3htb.pdb

--2021-10-05 01:51:40--  https://files.rcsb.org/download/.pdb
Resolving files.rcsb.org (files.rcsb.org)... 128.6.158.70
Connecting to files.rcsb.org (files.rcsb.org)|128.6.158.70|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2021-10-05 01:51:41 ERROR 404: Not Found.



# Visit RCSB to find the three-letter ligand code and substitute it for 567 below.
# Substitute the four-letter PDB code for 1234 below.

# Docking without pocket waters (don't run the second preparation cell if you run this one)

In [None]:
# Write the PyMOL script used for docking WITHOUT pocket waters.
# In the script text, 1234 stands for the four-letter PDB code and 567 for the
# three-letter ligand residue name; edit both before running.
# The script loads the structure, removes every water, adds hydrogens to O/N
# atoms, and saves the ligand and the remaining atoms of the structure as two
# separate PDB files.
# The `with` block guarantees the file is flushed and closed even if the write
# fails.
with open('fetch_and_clean.pml', 'w') as com_file:
    com_file.write('''
load 1234.pdb
remove resn HOH
h_add elem O or elem N
select 1234-567, resn 567 #Create a selection called 1OYT-FSN from the ligand
select 1234-receptor, 1234 and not 1234-567 #Select all that is not the ligand
save 1234-567.pdb, 1234-567
save 1234-receptor.pdb, 1234-receptor    
''')

# Docking with pocket waters (Don't run the first one if you run this one)

In [None]:

# Write the PyMOL script used for docking WITH pocket waters.
# In the script text, 1234 stands for the four-letter PDB code and 567 for the
# three-letter ligand residue name; edit both before running.
#
# `ligand` and `active_water` are defined with `select`, not `create`:
# `create` copies the atoms into new objects, so the waters left inside object
# 1234 would all be removed and the ligand would be matched twice by
# `resn 567`. With named selections the waters within 5 of the ligand stay in
# object 1234 and therefore end up in the saved receptor file.
with open('fetch_and_clean.pml', 'w') as com_file:
    com_file.write('''
load 1234.pdb
select ligand, resn 567
select active_water, resn HOH within 5 of ligand
remove resn HOH and not active_water
h_add elem O or elem N
select 1234-567, resn 567 #Create a selection called 1OYT-FSN from the ligand
select 1234-receptor, 1234 and not 1234-567 #Select all that is not the ligand
save 1234-567.pdb, 1234-567
save 1234-receptor.pdb, 1234-receptor    
''')

In [None]:
#@title Target preparation before docking

# Change into /content/ and run the generated PyMOL script with the -c flag.
# NOTE(review): fetch_and_clean.pml, the downloaded PDB file and smina.static
# were all written relative to whatever directory was current in the earlier
# cells — confirm they actually live in /content/ (on AI Studio this directory
# may not exist).
%cd /content/
! /home/aistudio/external-libraries/bin/pymol -c fetch_and_clean.pml

In [None]:
#@title Self dock Validation


!./smina.static -r 6NZP-receptor.pdbqt -l 1234-567.pdbqt --autobox_ligand 1234-LB7.pdbqt --autobox_add 8 --exhaustiveness 16 -o 1234-redock.pdbqt

In [None]:
#@title Import visualization software

import py3Dmol

def drawit2(m,confId=-1):
    """Show one conformer of an RDKit molecule as sticks in a py3Dmol viewer.

    Args:
        m: RDKit molecule to display.
        confId: conformer id forwarded to ``Chem.MolToMolBlock`` (-1 by default).

    Returns:
        The configured 400x400 ``py3Dmol`` view.
    """
    mol_block = Chem.MolToMolBlock(m, confId=confId)
    viewer = py3Dmol.view(width=400, height=400)
    # The mol block is handed to the viewer as an 'sdf' model.
    viewer.addModel(mol_block, 'sdf')
    viewer.setStyle({'stick': {}})
    viewer.setBackgroundColor('0xeeeeee')
    viewer.zoomTo()
    return viewer

def DrawComplex(protein, ligand, chains=('H', 'L', 'I'), ligand_resn='UNL'):
    """Show a protein-ligand complex in a py3Dmol viewer.

    The two molecules are merged with ``Chem.CombineMols``, written out as a
    PDB block and loaded as a single model. The selected protein chains are
    drawn as a spectrum-coloured cartoon; the ligand gets a semi-transparent
    VDW surface and a stick representation.

    Args:
        protein: RDKit molecule of the receptor.
        ligand: RDKit molecule of the docked ligand pose.
        chains: chain identifiers to draw as cartoon. The default keeps the
            previously hard-coded chains H, L and I; pass e.g. ``('A',)`` for
            a structure whose protein is chain A.
        ligand_resn: residue name used to select the ligand atoms in the
            combined PDB block. The default ``'UNL'`` is the previously
            hard-coded value (presumably the name RDKit assigns to atoms
            without residue information — confirm).

    Returns:
        The configured 600x600 ``py3Dmol`` view.
    """
    complex_pl = Chem.MolToPDBBlock(Chem.CombineMols(protein, ligand))
    view = py3Dmol.view(width=600, height=600)
    view.addModel(complex_pl, 'pdb')
    protein_sel = {'chain': list(chains)}
    ligand_sel = {'resn': ligand_resn}
    view.setStyle(protein_sel, {'cartoon': {'color': 'spectrum'}})
    view.addSurface(py3Dmol.VDW, {'opacity': 0.8}, ligand_sel)
    view.setStyle(ligand_sel, {'stick': {}})
    view.zoomTo()
    return view


In [None]:
#@title Import something

from rdkit import Chem

# Post-dock processing: convert the file format for visualization purposes; modify the four-letter code accordingly

In [None]:
# Convert the redocked poses from PDBQT to SDF with Open Babel for the
# visualization cells.
!/home/aistudio/external-libraries/bin/obabel 3HTB-redock.pdbqt -O 3HTB-redocked.sdf

In [None]:
#@title Show image of ligand only
pose = "0" #@param ["0", "1", "2", "3", "4", "5", "6", "7", "8"]
# Load every readable pose from the SDF file produced by the Open Babel
# conversion cell (change the four-letter code to match your own file).
mols = [m for m in Chem.SDMolSupplier('3HTB-redocked.sdf') if m is not None]
# The form delivers `pose` as a string, so convert it to an integer list index.
drawit2(mols[int(pose)])

In [None]:
#@title Show image of ligand-protein
pose = "0" #@param ["0", "1", "2", "3", "4", "5", "6", "7", "8"]

# Load the receptor PDB and draw it together with the pose chosen in the form.
# `pose` arrives as a string and is converted to an index into `mols`, the
# list of docked poses loaded in the ligand-only cell.
receptor = Chem.MolFromPDBFile('3HTB-receptor.pdb')
DrawComplex(receptor,mols[int(pose)])

# The cells above validate how well the docking method performs; the cells below run the actual docking for the small molecules of interest

In [None]:
#@title Import something

import sys
from collections import defaultdict

import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import PandasTools
import pandas as pd

%pylab inline
IPythonConsole.ipython_3d = True

import py3Dmol
import matplotlib.pyplot as plt
import subprocess

In [None]:
#@title Upload your protein and ligand please
# Change into /content/ and print the current directory, so you can see where
# the uploaded protein and ligand files are expected.
%cd /content/
!pwd

In [None]:
#@title  Uploaded sdf file of small molecules, and provide the file name you want to give as a conformasion file generated fromt uploaded file.
# sdfFilePath: SDF file with the input structures to generate conformations from.
# ConfoutputFilePath: SDF file that will receive the conformations for docking.
sdfFilePath = "" #@param {type:"string"}
ConfoutputFilePath = "" #@param {type:"string"}
# Example values:
#sdfFilePath = 'fxa_ligands.sdf' # The input file of structures to generate conformations from
#ConfoutputFilePath = 'fxa_ligandsForDocking.sdf' # Output file containing conformations for docking
# Read every record of the SDF, keeping explicit hydrogens (removeHs=False).
# Entries are not filtered here, so the list may contain None values; the
# later cells check for them.
inputMols = [x for x in Chem.SDMolSupplier(sdfFilePath,removeHs=False)]
# Show how many records were read.
len(inputMols) # Check how many strucures

In [None]:
#@title Rdkit validation, the output image has nothign to do with docking itself, just for validation

# Sanity check of the RDKit installation: build a molecule from SMILES, add
# hydrogens, embed it, run an MMFF optimisation, and display it as the cell
# output. The result is not used by the docking steps.
# NOTE: `mol` is a notebook-global name that later cells rebind.
mol = Chem.MolFromSmiles('C[NH+]1CCN(CC1)C(=O)Nc1ccc(F)cc1') #This is the ligand in the crystal structure
mol = Chem.AddHs(mol)
AllChem.EmbedMolecule(mol)
AllChem.MMFFOptimizeMolecule(mol)
mol

In [None]:
#@title Test if each line of your sdf has a name, you should see nothing if all good

# Report SDF records that could not be parsed and records whose name is empty.
for i, mol in enumerate(inputMols):
    if mol is None:
        print('Warning: Failed to read molecule %s in %s' % (i, sdfFilePath))
        # An unreadable record has no properties to inspect; skip it instead
        # of calling GetProp on None.
        continue
    if not mol.GetProp('_Name'):
        print('Warning: No name for molecule %s in %s' % (i, sdfFilePath))

In [None]:
#@title Prepare something

import multiprocessing
from concurrent import futures

In [None]:
#@title Prepare something again

# Install the progressbar package imported by the conformation-generation cell.
!/home/aistudio/external-libraries/bin/pip install progressbar

In [None]:
#@title Small molecules conformation generation

import multiprocessing

# Download this from http://pypi.python.org/pypi/futures
from concurrent import futures

# conda install progressbar
import progressbar

# Use one worker process per available core.
# NOTE(review): an earlier version apparently reserved cores ("-1") to keep the
# system responsive; that reservation is currently disabled.
numcores = multiprocessing.cpu_count()
max_workers = numcores  # all cores; the "-1" reservation is commented out

# Embedding parameters: ETKDG, the knowledge-based torsion method described in
# http://pubs.acs.org/doi/abs/10.1021/acs.jcim.5b00654
# `ps` is read as a global by generateconformations(), which runs in the
# worker processes.

ps = AllChem.ETKDG()
# RMS pruning threshold handed to the embedder.
ps.pruneRmsThresh=0.5
# presumably 0 lets RDKit pick the thread count itself — confirm in the RDKit docs
ps.numThreads=0
# Number of conformers requested per molecule; edit as desired, e.g. n = 5.
n=5
def generateconformations(m, n, name):
    """Embed and UFF-optimise conformers for one molecule.

    Hydrogens are added to a copy of ``m``, conformers are embedded with the
    module-level parameter object ``ps``, and each resulting conformer is
    optimised with UFF.

    Args:
        m: RDKit molecule to process.
        n: number of conformers requested from the embedder.
        name: label that is passed through unchanged so the caller can
            re-attach it to the returned molecule.

    Returns:
        A tuple ``(molecule_with_hydrogens, conformer_ids, name)``.
    """
    mol_h = Chem.AddHs(m)
    # The embedder returns a Boost-wrapped sequence that cannot be pickled;
    # turn it into a plain list so the result can travel back to the parent.
    conf_ids = list(AllChem.EmbedMultipleConfs(mol_h, n, ps))
    for conf_id in conf_ids:
        AllChem.UFFOptimizeMolecule(mol_h, confId=conf_id)
    return mol_h, conf_ids, name

# Generate conformers for every readable input molecule in parallel and write
# all of them to ConfoutputFilePath.
# (The former `sys.argv[1:3]` unpacking was dropped: in a notebook sys.argv
# holds the kernel's launch arguments, and the two names were never used.)
writer = Chem.SDWriter(ConfoutputFilePath)
try:
    with futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Submit one asynchronous job per readable molecule.
        jobs = []
        for mol in inputMols:
            if mol is not None:
                name = mol.GetProp('_Name')
                jobs.append(executor.submit(generateconformations, mol, n, name))

        widgets = ["Generating conformations; ", progressbar.Percentage(), " ",
                   progressbar.ETA(), " ", progressbar.Bar()]
        pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(jobs))
        # Consume results in completion order, not submission order.
        for job in pbar(futures.as_completed(jobs)):
            mol, ids, name = job.result()
            # Re-attach the name explicitly (presumably because it is not
            # preserved when the molecule is sent back from the worker).
            mol.SetProp('_Name', name)
            for conf_id in ids:
                writer.write(mol, confId=conf_id)
finally:
    # Always flush and close the output file, even if a job failed.
    writer.close()

In [None]:
#@title Input one protein related name, three ligand related names, dokcing should starts here
# ProteinForDocking: receptor file passed to smina with -r.
# LigandFromProtein: reference ligand file used to define the search box (--autobox_ligand).
# DockedFilePath: output file for the docked poses (-o).
# FlexibleDockedFilePath: not used by the command below.
ProteinForDocking = "" #@param {type:"string"}
LigandFromProtein = "" #@param {type:"string"}
DockedFilePath = "" #@param {type:"string"}
FlexibleDockedFilePath = "" #@param {type:"string"}
# Example values:
#ProteinForDocking = '1f0r_minimized.pdb'

#LigandFromProtein = '1f0r_native_ligand.pdb'
#DockedFilePath = '1f0r_All_Docked.sdf.gz'
#FlexibleDockedFilePath = '1f0r_FlexDocked.sdf.gz'
# I: Docking starts here. To switch to another protein or set of small
# molecules, only the variable assignments above need to change.
# The ligands docked are the conformers in ConfoutputFilePath (set in an earlier cell).
# NOTE(review): ./smina.static is resolved relative to the current working
# directory — confirm the binary was downloaded into this directory.
!'./smina.static' --cpu 2 --seed 0 --autobox_ligand '{LigandFromProtein}' -r '{ProteinForDocking}' -l '{ConfoutputFilePath}' -o '{DockedFilePath}'

/bin/bash: ./smina.static: No such file or directory


# When docking finishes, download the docked output file (e.g. All_Docked.sdf.gz) to your local computer