<a href="https://colab.research.google.com/github/kirkis-07/Large-scale-molecular-docking-/blob/main/Automated_docking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Large-scale molecular docking in Colab using AutoDock Vina

### This is a large-scale molecular docking script built on the ODDT (Open Drug Discovery Toolkit) library. It exposes several parameters that the user can tweak.

### The cool thing is that you don't need to provide x, y, z box coordinates: the script derives the docking box automatically from the reference ligand (the `auto_ligand` option of ODDT's AutoDock Vina wrapper).

### The final results are stored in a CSV file, and the best poses (score <= -8, a threshold you can change) are stored in a separate directory for later use.

In [10]:
!pip install biopython
!pip install oddt
!pip install requests
!pip install rdkit
!sudo apt install autodock-vina
!sudo apt install openbabel

Collecting rdkit
  Downloading rdkit-2025.9.3-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (4.2 kB)
Downloading rdkit-2025.9.3-cp312-cp312-manylinux_2_28_x86_64.whl (36.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.4/36.4 MB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit
Successfully installed rdkit-2025.9.3
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libboost-filesystem1.74.0 libboost-program-options1.74.0
  libboost-thread1.74.0
Suggested packages:
  autodock autogrid
The following NEW packages will be installed:
  autodock-vina libboost-filesystem1.74.0 libboost-program-options1.74.0
  libboost-thread1.74.0
0 upgraded, 4 newly installed, 0 to remove and 1 not upgraded.
Need to get 1,120 kB of archives.
After this operation, 7,537 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu j

In [11]:
# Identifier of the receptor structure entry (intended as the ID handed to the
# PDB download step, which builds an RCSB URL from it).
PDB_ID = '7C2M'
# Residue name of the co-crystallised ligand. Hetero residues with this name are
# written to PDB/reference_ligand.pdb and later given to the docker as `auto_ligand`.
ref_ligand = 'FFU'
# Text file listing the ligands to dock, one SMILES string per line
# (blank lines are skipped when it is read).
# NOTE: the docking cell assigns INPUT_FILE again with the same value.
INPUT_FILE = 'best_best.txt'

# Get PDB

In [12]:
import requests
import os
import sys

def download_pdb(pdb_id, timeout=30):
    """Download a PDB-format entry from RCSB into the local ``PDB`` directory.

    Parameters
    ----------
    pdb_id : str
        Entry identifier. It is lower-cased before being used in the URL and
        in the output file name.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests.get`` may block indefinitely on a stalled connection.

    Returns
    -------
    str or None
        Path of the saved file (``PDB/<pdb_id>.pdb``) when the server answers
        with HTTP 200, otherwise ``None`` after printing the status code.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` on connection errors or timeouts.
    """
    pdb_id = pdb_id.lower()
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    response = requests.get(url, timeout=timeout)
    # The output directory is created regardless of the download outcome.
    os.makedirs("PDB", exist_ok=True)

    if response.status_code != 200:
        print(f"Failed to download {pdb_id}. HTTP status: {response.status_code}")
        # Explicit so callers can see that failure is signalled by None.
        return None

    file_path = os.path.join("PDB", f"{pdb_id}.pdb")
    with open(file_path, "wb") as f:
        f.write(response.content)
    print(f"Downloaded {pdb_id}.pdb to {file_path}")
    return file_path

# Use the PDB_ID constant from the configuration cell. The previous argument,
# `PDB`, is not defined anywhere in the notebook and raises NameError on a
# fresh kernel.
PDB_PATH = download_pdb(PDB_ID)

Downloaded 7c2m.pdb to PDB/7c2m.pdb


In [13]:
from Bio.PDB import PDBParser, PDBIO, Select

# download_pdb() yields None when the server does not answer with HTTP 200.
# Stop here with a clear message instead of handing None to the parser.
if PDB_PATH is None:
    raise RuntimeError(
        "PDB_PATH is None: the structure download failed, so there is no file to parse."
    )

# QUIET=True suppresses the parser's construction warnings.
parser = PDBParser(QUIET=True)
# "holo" is only the label given to the parsed structure object.
structure = parser.get_structure("holo", PDB_PATH)

class ProteinSelect(Select):
    """PDBIO selector that keeps only residues with a blank hetero flag.

    Used when writing the apo receptor: residues whose id starts with anything
    other than a single space are dropped.
    """

    def accept_residue(self, residue):
        """Return True when the first field of ``residue.id`` is ``" "``."""
        hetero_flag = residue.id[0]
        return hetero_flag == " "

class LigandSelect(Select):
    """PDBIO selector that keeps hetero residues with one specific residue name."""

    def __init__(self, ligand_resname):
        """Remember the residue name (e.g. the reference ligand code) to keep."""
        self.ligand_resname = ligand_resname

    def accept_residue(self, residue):
        """Return True for non-blank-flag residues named ``self.ligand_resname``."""
        # A blank hetero flag marks a standard residue: never part of the ligand.
        if residue.id[0] == " ":
            return False
        return residue.resname == self.ligand_resname

# Write the receptor without hetero residues.
io = PDBIO()
io.set_structure(structure)
io.save("PDB/apo_protein.pdb", ProteinSelect())

# Make sure the reference ligand is actually present before writing it out.
# Otherwise a ligand file with no atoms is saved silently and the failure only
# shows up later, in the docking cell, with a much less helpful error.
ligand_select = LigandSelect(ref_ligand)
n_ligand_residues = sum(
    1 for residue in structure.get_residues() if ligand_select.accept_residue(residue)
)
if n_ligand_residues == 0:
    raise ValueError(
        f"No hetero residue named {ref_ligand!r} found in {PDB_PATH}; "
        "check ref_ligand against the HETATM records of the structure."
    )
if n_ligand_residues > 1:
    # All matching copies end up in the same file.
    # NOTE(review): presumably the automatic docking box then spans every
    # copy - confirm, and restrict to one chain if that is not intended.
    print(
        f"Note: {n_ligand_residues} residues named {ref_ligand!r} will be written "
        "to PDB/reference_ligand.pdb"
    )

io.set_structure(structure)
io.save("PDB/reference_ligand.pdb", ligand_select)

# GOOD THINGS TAKE TIME...

In [15]:
from rdkit import Chem
from rdkit import RDLogger
import oddt
import oddt.docking
import csv
import os

# Ligand list to dock: one SMILES string per line (same value as in the
# earlier configuration cell).
INPUT_FILE = "best_best.txt"
# Results file. It is opened in append mode, so re-running this cell adds rows
# to an existing file instead of replacing it.
OUTPUT_CSV = "vina_results.csv"
# Number of scored ligands held in memory before they are written and flushed
# to OUTPUT_CSV.
BATCH_SIZE = 50
# Passed to the docker as n_cpu.
N_CPU = 4
# Raise RDKit's logger threshold to CRITICAL to silence lower-level messages
# while the SMILES strings are parsed.
lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL)

# Receptor: first molecule read from the apo PDB file, flagged as a protein for ODDT.
protein = next(oddt.toolkit.readfile('pdb', 'PDB/apo_protein.pdb'))
protein.protein = True
# Reference ligand: first molecule read from the extracted ligand PDB file.
ref = next(oddt.toolkit.readfile('pdb', 'PDB/reference_ligand.pdb'))

# Vina docker shared by every docking call below. The reference ligand is given
# as auto_ligand in place of an explicit box centre/size
# (presumably ODDT derives the search box from it - confirm against the ODDT docs).
docker = oddt.docking.AutodockVina.autodock_vina(protein=protein,auto_ligand=ref,n_cpu=N_CPU)

# Element symbols a ligand may contain to be passed on to docking, grouped by kind.
_ORGANIC_ELEMENTS = {'H', 'C', 'N', 'O', 'S', 'P'}
_HALOGEN_ELEMENTS = {'F', 'Cl', 'Br', 'I'}
_METAL_ELEMENTS = {'Mg', 'Mn', 'Zn', 'Ca', 'Fe', 'Cu'}
ALLOWED_ELEMENTS = _ORGANIC_ELEMENTS | _HALOGEN_ELEMENTS | _METAL_ELEMENTS

def vina_element_filter_from_mol(mol):
    """Tell whether every atom of ``mol`` has a symbol in ALLOWED_ELEMENTS.

    ``mol`` must provide ``GetAtoms()``, whose items provide ``GetSymbol()``.
    Returns True for a molecule without atoms; evaluation stops at the first
    atom whose symbol is not allowed.
    """
    return all(atom.GetSymbol() in ALLOWED_ELEMENTS for atom in mol.GetAtoms())

def safe_prepare_ligand_from_mol(mol):
    """Wrap ``mol`` as an ODDT RDKit-toolkit molecule ready for docking.

    Charges are calculated first, then 3D coordinates are generated. Any
    exception raised along the way is swallowed and reported as ``None``, so
    one bad ligand cannot stop a batch run.
    """
    try:
        ligand = oddt.toolkits.rdk.Molecule(mol)
        ligand.calccharges()
        ligand.make3D()
    except Exception:
        return None
    return ligand

def safe_dock(mol):
    """Dock ``mol`` with the shared ``docker`` and return the first pose's score.

    Returns the ``'vina_affinity'`` entry of the first returned pose, or
    ``None`` when docking yields no poses or anything raises.
    """
    try:
        docked_poses = docker.dock(mol, protein=protein)
        top_score = docked_poses[0].data['vina_affinity'] if docked_poses else None
    except Exception:
        top_score = None
    return top_score

# --- Batch docking driver ----------------------------------------------------
# Score recorded for a ligand that could not be parsed, filtered, prepared or
# docked. NOTE: 0.0 therefore means "failed", not a measured affinity.
FAILED_SCORE = 0.0

# Read the ligand list first: a missing or unreadable INPUT_FILE now fails
# before the results file is created or touched.
with open(INPUT_FILE) as f:
    smiles_list = [s.strip() for s in f if s.strip()]

# Results are appended so rows from an earlier run are kept. The header is
# written when the file is new, and also when it exists but is empty (e.g. a
# previous run stopped before writing anything).
file_exists = os.path.isfile(OUTPUT_CSV)
needs_header = not file_exists or os.path.getsize(OUTPUT_CSV) == 0

buffer = []

# The context manager closes the CSV handle even if the loop raises or is interrupted.
with open(OUTPUT_CSV, 'a', newline='') as csv_file:
    writer = csv.writer(csv_file)

    if needs_header:
        writer.writerow(["smiles", "vina_affinity"])

    try:
        for i, smi in enumerate(smiles_list, 1):
            score = FAILED_SCORE

            try:
                mol = Chem.MolFromSmiles(smi, sanitize=True)
                if mol and vina_element_filter_from_mol(mol):
                    lig = safe_prepare_ligand_from_mol(mol)
                    if lig:
                        result = safe_dock(lig)
                        if result is not None:
                            score = result
            except Exception:
                # Best effort: one bad ligand must not stop the whole batch.
                score = FAILED_SCORE

            buffer.append([smi, score])

            # Write in batches so progress survives a crash or disconnect.
            if len(buffer) >= BATCH_SIZE:
                writer.writerows(buffer)
                csv_file.flush()
                buffer.clear()
                print(f"Saved {i} ligands")
    finally:
        # Also runs on interruption, so already-scored ligands that are still
        # buffered are not lost.
        if buffer:
            writer.writerows(buffer)
            csv_file.flush()

print("Docking completed.")

Docking completed.
