# Synthalis Dock V2

## Imports and Setup

In [1]:
import os
import sys
import re
import shutil
import requests
import numpy as np
import pandas as pd
import py3Dmol
import subprocess
import ipywidgets as widgets
from pathlib import Path
from IPython.display import display, clear_output, HTML
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')  # Suppress RDKit warnings

# Configuration
SYNTHALISDOCK_PATH = Path("/home/imraan/Downloads/Jupyter_Dock")  # UPDATE THIS PATH
BASE_DIR = Path.cwd()
DATA_DIR = BASE_DIR / "docking_data"
RECEPTOR_DIR = DATA_DIR / "receptors"
LIGAND_DIR = DATA_DIR / "ligands"
RESULTS_DIR = DATA_DIR / "results"

for d in [DATA_DIR, RECEPTOR_DIR, LIGAND_DIR, RESULTS_DIR]:
    d.mkdir(exist_ok=True, parents=True)

# Verify Vina installation
try:
    vina_check = subprocess.run(["vina", "--help"], capture_output=True, text=True)
    assert "usage:" in vina_check.stdout.lower()
    print("✅ AutoDock Vina is properly installed")
except Exception as e:
    print(f"❌ Vina check failed: {e}\nInstall with: conda install -c conda-forge autodock-vina")


❌ Vina check failed: 
Install with: conda install -c conda-forge autodock-vina


## Check Tools

In [2]:
def verify_installation():
    print("🔍 Checking minimal requirements:")
    
    # Check Vina
    try:
        vina_path = shutil.which("vina")
        assert vina_path
        print(f"✅ AutoDock Vina: {vina_path}")
    except:
        print("❌ AutoDock Vina not found (required)")
    
    # Check RDKit
    try:
        import rdkit
        print(f"✅ RDKit: {rdkit.__version__}")
    except:
        print("❌ RDKit not installed (required)")
    
    # Check OpenBabel (optional)
    try:
        obabel_path = shutil.which("obabel")
        if obabel_path:
            print(f"⚠ OpenBabel: {obabel_path}")
    except:
        pass

verify_installation()

🔍 Checking minimal requirements:
✅ AutoDock Vina: /home/imraan/miniforge3/envs/jupyter_dock/bin/vina
✅ RDKit: 2024.03.6
⚠ OpenBabel: /home/imraan/Downloads/Jupyter_Dock/mgltools_x86_64Linux2_1.5.7/bin/obabel


## Test Autodock Vina

## Enhanced Preparation Functions

In [3]:
# Known working receptor-ligand pairs
WORKING_PAIRS = {
    "P00520": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",  # Tyrosine kinase + Caffeine
    "P00734": "CC(=O)OC1=CC=CC=C1C(=O)O",     # Thrombin + Aspirin
    "P00918": "NC(=O)C1=CC=C(O)C=C1"          # Carbonic anhydrase + Paracetamol
}

def get_protein_center(pdbqt_file):
    """Calculate center of mass from PDBQT file"""
    coords = []
    with open(pdbqt_file) as f:
        for line in f:
            if line.startswith(("ATOM", "HETATM")):
                try:
                    x = float(line[30:38])
                    y = float(line[38:46])
                    z = float(line[46:54])
                    coords.append([x,y,z])
                except:
                    continue
    return np.mean(coords, axis=0) if coords else [15, 15, 15]

def prepare_receptor(uniprot_id):
    try:
        pdb_file = RECEPTOR_DIR / f"receptor_{uniprot_id}.pdb"
        pdbqt_file = RECEPTOR_DIR / f"receptor_{uniprot_id}.pdbqt"
        
        if not pdb_file.exists():
            url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id}-F1-model_v4.pdb"
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(pdb_file, 'w') as f:
                f.write(response.text)
        
        with open(pdb_file) as fin, open(pdbqt_file, 'w') as fout:
            for line in fin:
                if line.startswith(("ATOM", "HETATM")):
                    fout.write(line[:56] + "  0.00  0.00\n")
        
        return pdbqt_file
    except Exception as e:
        print(f"❌ Receptor {uniprot_id} failed: {e}")
        return None

def prepare_ligand(smiles, idx):
    try:
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            raise ValueError("Invalid SMILES")
        
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol)
        AllChem.MMFFOptimizeMolecule(mol)
        
        pdbqt_file = LIGAND_DIR / f"ligand_{idx}.pdbqt"
        with open(pdbqt_file, 'w') as f:
            f.write(Chem.MolToPDBBlock(mol))
        
        return pdbqt_file
    except Exception as e:
        print(f"❌ Ligand {idx} failed: {e}")
        return None

## Enhanced Docking Function

In [4]:
def run_docking(receptor_pdbqt, ligand_pdbqt, output_dir, box_size=20):
    try:
        output_dir.mkdir(exist_ok=True, parents=True)
        center_x, center_y, center_z = get_protein_center(receptor_pdbqt)
        
        print(f"    Using center: {center_x:.1f}, {center_y:.1f}, {center_z:.1f}")
        print(f"    Box size: {box_size}Å")
        
        result = subprocess.run([
       "vina",
            "--receptor", str(receptor_pdbqt),
            "--ligand", str(ligand_pdbqt),
            "--center_x", str(center_x),
            "--center_y", str(center_y), 
            "--center_z", str(center_z),
            "--size_x", str(box_size),
            "--size_y", str(box_size), 
            "--size_z", str(box_size),
            "--exhaustiveness", "32",  # Increased for better results
            "--num_modes", "10",       # Generate multiple poses
            "--energy_range", "4",     # Keep poses within 4 kcal/mol
            "--out", str(ouRDKittput_dir / "docked.pdbqt"),
            "--log", str(output_dir / "log.txt")
        ], capture_output=True, text=True, timeout=600)
        
        # Debug output
        if result.stdout:
            print("    Vina output:", result.stdout[:200] + "...")
        
        # Parse scores
        scores = []
        log_file = output_dir / "log.txt"
        if log_file.exists():
            with open(log_file) as f:
                for line in f:
                    if "Affinity" in line:
                        try:
                            scores.append(float(line.split()[1]))
                        except:
                            continue
        return scores if scores else None
    except Exception as e:
        print(f"    ❌ Docking error: {e}")
        return None

## Visualization Function

In [5]:
def visualize_results(receptor_pdb, ligand_pdb, docked_pdbqt=None):
    viewer = py3Dmol.view(width=800, height=600)
    
    # Receptor
    with open(receptor_pdb) as f:
        viewer.addModel(f.read(), 'pdb')
    viewer.setStyle({'cartoon': {'color': 'spectrum'}})
    
    # Original ligand
    with open(ligand_pdb) as f:
        viewer.addModel(f.read(), 'pdb')
    viewer.setStyle({'model': -1}, {'stick': {'colorscheme': 'greenCarbon'}})
    
    # Docked pose
    if docked_pdbqt and docked_pdbqt.exists():
        with open(docked_pdbqt) as f:
            viewer.addModel(f.read(), 'pdbqt')
        viewer.setStyle({'model': -1}, {'stick': {'colorscheme': 'redCarbon'}})
    
    viewer.zoomTo()
    return viewer

## UI Setup

In [6]:
# Pre-populated working examples
DEFAULT_RECEPTORS = list(WORKING_PAIRS.keys())[:3]  # First 3 known receptors
DEFAULT_LIGANDS = list(WORKING_PAIRS.values())[:3]  # Their corresponding ligands

# Input widgets
uniprot_inputs = [
    widgets.Text(
        value=DEFAULT_RECEPTORS[i] if i < len(DEFAULT_RECEPTORS) else "",
        placeholder=f'UniProt ID {i+1}',
        layout=widgets.Layout(width='90%'))
    for i in range(3)
]

smiles_inputs = [
    widgets.Text(
        value=DEFAULT_LIGANDS[i] if i < len(DEFAULT_LIGANDS) else "",
        placeholder=f'SMILES {i+1}',
        layout=widgets.Layout(width='90%'))
    for i in range(3)
]

box_size_slider = widgets.IntSlider(
    value=20,  # Smaller box for known pairs
    min=15,
    max=30,
    step=1,
    description='Box Size (Å):'
)

run_btn = widgets.Button(
    description="🚀 Run Docking",
    button_style='success',
    layout=widgets.Layout(width='200px', height='40px')
)

output_area = widgets.Output()

# Display UI
display(widgets.VBox([
    widgets.HTML("<h1>Molecular Docking Workflow</h1>"),
    widgets.HTML("<h3>Known Working Receptors</h3>"),
    widgets.VBox(uniprot_inputs),
    widgets.HTML("<h3>Their Corresponding Ligands</h3>"),
    widgets.VBox(smiles_inputs),
    box_size_slider,
    widgets.HBox([run_btn]),
    output_area
]))

VBox(children=(HTML(value='<h1>Molecular Docking Workflow</h1>'), HTML(value='<h3>Known Working Receptors</h3>…

## Main Workflow (Fixed Run Button)

In [7]:
def on_run_clicked(btn):
    with output_area:
        output_area.clear_output()
        print("⚡ Starting Docking Workflow (Using Known Working Pairs)")
        
        try:
            # Get inputs
            uniprot_ids = [b.value.strip() for b in uniprot_inputs if b.value.strip()]
            smiles_list = [b.value.strip() for b in smiles_inputs if b.value.strip()]
            box_size = box_size_slider.value
            
            print(f"\n🔬 Using pre-configured working pairs:")
            for uid, smi in zip(uniprot_ids, smiles_list):
                print(f"- {uid} + {smi}")
            print(f"Box size: {box_size}Å")
            
            # Prepare structures
            print("\n🛠️ Preparing Structures...")
            rec_files = []
            for uid in uniprot_ids:
                print(f"  - Processing {uid}...")
                rec_file = prepare_receptor(uid)
                if rec_file:
                    rec_files.append(rec_file)
                    print(f"    ✅ Prepared {rec_file.name}")
                else:
                    print(f"    ❌ Failed to prepare {uid}")
            
            lig_files = []
            for i, smi in enumerate(smiles_list, 1):
                print(f"  - Processing ligand {i}...")
                lig_file = prepare_ligand(smi, i)
                if lig_file:
                    lig_files.append(lig_file)
                    print(f"    ✅ Prepared {lig_file.name}")
                else:
                    print(f"    ❌ Failed to prepare ligand {i}")
            
            if not rec_files or not lig_files:
                raise ValueError("Structure preparation failed")
            
            # Run docking (only matching pairs)
            print("\n⚡ Running Docking on Known Pairs...")
            results = []
            
            for rec_file, lig_file in zip(rec_files, lig_files):
                rec_name = Path(rec_file).stem.replace("receptor_", "")
                lig_name = Path(lig_file).stem.replace("ligand_", "")
                dock_dir = RESULTS_DIR / f"{rec_name}_vs_{lig_name}"
                
                print(f"\n🔬 Docking {rec_name} + {lig_name}")
                scores = run_docking(rec_file, lig_file, dock_dir, box_size)
                
                if scores:
                    best_score = min(scores)
                    avg_score = np.mean(scores)
                    results.append({
                        'Receptor': rec_name,
                        'Ligand': lig_name,
                        'Best Score': f"{best_score:.2f}",
                        'Average': f"{avg_score:.2f}",
                        'Poses': len(scores),
                        'Directory': str(dock_dir)
                    })
                    print(f"    ✅ Success! Best score: {best_score:.2f} kcal/mol")
                else:
                    print("    ❌ Docking failed - check Vina output above")
            
            # Show results
            if results:
                print("\n📊 Docking Results:")
                display(pd.DataFrame(results))
                
                # Visualize all results
                for result in results:
                    print(f"\n👀 Visualizing {result['Receptor']} + {result['Ligand']}:")
                    rec_pdb = RECEPTOR_DIR / f"receptor_{result['Receptor']}.pdb"
                    lig_pdb = LIGAND_DIR / f"ligand_{result['Ligand'].split('_')[0]}.pdb"
                    docked_pdbqt = Path(result['Directory']) / "docked.pdbqt"
                    
                    display(visualize_results(rec_pdb, lig_pdb, docked_pdbqt))
            else:
                print("\n❌ No successful docking results - please check:")
                print("1. Vina is properly installed and in PATH")
                print("2. The receptor files contain valid coordinates")
                print("3. The box center is within the protein structure")
            
            print("\n🎉 Workflow completed!")
        
        except Exception as e:
            print(f"\n❌ Error: {str(e)}")

# Connect button
run_btn.on_click(on_run_clicked)
print("✅ Ready to dock! Using pre-configured working pairs")

✅ Ready to dock! Using pre-configured working pairs
