# SYNTHALIS DOCKING

#  Upgraded Synthalis Molecular Docking Workflow
# **Features**:
# - Multiple ligand/receptor input
# - Automatic structure preparation
# - Cross-docking results matrix

## Imports and Setup

In [1]:
import os
import re
import sys
import shutil
import requests
import numpy as np 
import pandas as pd
import py3Dmol
import subprocess
import ipywidgets as widgets
from pathlib import Path
from IPython.display import display, clear_output, HTML
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')

# Working-directory layout: every artefact lives under ./synthalisdock_data
BASE_DIR = Path.cwd()
DATA_DIR = BASE_DIR.joinpath("synthalisdock_data")
RECEPTOR_DIR = DATA_DIR.joinpath("receptors")
LIGAND_DIR = DATA_DIR.joinpath("ligands")
RESULTS_DIR = DATA_DIR.joinpath("results")

# Create the folders, parent first so that the sub-folders can be made
for workdir in (DATA_DIR, RECEPTOR_DIR, LIGAND_DIR, RESULTS_DIR):
    workdir.mkdir(exist_ok=True)

print("✅ Environment initialized")


✅ Environment initialized


## Tool Verification

## Check Tools

In [2]:
def check_tools_installed():
    """Check that every external program the workflow shells out to is on PATH.

    Each tool that is found is reported on stdout together with its location.

    Raises:
        RuntimeError: if one or more tools cannot be located; the message
            lists the missing tools followed by installation hints.
    """
    required_tools = {
        'python2': 'Python 2',
        'prepare_receptor4.py': 'AutoDockTools',
        'prepare_ligand4.py': 'AutoDockTools', 
        'vina': 'AutoDock Vina',
        'obabel': 'OpenBabel'
    }

    # Resolve every executable first, then report in declaration order
    located = {tool: shutil.which(tool) for tool in required_tools}

    missing = []
    for tool, src in required_tools.items():
        path = located[tool]
        if path:
            print(f"✅ Found {tool} at: {path}")
        else:
            missing.append(f"{tool} ({src})")

    if not missing:
        return

    missing_lines = "\n".join(f"❌ {m}" for m in missing)
    raise RuntimeError(
        "Missing required tools:\n"
        + missing_lines
        + "\n\nInstall with:\n"
        + "sudo apt-get install autodock-vina openbabel\n"
        + "And install MGLTools from http://mgltools.scripps.edu"
    )

# Run the tool check; a failure is reported as text instead of a traceback
try:
    check_tools_installed()
except Exception as e:
    print(e)
else:
    print("🎉 All tools configured!")


✅ Found prepare_receptor4.py at: /home/imraan/mgltools_x86_64Linux2_1.5.7/bin/prepare_receptor4.py
✅ Found prepare_ligand4.py at: /home/imraan/mgltools_x86_64Linux2_1.5.7/bin/prepare_ligand4.py
✅ Found vina at: /home/imraan/miniforge3/envs/molecule_dock/bin/vina
✅ Found obabel at: /home/imraan/Downloads/Jupyter_Dock/mgltools_x86_64Linux2_1.5.7/bin/obabel
Missing required tools:
❌ python2 (Python 2)

Install with:
sudo apt-get install autodock-vina openbabel
And install MGLTools from http://mgltools.scripps.edu


## Receptor Preparation 

In [3]:
def prepare_receptor(uniprot_id):
    """Download the AlphaFold model of a protein and store it in RECEPTOR_DIR.

    Args:
        uniprot_id: UniProt accession made of 6-10 upper-case letters/digits
            (for example "P00520"). Surrounding whitespace is not accepted.

    Returns:
        The path of the saved ``receptor_<id>.pdb`` file as a string, or
        ``None`` if validation, download or saving failed (the reason is
        printed).
    """
    try:
        # Validate UniProt ID format. fullmatch (unlike match + "$") also
        # rejects a trailing newline, which would otherwise end up in the
        # URL and in the file name.
        if not isinstance(uniprot_id, str) or not re.fullmatch(r"[A-Z0-9]{6,10}", uniprot_id):
            raise ValueError(f"Invalid UniProt ID format: {uniprot_id}")

        af_url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id}-F1-model_v4.pdb"
        local_file = RECEPTOR_DIR / f"receptor_{uniprot_id}.pdb"

        print(f"  - Downloading {uniprot_id}...")
        response = requests.get(af_url, timeout=30)
        response.raise_for_status()

        # Save the payload byte-for-byte: going through response.text and a
        # text-mode file would re-encode it with the platform default codec
        # and translate line endings.
        local_file.parent.mkdir(parents=True, exist_ok=True)
        local_file.write_bytes(response.content)

        return str(local_file)
    except Exception as e:
        print(f"❌ Failed to prepare receptor {uniprot_id}: {str(e)}")
        return None

# Test receptor download (P00520 is used as a tyrosine-kinase example)
test_receptor = prepare_receptor("P00520")
if test_receptor:
    print(f"Test receptor saved to: {test_receptor}")
else:
    # prepare_receptor returns None on failure and has already printed why;
    # avoid the misleading "saved to: None" line.
    print("Test receptor download failed - see the error above")

  - Downloading P00520...
Test receptor saved to: /home/imraan/Jupyter_Dock/synthalisdock_data/receptors/receptor_P00520.pdb


In [4]:
import shutil
# Diagnostic: show which OpenBabel executable is picked up from PATH
obabel_location = shutil.which("obabel")
print("OpenBabel path:", obabel_location)

OpenBabel path: /home/imraan/Downloads/Jupyter_Dock/mgltools_x86_64Linux2_1.5.7/bin/obabel


## Ligand Preparation

In [5]:
def prepare_ligand(smiles: str, idx: int, seed: int = 42):
    """Build a 3D, hydrogen-complete structure from SMILES and save it as PDB.

    Args:
        smiles: SMILES string of the ligand.
        idx: Number used in the output file name (``ligand_<idx>.pdb``).
        seed: Random seed for conformer embedding so that repeated runs give
            the same starting coordinates.

    Returns:
        Path of the written PDB file (inside LIGAND_DIR) as a string, or
        ``None`` if any step failed (the reason is printed).
    """
    try:
        if not isinstance(smiles, str) or not smiles.strip():
            raise ValueError("Empty SMILES")

        mol = Chem.MolFromSmiles(smiles.strip())
        if mol is None:
            raise ValueError("Invalid SMILES")

        mol = Chem.AddHs(mol)

        # EmbedMolecule signals failure by returning -1 instead of raising;
        # retry once from random coordinates before giving up.
        if AllChem.EmbedMolecule(mol, randomSeed=seed) == -1:
            if AllChem.EmbedMolecule(mol, randomSeed=seed, useRandomCoords=True) == -1:
                raise ValueError("3D embedding failed")

        # MMFF cannot type every molecule; fall back to UFF when parameters
        # are missing instead of silently skipping the optimisation.
        if AllChem.MMFFHasAllMoleculeParams(mol):
            AllChem.MMFFOptimizeMolecule(mol)
        else:
            AllChem.UFFOptimizeMolecule(mol)

        lig_file = LIGAND_DIR / f"ligand_{idx}.pdb"
        lig_file.unlink(missing_ok=True)  # Remove if exists

        with Chem.PDBWriter(str(lig_file)) as w:
            w.write(mol)

        if not lig_file.exists():
            raise IOError("Failed to save ligand")

        return str(lig_file)
    except Exception as e:
        print(f"Ligand {idx} failed: {str(e)}")
        return None


## AutoDock Vina Execution

In [None]:
# ======================
# UPDATED DOCKING FUNCTION
# ======================

def run_vina_docking(rec_pdbqt, lig_pdbqt, output_dir,
                     center=(15, 15, 15), size=(30, 30, 30),
                     exhaustiveness=8, num_modes=5, timeout=300):
    """Dock one ligand into one receptor with the ``vina`` command-line tool.

    Args:
        rec_pdbqt: Receptor PDBQT file (``str`` or ``Path``).
        lig_pdbqt: Ligand PDBQT file (``str`` or ``Path``).
        output_dir: Directory for ``docked.pdbqt`` and ``log.txt``; created
            if missing.
        center: (x, y, z) centre of the search box.
        size: (x, y, z) edge lengths of the search box.
        exhaustiveness: Value passed to ``--exhaustiveness``.
        num_modes: Value passed to ``--num_modes``.
        timeout: Seconds to wait for Vina before giving up.

    Returns:
        List of affinities (floats, one per pose, in file order) read from
        ``docked.pdbqt``, or ``None`` on any failure (the reason is printed).
    """
    try:
        # Callers pass either str or Path; normalise so .exists()/.mkdir() work
        rec_pdbqt = Path(rec_pdbqt)
        lig_pdbqt = Path(lig_pdbqt)
        output_dir = Path(output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)

        # Verify input files exist
        if not rec_pdbqt.exists():
            raise FileNotFoundError(f"Receptor file not found: {rec_pdbqt}")
        if not lig_pdbqt.exists():
            raise FileNotFoundError(f"Ligand file not found: {lig_pdbqt}")

        docked_file = output_dir / "docked.pdbqt"
        # Drop any result of a previous run so stale poses are never parsed
        docked_file.unlink(missing_ok=True)

        cmd = [
            "vina",
            "--receptor", str(rec_pdbqt),
            "--ligand", str(lig_pdbqt),
            "--center_x", str(center[0]),
            "--center_y", str(center[1]),
            "--center_z", str(center[2]),
            "--size_x", str(size[0]),
            "--size_y", str(size[1]),
            "--size_z", str(size[2]),
            "--exhaustiveness", str(exhaustiveness),
            "--num_modes", str(num_modes),
            "--out", str(docked_file),
        ]

        # Run Vina with timeout
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout, check=True
        )

        # NOTE(review): "--log" is not passed because Vina 1.2.x appears to
        # have dropped that option; the captured console output is saved
        # to log.txt instead so the file is still produced.
        (output_dir / "log.txt").write_text(result.stdout or "")

        # Parse scores from the per-pose "REMARK VINA RESULT:" lines of the
        # output file (first number is the affinity). The results table in
        # the log carries no "Affinity" label on its rows, so scanning the
        # log for that word never yields a score.
        scores = []
        with open(docked_file) as f:
            for line in f:
                if line.startswith("REMARK VINA RESULT:"):
                    try:
                        scores.append(float(line.split()[3]))
                    except (IndexError, ValueError):
                        continue

        if not scores:
            raise ValueError("No docking scores found in output")

        return scores

    except subprocess.TimeoutExpired:
        print(f"❌ Docking timed out after {timeout / 60:g} minutes")
        return None
    except subprocess.CalledProcessError as e:
        print(f"❌ Vina error: {e.stderr}")
        return None
    except Exception as e:
        print(f"❌ Docking failed: {str(e)}")
        return None

## Binding Site Detection

In [None]:
def detect_binding_site(pdb_file):
    """Estimate docking-box centre coordinates for a structure using PyMOL.

    The structure is loaded into a freshly reinitialised PyMOL session,
    solvent is removed, the selection ``byres chain A around 5`` is made and
    its centre of mass is returned.

    Args:
        pdb_file: Path of the structure file to load.

    Returns:
        The value of ``cmd.centerofmass`` for the selection, or the default
        ``[15, 15, 15]`` if PyMOL is unavailable or any step raises.
    """
    fallback_center = [15, 15, 15]
    try:
        # Imported lazily so the notebook still runs without PyMOL installed
        from pymol import cmd as pymol_cmd

        pymol_cmd.reinitialize()
        pymol_cmd.load(str(pdb_file))
        pymol_cmd.remove("solvent")
        pymol_cmd.select("binding_site", "byres chain A around 5")
        site_center = pymol_cmd.centerofmass("binding_site")
        print(f"🔍 Binding site at: {site_center}")
    except Exception as e:
        print(f"⚠️ Using default coordinates: {str(e)}")
        return fallback_center
    return site_center

## Visualisation

In [None]:
def visualize_docking_results(rec_path, lig_path, docked_path, width=800, height=600):
    """Build a py3Dmol view of the receptor, the input ligand and the docked pose.

    Args:
        rec_path: Receptor PDB file (``str`` or ``Path``).
        lig_path: Input ligand PDB file (``str`` or ``Path``).
        docked_path: Vina output PDBQT (``str`` or ``Path``); skipped when
            the file does not exist.
        width: Viewer width in pixels.
        height: Viewer height in pixels.

    Returns:
        The configured ``py3Dmol.view`` object (display it to render).
    """
    # Accept plain strings as well as Path objects
    rec_path = Path(rec_path)
    lig_path = Path(lig_path)
    docked_path = Path(docked_path)

    viewer = py3Dmol.view(width=width, height=height)

    # Receptor: cartoon for chain A, sticks for hetero atoms
    viewer.addModel(rec_path.read_text(), 'pdb')
    viewer.setStyle({'chain': 'A'}, {'cartoon': {'color': 'spectrum'}})
    viewer.setStyle({'hetflag': True}, {'stick': {'radius': 0.3}})

    # Original (undocked) ligand; model -1 addresses the model just added
    viewer.addModel(lig_path.read_text(), 'pdb')
    viewer.setStyle({'model': -1}, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})

    # Docked pose, if docking produced an output file.
    # NOTE(review): addModel presumably loads only the first model of a
    # multi-pose PDBQT - confirm against the 3Dmol.js parser options.
    if docked_path.exists():
        viewer.addModel(docked_path.read_text(), 'pdbqt')
        # A plain carbon colour scheme is used to tell the docked pose apart
        # from the green input ligand. The 'func'/'min'/'max' keys and the
        # 'rwb' gradient name that used to be passed here are not documented
        # stick-style options.
        viewer.setStyle({'model': -1}, {
            'stick': {'colorscheme': 'magentaCarbon', 'radius': 0.3}
        })

    # Label hydrophobic residues of chain A
    viewer.addResLabels({'chain': 'A', 'resn': ['ALA', 'VAL', 'LEU', 'ILE']})
    viewer.zoomTo()

    # Outline rendering for the whole view. py3Dmol forwards unknown method
    # names straight to JavaScript, so the earlier addView(...) calls and
    # the addModel(viewer.getModel(), ...) round trip (neither of which is a
    # documented 3Dmol.js viewer operation) were removed rather than left to
    # fail in the browser.
    viewer.setViewStyle({'style': 'outline', 'width': 0.1})

    return viewer

# INPUT INTERFACE

In [7]:
# Custom CSS: enlarge the output area and give the 3D viewer a fixed height
_CUSTOM_CSS = """
<style>
    .widget-output {
        min-height: 600px !important;
        max-height: 800px !important;
        overflow-y: auto !important;
    }
    .viewer_3Dmoljs {
        height: 500px !important;
        width: 100% !important;
    }
</style>
"""
display(HTML(_CUSTOM_CSS))

# Input fields: three receptor boxes and three ligand boxes; only the first
# of each is pre-filled with an example value
_box_layout = {'width': '95%'}

uniprot_boxes = [
    widgets.Text(
        value="" if i else "P00520",
        placeholder=f'UniProt ID {i+1}',
        layout=widgets.Layout(**_box_layout),
    )
    for i in range(3)
]

smiles_boxes = [
    widgets.Text(
        value="" if i else "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
        placeholder=f'SMILES {i+1}',
        layout=widgets.Layout(**_box_layout),
    )
    for i in range(3)
]

# Button and output
_button_layout = widgets.Layout(width='300px', height='40px')
process_btn = widgets.Button(
    description="🚀 Run Docking",
    button_style='success',
    layout=_button_layout,
)

_output_layout = {
    'border': '1px solid #ddd',
    'min_height': '600px'
}
output = widgets.Output(layout=_output_layout)

# Display UI: title, receptor inputs, ligand inputs, run button, output area
_ui_children = [
    widgets.HTML("<h1>Synthalis Docking</h1>"),
    widgets.HTML("<h3>Receptors (UniProt IDs)</h3>"),
    widgets.VBox(uniprot_boxes),
    widgets.HTML("<h3>Ligands (SMILES)</h3>"),
    widgets.VBox(smiles_boxes),
    process_btn,
    output,
]
display(widgets.VBox(_ui_children))

VBox(children=(HTML(value='<h1>Synthalis Docking</h1>'), HTML(value='<h3>Receptors (UniProt IDs)</h3>'), VBox(…

## Main Workflow

In [None]:
def _pdb_to_pdbqt(pdb_path, rigid=False):
    """Convert a PDB file to a PDBQT file next to it with OpenBabel; return its Path."""
    pdb_path = Path(pdb_path)
    pdbqt_path = pdb_path.with_suffix(".pdbqt")
    cmd = ["obabel", str(pdb_path), "-O", str(pdbqt_path)]
    if rigid:
        # -xr writes the molecule as a rigid receptor (no torsion tree);
        # -p 7.4 adds hydrogens for pH 7.4, which the downloaded model lacks.
        cmd += ["-xr", "-p", "7.4"]
    subprocess.run(cmd, capture_output=True, text=True, timeout=120, check=True)
    # obabel can exit with status 0 without producing usable output
    if not pdbqt_path.exists() or pdbqt_path.stat().st_size == 0:
        raise IOError(f"OpenBabel produced no output for {pdb_path.name}")
    return pdbqt_path


def _convert_structures(pdb_paths, rigid):
    """Convert each PDB to PDBQT; return (pdb, pdbqt) pairs for the ones that succeeded."""
    converted = []
    for pdb in pdb_paths:
        try:
            converted.append((Path(pdb), _pdb_to_pdbqt(pdb, rigid=rigid)))
        except Exception as e:
            print(f"❌ PDBQT conversion failed for {Path(pdb).name}: {str(e)}")
    return converted


def on_run_clicked(btn):
    """Button callback: prepare all inputs, cross-dock them and show the results.

    Reads the UniProt IDs and SMILES strings from the input widgets, prepares
    receptor and ligand PDB files, converts them to PDBQT, docks every
    receptor against every ligand with ``run_vina_docking`` (using that
    function's default search box) and displays a summary table plus a 3D
    view of the first successful pair. All progress and errors are written
    to the ``output`` widget.

    Args:
        btn: The button instance supplied by ipywidgets (unused).
    """
    with output:
        output.clear_output()
        print("⚡ Starting workflow...")

        try:
            # 1. Get inputs
            uniprot_ids = [b.value.strip() for b in uniprot_boxes if b.value.strip()]
            smiles_list = [b.value.strip() for b in smiles_boxes if b.value.strip()]

            if not uniprot_ids or not smiles_list:
                raise ValueError("Please provide at least 1 receptor and 1 ligand")

            # 2. Prepare structures (the helpers return None on failure)
            rec_paths = [prepare_receptor(uid) for uid in uniprot_ids]
            rec_paths = [p for p in rec_paths if p]

            lig_paths = [prepare_ligand(smi, i+1) for i, smi in enumerate(smiles_list)]
            lig_paths = [p for p in lig_paths if p]

            if not rec_paths or not lig_paths:
                raise ValueError("Structure preparation failed")

            # Vina reads PDBQT, not PDB: convert each structure once up front
            receptors = _convert_structures(rec_paths, rigid=True)
            ligands = _convert_structures(lig_paths, rigid=False)

            if not receptors or not ligands:
                raise ValueError("Structure preparation failed")

            # 3. Run docking for every receptor/ligand pair
            results = []
            for rec_pdb, rec_pdbqt in receptors:
                for lig_pdb, lig_pdbqt in ligands:
                    rec_name = rec_pdb.stem.replace("receptor_", "")
                    lig_name = lig_pdb.stem.replace("ligand_", "")
                    dock_dir = RESULTS_DIR / f"{rec_name}_vs_{lig_name}"
                    dock_dir.mkdir(exist_ok=True)

                    print(f"\n🔬 Docking {rec_name} vs {lig_name}")
                    # Path objects are passed because run_vina_docking calls
                    # .exists() on its file arguments
                    scores = run_vina_docking(rec_pdbqt, lig_pdbqt, dock_dir)

                    if scores:
                        avg_score = np.mean(scores)
                        results.append({
                            'Receptor': rec_name,
                            'Ligand': lig_name,
                            'Best Score': f"{min(scores):.2f}",
                            'Average': f"{avg_score:.2f}",
                            'Directory': str(dock_dir)
                        })
                        print(f"✅ Success! Best score: {min(scores):.2f} kcal/mol")
                    else:
                        print("❌ Docking failed")

            # 4. Show results
            if results:
                print("\n📊 Results Summary:")
                display(pd.DataFrame(results))

                # Visualize first result
                first = results[0]
                viewer = visualize_docking_results(
                    RECEPTOR_DIR / f"receptor_{first['Receptor']}.pdb",
                    LIGAND_DIR / f"ligand_{first['Ligand']}.pdb",
                    Path(first['Directory']) / "docked.pdbqt"
                )
                display(viewer)
            else:
                print("No successful docking results")

            print("\n🎉 Workflow completed!")

        except Exception as e:
            print(f"\n❌ Error: {str(e)}")

# Connect button: a click on process_btn calls on_run_clicked with the button
# instance as its only argument.
# NOTE(review): re-running this cell after redefining on_run_clicked presumably
# leaves the previous handler attached to the same button as well - re-run the
# UI cell first to get a fresh button; confirm against ipywidgets behaviour.
process_btn.on_click(on_run_clicked)
print("✅ Ready to dock! Enter receptors/ligands and click the button")