# Analysis 2: Molecular docking

Before you proceed with this workflow, I encourage you to follow the tutorials in the ```00_tutorials/``` folder first, so that everything you need is already downloaded before you start programming.

In this workflow, we will be docking our ligand with multiple human proteins.

Make sure you have already installed the ```requests```, ```biopython```, ```openbabel```, ```MDAnalysis```, ```numpy```, and ```pandas``` libraries. Otherwise, copy the following lines and paste them into the terminal:

1. ```conda activate vina```
2. ```sudo apt install openbabel```
3. ```conda install requests biopython openbabel MDAnalysis numpy pandas```

## 1. Create PDBQT file for the Ligand

Download the SDF file of the ligand from PubChem: https://pubchem.ncbi.nlm.nih.gov/

In [None]:
from docking_functions.PrepareLigand import prepare_ligand # custom function

# Folder that receives the ligand PDBQT file. The PDB download cell further
# down passes this same variable as its output folder.
output_dir = "files/"

# Path to the ligand SDF file. NOTE: despite the variable name, this holds an
# SDF file path, not a SMILES string. Example value; replace it with your own file.
ligand_smiles = "files/caffeine.sdf"

# Create the ligand PDBQT file from the SDF file
prepare_ligand(input_file=ligand_smiles, output_dir=output_dir)

## 2. Create PDBQT file from Protein

We will download the PDB file directly from the Protein Data Bank (PDB) through an HTTP request.

In [None]:
from docking_functions.DownloadPDB import download_pdb_file # custom function

# IDs from the Protein Data Bank (PDB)
pdb_ids = ["2BXP"]  # Add more as needed. Example: ["2BXP", "3KCD", ..., "4KMD"]

# Filenames returned by download_pdb_file, one per requested PDB ID
pdb_filenames = []

for pdb_id in pdb_ids:
    # Download the PDB entry into output_dir (set in the ligand cell)
    pdb_filename = download_pdb_file(
        pdb_id=pdb_id,  # ID from PDB
        output_dir=output_dir,  # Folder to store PDB
    )
    # Keep the filename so the conversion cell can iterate over it
    pdb_filenames.append(pdb_filename)

# Print summary ========
# Concatenate instead of passing two arguments to print(): the default
# separator would otherwise put a stray space in front of the rule.
print("\n" + "=" * 50)
print(f"\nTotal of files downloaded: {len(pdb_filenames)}\n")
# Use a named loop variable: assigning to `_` in a notebook overwrites
# IPython's "last output" variable for the rest of the session.
for downloaded_file in pdb_filenames:
    print(f"\t-{downloaded_file}")

Convert the protein PDB file to PDBQT format

In [None]:
from docking_functions.PrepareReceptor import clean_and_convert_pdb_to_pdbqt # custom function

# Convert every downloaded PDB file (pdb_filenames) to PDBQT

pdbqt_filenames = []  # List to store converted PDBQT filenames

for pdb_filename in pdb_filenames:
    # 1. Remove non-amino-acid elements from the PDB
    # 2. Convert the PDB to PDBQT
    cleaned_pdb, output_pdbqt = clean_and_convert_pdb_to_pdbqt(
        input_receptor=pdb_filename,
    )
    # Keep the PDBQT filename for the summary below
    pdbqt_filenames.append(output_pdbqt)

# Print summary ========
# Concatenate instead of passing two arguments to print(): the default
# separator would otherwise put a stray space in front of the rule.
print("\n" + "=" * 50)
# This cell converts files; the message previously said "downloaded".
print(f"\nTotal of files converted: {len(pdbqt_filenames)}\n")
# Use a named loop variable: assigning to `_` in a notebook overwrites
# IPython's "last output" variable for the rest of the session.
for converted_file in pdbqt_filenames:
    print(f"\t-{converted_file}")

## 3. Run Molecular Docking

Copy and paste the following cell as many times as needed, depending on how many proteins you want to dock.

In [None]:
# Accumulator for docking results, later exported as a CSV file.
# The docking loop appends one entry for each run that yields a result, so
# re-running that loop adds further entries; re-running this cell resets the
# list and discards everything collected so far.
docking_results = []

In [None]:
import os
from docking_functions.BlindDocking import adjust_box_size, compute_bounding_box, calculate_center_of_mass, run_vina_blind_docking # custom functions
from docking_functions.docking_utils import extract_first_pose_results, process_docking_results, export_docking_results

# Protein-ligand pairs to dock; each entry describes one docking run
protein_ligand_pairs = [
    {
        'protein': "files/2BXP.pdbqt",
        'ligand': "files/caffeine.pdbqt",
        'output_name': "2BXP_caffeine",
        'ligand_minimized': True
    },
    # Add more pairs as needed
]

banner = "=" * 50

for pair in protein_ligand_pairs:
    # Best-effort batch: a failure in one pair is reported and the loop
    # moves on to the next pair.
    try:
        print("\n" + banner)
        receptor_name = os.path.basename(pair['protein'])
        ligand_name = os.path.basename(pair['ligand'])
        print(f"Running docking for: {receptor_name} - {ligand_name}")
        print(banner)

        # Gather the inputs for this run
        receptor_path = pair['protein']            # 1. Protein PDBQT filename
        ligand_path = pair['ligand']               # 2. Ligand PDBQT filename
        is_minimized = pair['ligand_minimized']    # 3. Is ligand structure energy minimized?
        run_name = pair['output_name']
        docking_folder = f"output/docking/{run_name}"

        # 4. Center of protein
        box_center = calculate_center_of_mass(receptor_path)
        # 5. Box grid size, derived from the protein bounding box
        box_size = adjust_box_size(compute_bounding_box(receptor_path))

        # Run Autodock Vina
        vina_results = run_vina_blind_docking(
            receptor_pdbqt=receptor_path,
            ligand_pdbqt=ligand_path,
            ligand_minimized=is_minimized,
            output_folder=docking_folder,
            center=box_center,
            size=box_size,
            # 6. Computational power (Minimum = 8; Optimal = 32)
            exhaustiveness=8,
            # 7. Number of conformational ligand poses
            num_modes=9,
            # 8. Output filename (Stored as zip)
            output_filename=run_name,
            # 9. Number of CPUs
            cpu=-1,
        )

        # Keep only the first pose of this run for the summary table
        result_dict = extract_first_pose_results(
            vina_results, receptor_path, ligand_path, run_name
        )

        if not result_dict:
            print(f"- No results obtained for {receptor_name}")
        else:
            docking_results.append(result_dict)
            print(f"- Successfully completed docking for {receptor_name}")

    except Exception as exc:
        print(f"- Error running blind docking for {pair['protein']}: {exc}")

In [None]:
import pandas as pd

# Destination CSV file for the collected docking results
df_filename = "output/docking_results_summary.csv"

separator = "=" * 50

if not docking_results:
    # Nothing was appended by the docking loop
    print("No docking results were collected.")
else:
    # Tabulate everything collected in docking_results
    df_results = pd.DataFrame(docking_results)

    print("\n" + separator)
    print("All Docking Results:")
    print(separator)
    print(df_results)

    # Export the (unsorted) table; export_docking_results is imported in the
    # docking cell, so that cell must have been run first.
    export_docking_results(df_results, df_filename, include_timestamp=False)

    # Optional: ascending sort on the affinity column
    df_sorted = df_results.sort_values(by='affinity_kcal_mol')
    print("\n" + separator)
    print("Results sorted by affinity (best to worst):")
    print(separator)
    summary_columns = ['protein_file', 'ligand_file', 'affinity_kcal_mol']
    print(df_sorted[summary_columns])