<div style="display: flex; align-items: center;">
    <span style="font-size: 24px; color: #003366; font-weight: 500;">Vina_GPU_2.0 Pipeline</span>
    <img src="logo.jpg" style="height: 50px; width: auto; margin-left: auto;"/>
</div>

#### Import Necessary Libraries and Modules

In [None]:
import os
import re
import torch
import random
import string
import subprocess
import pandas as pd
from typing import Optional, List
import multiprocessing as mp
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors, Draw 
from rich.progress import Progress
from smiles_conversion_utils import *

#### Check GPU availability

In [None]:
# Pin the first GPU unless the caller already chose a device set.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")

if torch.cuda.is_available():
    # Utilisation is reported by nvidia-smi as one integer percentage per GPU;
    # only the first reported GPU is read. The query is best-effort: a missing
    # binary or a non-numeric answer must not abort the notebook.
    try:
        query = subprocess.run(
            ["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv,noheader,nounits"],
            capture_output=True,
            text=True,
            check=True,
        )
        gpu_available = 100 - int(query.stdout.splitlines()[0].strip())
        print(f"\033[1m\033[34mGPU availability in own1: \033[91m{gpu_available}%\033[0m")
    except (OSError, subprocess.CalledProcessError, IndexError, ValueError) as exc:
        print(f"Could not query GPU utilization via nvidia-smi: {exc}")
else:
    print('GPU is not available, using CPU instead')

# Single source of truth for the torch device used by later cells.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 1. SMILES to SDF </h2>
</div>

In [None]:
# Step 1 locations: the ligand table (CSV) and the folder that receives the SDF files.
input_csv = "aganitha_data/ligands_smiles.csv"
output_sdf = "aganitha_data/ligands_sdf"
# Create the output folder up front; exist_ok=True makes re-running this cell safe.
os.makedirs(output_sdf, exist_ok=True)

In [None]:
df = pd.read_csv(input_csv)

# The SMILES -> SDF conversion reads these two columns from every row, so fail
# here with a clear message instead of a KeyError in the middle of the run.
missing_columns = {"SMILES", "Name"} - set(df.columns)
if missing_columns:
    raise KeyError(f"{input_csv} is missing required column(s): {sorted(missing_columns)}")

display(df.head())
print(df.shape)

In [None]:
def generate_sdf_from_smiles(df, output_sdf, num_conformations=5, idx_conformer=0):
    """Write one SDF file per ligand row of ``df`` into ``output_sdf``.

    Each row's ``SMILES`` string is parsed with RDKit and handed to
    ``get_structure``; when both steps yield a molecule it is written to
    ``<output_sdf>/<Name>.sdf`` through ``molecule_to_sdf``. Rows for which
    either step returns ``None`` are skipped and reported once at the end.

    Args:
        df: DataFrame with ``SMILES`` and ``Name`` columns.
        output_sdf: Directory that receives the ``.sdf`` files.
        num_conformations: Forwarded unchanged to ``get_structure``.
        idx_conformer: Forwarded unchanged to ``get_structure``.

    Returns:
        None. Results are the files written to ``output_sdf``.
    """
    # NOTE(review): get_structure / molecule_to_sdf come from the star import of
    # smiles_conversion_utils; their exact behaviour is not visible here.
    skipped = []

    with Progress() as progress:
        task1 = progress.add_task(" [red]Converting SMILES to SDF...", total=len(df))

        for smiles, mol_name in zip(df['SMILES'], df['Name']):
            mol = Chem.MolFromSmiles(smiles)
            if mol is not None:
                mol = get_structure(mol, num_conformations, idx_conformer)

            if mol is None:
                skipped.append(mol_name)
            else:
                sdf_filename = os.path.join(output_sdf, f"{mol_name}.sdf")
                molecule_to_sdf(mol, sdf_filename, name=mol_name)

            # Explicit refresh: rich does not auto-refresh progress bars in Jupyter.
            progress.update(task1, advance=1, refresh=True)

    if skipped:
        print(f"Skipped {len(skipped)} ligand(s) that could not be converted: {skipped}")

    print(f"\033[1m\033[34mSMILES to SDF conversion completed and files saved in folder: \033[91m{output_sdf}\033[0m")

generate_sdf_from_smiles(df, output_sdf)

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 2. SDF to PDBQT </h2>
</div>

In [None]:
# Step 2 locations. input_sdf is the same folder Step 1 writes its SDF files to
# (identical literal to output_sdf); keep the two in sync if either changes.
input_sdf = "aganitha_data/ligands_sdf"
output_pdbqt = "aganitha_data/ligands_pdbqt"
# Create the output folder up front; exist_ok=True makes re-running this cell safe.
os.makedirs(output_pdbqt, exist_ok=True)

In [None]:
def convert_sdf_to_pdbqt(input_sdf, output_pdbqt):
    """Convert every ``.sdf`` file in ``input_sdf`` to PDBQT with Meeko.

    Runs ``mk_prepare_ligand.py -i <sdf> -o <pdbqt>`` once per input file and
    writes ``<output_pdbqt>/<basename>.pdbqt``.

    Args:
        input_sdf: Directory containing the ``.sdf`` ligands.
        output_pdbqt: Directory that receives the ``.pdbqt`` files.

    Returns:
        List of output paths for the conversions that exited with status 0
        and left a file on disk, in sorted input order.
    """
    # List once and filter first so the progress total matches the work done.
    sdf_files = sorted(f for f in os.listdir(input_sdf) if f.endswith(".sdf"))
    generated_files = []
    failed_files = []

    with Progress() as progress:
        task1 = progress.add_task(" [red]Converting SDF to PDBQT...", total=len(sdf_files))

        for sdf_file in sdf_files:
            base_filename = os.path.splitext(sdf_file)[0]
            output_file = os.path.join(output_pdbqt, f"{base_filename}.pdbqt")

            meeko = [
                "mk_prepare_ligand.py",
                "-i", os.path.join(input_sdf, sdf_file),
                "-o", output_file
            ]
            result = subprocess.run(meeko)

            # Only report a file as generated when the tool actually produced it.
            if result.returncode == 0 and os.path.isfile(output_file):
                generated_files.append(output_file)
            else:
                failed_files.append(sdf_file)

            # Explicit refresh: rich does not auto-refresh progress bars in Jupyter.
            progress.update(task1, advance=1, refresh=True)

    if failed_files:
        print(f"mk_prepare_ligand.py failed for {len(failed_files)} file(s): {failed_files}")

    return generated_files

generated_files = convert_sdf_to_pdbqt(input_sdf, output_pdbqt)
print(f"\033[1m\033[34mSDF to PDBQT conversion completed and files saved in folder: \033[91m{output_pdbqt}\033[0m")

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 3. Vina_GPU_2.0 Docking </h2>
</div>

In [None]:
# Step 3 inputs: the receptor file and the ligand folder. input_ligands is the
# same folder Step 2 writes its PDBQT files to (identical literal to output_pdbqt).
input_protein = "aganitha_data/kif18a.pdbqt"
input_ligands = "aganitha_data/ligands_pdbqt"

In [None]:
# Vina-GPU's stdout is captured in this file; Step 4 parses it for affinities.
output_logs = 'aganitha_data/output.txt'

# Argument list (no shell): paths are passed verbatim, so spaces or shell
# metacharacters in them cannot alter the command.
# NOTE(review): search-box centre/size and the thread count are hard-coded for
# this receptor — confirm before reusing the cell with another target.
command = [
    "../Vina-GPU",
    "--receptor", input_protein,
    "--ligand_directory", input_ligands,
    "--center_x", "130.64",
    "--center_y", "115.49",
    "--center_z", "131.85",
    "--size_x", "23.71",
    "--size_y", "23.22",
    "--size_z", "22.74",
    "--thread", "8000",
]

print(f"\033[1m\033[34mDocking in Progress... \033[91m{output_logs}\033[0m")
# Opening with 'w' truncates any previous log before the run starts.
with open(output_logs, 'w') as log_file:
    docking_result = subprocess.run(command, stdout=log_file)

if docking_result.returncode == 0:
    print(f"\033[1m\033[34mDocking Process completed and files saved in folder: \033[91m{input_ligands}_out\033[0m")
else:
    # Do not claim success when the docking binary reported an error.
    print(f"Docking exited with status {docking_result.returncode}; check {output_logs} for details")

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 4. Extract Affinity values </h2>
</div>

In [None]:
def extract_affinity_from_logs(output_logs):
    """Collect the best (mode 1) affinity of every ligand from a docking log.

    The log is scanned line by line. A line containing
    ``Refining ligand <path> results`` starts a ligand; the first following
    line shaped like ``1 <affinity> <float> <float>`` supplies its score. A
    ligand whose header is followed by another header before any such row is
    left out, rather than being given the next ligand's score.

    Args:
        output_logs: Path to the text file holding the docking stdout.

    Returns:
        DataFrame with columns ``Name`` (basename of the logged ligand path)
        and ``Affinity (kcal/mol)`` (float), sorted ascending by affinity with
        a fresh 0..n-1 index. Empty when no ligand result is found.
    """
    header_re = re.compile(r'Refining ligand (.+?) results')
    best_mode_re = re.compile(r'^\s*1\s+([-+]?\d*\.\d+)\s+\d+\.\d+\s+\d+\.\d+')

    records = []
    current_name = None  # ligand whose mode-1 row has not been seen yet

    with open(output_logs, "r") as file:
        for line in file:
            header = header_re.search(line)
            if header:
                # A new header always replaces a pending ligand that never
                # produced a result table.
                current_name = os.path.basename(header.group(1))
                continue

            if current_name is None:
                continue

            row = best_mode_re.match(line)
            if row:
                records.append((current_name, float(row.group(1))))
                current_name = None

    df = pd.DataFrame(records, columns=['Name', 'Affinity (kcal/mol)'])
    # Keeps the column numeric even when no record was found.
    df['Affinity (kcal/mol)'] = df['Affinity (kcal/mol)'].astype(float)
    df = df.sort_values(by='Affinity (kcal/mol)', ascending=True).reset_index(drop=True)
    return df

# NOTE(review): this rebinds `df`, which until now held the ligand table read
# from input_csv; re-running the SMILES -> SDF cell after this one would
# operate on the affinity table instead.
df = extract_affinity_from_logs(output_logs)
display(df.head())
print(df.shape)

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 5. PDBQT to SDF </h2>
</div>

In [None]:
# Step 5 locations. input_result_pdbqt is the docking output folder, i.e. the
# Step 3 ligand folder name with an "_out" suffix (as printed after docking).
input_result_pdbqt = "aganitha_data/ligands_pdbqt_out"
output_result_sdf = "aganitha_data/ligands_sdf_out"
# Create the output folder up front; exist_ok=True makes re-running this cell safe.
os.makedirs(output_result_sdf, exist_ok=True)

In [None]:
def convert_pdbqt_to_sdf(input_folder, output_folder):
    """Export every docked ``.pdbqt`` file in ``input_folder`` to SDF with Meeko.

    Runs ``mk_export.py <pdbqt> -o <sdf>`` once per input file and writes
    ``<output_folder>/<basename>.sdf``. Files whose export exits with a
    non-zero status are reported once at the end.

    Args:
        input_folder: Directory containing the docking result ``.pdbqt`` files.
        output_folder: Directory that receives the ``.sdf`` files.

    Returns:
        None. Results are the files written to ``output_folder``.
    """
    # Filter before creating the task so the progress total matches the work done.
    pdbqt_files = [f for f in os.listdir(input_folder) if f.endswith('.pdbqt')]
    failed_files = []

    with Progress() as progress:
        task1 = progress.add_task(" [red]Converting Result files PDBQT to SDF ...", total=len(pdbqt_files))

        for pdbqt_file in pdbqt_files:
            pdbqt_path = os.path.join(input_folder, pdbqt_file)
            sdf_file = os.path.splitext(pdbqt_file)[0] + '.sdf'
            sdf_path = os.path.join(output_folder, sdf_file)

            # Argument list instead of a shell string: file names containing
            # spaces or shell metacharacters are passed through untouched.
            meeko = ["mk_export.py", pdbqt_path, "-o", sdf_path]
            result = subprocess.run(meeko)
            if result.returncode != 0:
                failed_files.append(pdbqt_file)

            # Explicit refresh: rich does not auto-refresh progress bars in Jupyter.
            progress.update(task1, advance=1, refresh=True)

    if failed_files:
        print(f"mk_export.py failed for {len(failed_files)} file(s): {failed_files}")


convert_pdbqt_to_sdf(input_result_pdbqt, output_result_sdf)
print(f"\033[1m\033[34mPDBQT to SDF Conversion Completed and saved in folder: \033[91m{output_result_sdf}\033[0m")

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 6. Check SDF File </h2>
</div>

In [None]:
# Docked-pose SDF to inspect; points at one specific ligand inside the Step 5 output folder.
sdf_file_path = 'aganitha_data/ligands_sdf_out/AMG650_out.sdf'

In [None]:
def process_sdf_file(sdf_file_path):
    """Print atom coordinates and show a depiction for each record of an SDF file.

    For every molecule the supplier returns that has at least one conformer,
    the coordinates of the default conformer are printed atom by atom and a
    500x500 image of the molecule is rendered inline in the notebook.

    Args:
        sdf_file_path: Path to the SDF file to inspect.

    Returns:
        None.
    """
    supplier = Chem.SDMolSupplier(sdf_file_path)

    for mol in supplier:
        # Skip entries the supplier could not turn into a molecule, and
        # molecules without coordinates.
        if mol is None or mol.GetNumConformers() == 0:
            continue

        conf = mol.GetConformer()
        for atom in mol.GetAtoms():
            pos = conf.GetAtomPosition(atom.GetIdx())
            print(f"Atom {atom.GetIdx()}: {pos.x}, {pos.y}, {pos.z}")

        img_size = (500, 500)
        img = Draw.MolToImage(mol, size=img_size)
        # Render inline via the notebook display hook; Image.show() would try
        # to launch an external viewer on the kernel's machine instead.
        display(img)

process_sdf_file(sdf_file_path)