<div style="display: flex; align-items: center;">
    <span style="font-size: 24px; color: #003366; font-weight: 500;">Vina_GPU_2.0 Pipeline</span>
    <img src="logo.jpg" style="height: 50px; width: auto; margin-left: auto;"/>
</div>

#### Import Necessary Libraries and Modules

In [1]:
import os
import re
import torch
import random
import string
import subprocess
import pandas as pd
from typing import Optional, List
import multiprocessing as mp
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors, Draw 
from smiles_conversion_utils import *

disabling protonation states for smi2sdf


#### Check GPU availability

In [2]:
# Pin the first GPU unless the caller has already chosen devices.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")

if torch.cuda.is_available():
    # Ask nvidia-smi for the current utilisation so we can report the headroom.
    # The query is best-effort: a missing binary or unexpected output must not
    # stop the notebook, because the docking step does not depend on this figure.
    try:
        smi = subprocess.run(
            ["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv,noheader,nounits"],
            capture_output=True,
            text=True,
            check=True,
        )
        gpu_info = smi.stdout.splitlines()
        # Only the first reported GPU is inspected.
        gpu_available = 100 - int(gpu_info[0].strip())
    except (OSError, subprocess.CalledProcessError, IndexError, ValueError) as exc:
        gpu_info, gpu_available = [], None
        print(f"GPU detected, but its utilization could not be queried: {exc}")
    else:
        print(f"\033[1m\033[34mGPU availability in own1: \033[91m{gpu_available}%\033[0m")
else:
    print('GPU is not available, using CPU instead')

# Single source of truth for the torch device used by later cells.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

[1m[34mGPU availability in own1: [91m96%[0m


<div style="background-color:#4B6587; color:#F0E5CF; padding: 3px; border-radius: 10px;">
    <h2>Step 1. SMILES to SDF</h2>
</div>

In [3]:
# Step 1 locations: the CSV of ligand SMILES to read, and the folder that
# receives one "<Name>.sdf" file per ligand.
input_csv = "aganitha_data/ligands_smiles.csv"
output_sdf = "aganitha_data/ligands_sdf"
# Create the output folder up front; exist_ok keeps re-running this cell harmless.
os.makedirs(output_sdf, exist_ok=True)

In [4]:
# Load the ligand table; the conversion cell reads its 'Name' and 'SMILES' columns.
df = pd.read_csv(input_csv)

# Fail here, with a readable message, rather than with a KeyError in the
# middle of the conversion loop.
missing_columns = {"Name", "SMILES"} - set(df.columns)
assert not missing_columns, (
    f"{input_csv} is missing required column(s): {sorted(missing_columns)}"
)

display(df.head())
print(df.shape)

Unnamed: 0,Name,SMILES
0,AMG650,CC1=CC(=NC(=N1)N2CCC(CC2)(F)F)NC(=O)C3=C(C=C(C...
1,BB_35,c1(c(ccc(c1)NS(CCO)(=O)=O)C(Nc1cccc(c1)N1CCC(C...
2,BB_40,c12ccc(cc1C1(CN2C(c2cc(ccc2)S(NC(C)(C)C)(=O)=O...
3,BB_36,c1(c(ncc(n1)NC(CO)(C)C)C(Nc1cccc(n1)S(NC(C)(C)...
4,BB_37,c12c(cc(cc1N1CCC3(CC1)CC3)NS(CCO)(=O)=O)ncnc2N...


(23, 2)


In [5]:
# Arguments forwarded to get_structure (from smiles_conversion_utils).
num_conformations = 5
idx_conformer = 0  # presumably the index of the generated conformer to keep -- confirm in get_structure
num_cpus = os.cpu_count()  # not used in this cell; kept in case later cells read it

# Convert every ligand to "<Name>.sdf", remembering the ones that could not be
# converted so they are reported instead of silently disappearing.
failed_ligands = []
for mol_name, smiles in zip(df['Name'], df['SMILES']):
    mol = Chem.MolFromSmiles(smiles)
    if mol is not None:
        mol = get_structure(mol, num_conformations, idx_conformer)
    if mol is None:
        # Either RDKit could not parse the SMILES or get_structure returned None.
        failed_ligands.append(mol_name)
        continue
    sdf_filename = os.path.join(output_sdf, f"{mol_name}.sdf")
    molecule_to_sdf(mol, sdf_filename, name=mol_name)

print(f"\033[1m\033[34mSMILES to SDF Conversion Completed and saved in folder: \033[91m{output_sdf}\033[0m")
if failed_ligands:
    print(f"Skipped {len(failed_ligands)} ligand(s) that could not be converted: {failed_ligands}")

[1m[34mSMILES to SDF Conversion Completed and saved in folder: [91maganitha_data/ligands_sdf[0m


<div style="background-color:#4B6587; color:#F0E5CF; padding: 3px; border-radius: 10px;">
    <h2>Step 2. SDF to PDBQT</h2>
</div>

In [6]:
# Step 2 locations: the folder of SDF files to convert, and the folder that
# receives one "<name>.pdbqt" file per input SDF.
input_sdf = "aganitha_data/ligands_sdf"
output_pdbqt = "aganitha_data/ligands_pdbqt"
# Create the output folder up front; exist_ok keeps re-running this cell harmless.
os.makedirs(output_pdbqt, exist_ok=True)

In [7]:
# Convert each SDF in input_sdf to PDBQT with mk_prepare_ligand.py, one
# subprocess per ligand. A non-zero exit status is recorded so that a partial
# conversion is visible instead of being reported as a clean success.
failed_conversions = []
for sdf_file in sorted(os.listdir(input_sdf)):
    if not sdf_file.endswith(".sdf"):
        continue
    base_filename = os.path.splitext(sdf_file)[0]
    # Named for what it is (a ligand PDBQT path) so it cannot be mistaken for
    # a log-file path by other cells.
    pdbqt_path = os.path.join(output_pdbqt, f"{base_filename}.pdbqt")

    command = [
        "mk_prepare_ligand.py",
        "-i", os.path.join(input_sdf, sdf_file),
        "-o", pdbqt_path,
    ]
    result = subprocess.run(command)
    if result.returncode != 0:
        failed_conversions.append(sdf_file)

generated_files = os.listdir(output_pdbqt)
print(f"\033[1m\033[34mSDF to PDBQT Conversion Completed and saved in folder: \033[91m{output_pdbqt}\033[0m")
print(f"{len(generated_files)} file(s) now present in {output_pdbqt}")
if failed_conversions:
    print(f"mk_prepare_ligand.py failed for {len(failed_conversions)} file(s): {failed_conversions}")

[1m[34mSDF to PDBQT Conversion Completed and saved in folder: [91maganitha_data/ligands_pdbqt[0m


<div style="background-color:#4B6587; color:#F0E5CF; padding: 3px; border-radius: 10px;">
    <h2>Step 3. Pass PDBQT files to Vina_GPU_2.0</h2>
</div>

In [8]:
# Receptor PDBQT handed to Vina-GPU through --receptor.
input_protein = "aganitha_data/kif18a.pdbqt"
# Folder of ligand PDBQT files handed to Vina-GPU through --ligand_directory.
input_ligands = "aganitha_data/ligands_pdbqt"

In [11]:
# Define the log path BEFORE building the command. If it is assigned afterwards,
# the command picks up whatever value `output_logs` already holds in the kernel,
# and Vina-GPU's stdout can end up overwriting an unrelated file.
output_logs = 'aganitha_data/output.txt'

# Argument list (no shell) so paths containing spaces or shell metacharacters
# are passed through verbatim.
command = [
    "../Vina-GPU",
    "--receptor", input_protein,
    "--ligand_directory", input_ligands,
    # Docking search box: centre and edge lengths (Vina convention; values are
    # specific to this receptor).
    "--center_x", "130.64",
    "--center_y", "115.49",
    "--center_z", "131.85",
    "--size_x", "23.71",
    "--size_y", "23.22",
    "--size_z", "22.74",
    "--thread", "8000",
]

print(f"\033[1m\033[34mCheck the progress: \033[91m{output_logs}\033[0m")
# Opening in 'w' mode truncates any previous log; stdout is streamed into it
# while stderr stays attached to the notebook so parse errors remain visible.
with open(output_logs, 'w') as log_file:
    result = subprocess.run(command, stdout=log_file)

if result.returncode == 0:
    print(f"\033[1m\033[34mDocking Completed and results saved in folder: \033[91m{input_ligands}_out\033[0m")
else:
    print(f"Vina-GPU exited with status {result.returncode}; see {output_logs} for details")

[1m[34mCheck the progress: [91maganitha_data/output.txt[0m



Parse error on line 1 in file "aganitha_data/ligands_pdbqt/BB_37.pdbqt": Unknown or inappropriate tag

Parse error on line 54 in file "aganitha_data/ligands_pdbqt/AGAN-209.pdbqt": ATOM syntax incorrect: "CG0" is not a valid AutoDock type. Note that AutoDock atom types are case-sensitive.


[1m[34mSDF to PDBQT Conversion Completed and saved in folder: [91maganitha_data/ligands_pdbqt_out[0m


<div style="background-color:#4B6587; color:#F0E5CF; padding: 3px; border-radius: 10px;">
    <h2>Step 4. Extract Affinity values</h2>
</div>

In [12]:
# Read the Vina-GPU log; the context manager closes the handle.
with open(output_logs, "r") as log_file:
    output = log_file.read()

# Each ligand section starts with "Refining ligand <path> results"; the best
# pose is the table row whose mode number is 1 (mode, affinity, two distances).
header_re = re.compile(r'Refining ligand (.+?) results')
best_row_re = re.compile(r'\n\s*1\s+([-+]?\d*\.\d+)\s+\d+\.\d+\s+\d+\.\d+')

# Search for the mode-1 row only inside the ligand's own section (up to the
# next header). A section without a result table is then skipped instead of
# being paired with the following ligand's affinity.
headers = list(header_re.finditer(output))
section_ends = [h.start() for h in headers[1:]] + [len(output)]
matches = []
for header, section_end in zip(headers, section_ends):
    best_row = best_row_re.search(output, header.end(), section_end)
    if best_row is not None:
        matches.append((os.path.basename(header.group(1)), float(best_row.group(1))))

if not matches:
    print(f"No affinity values found in {output_logs}; check that the docking step wrote its log there.")

df = pd.DataFrame(matches, columns=['Name', 'Affinity (kcal/mol)'])
df['Affinity (kcal/mol)'] = pd.to_numeric(df['Affinity (kcal/mol)'], errors='coerce')
# Most negative (strongest predicted binding) first.
df = df.sort_values(by='Affinity (kcal/mol)', ascending=True).reset_index(drop=True)

display(df.head())
print(df.shape)

Unnamed: 0,Name,Affinity (kcal/mol)
0,AGAN245-df-11-CN,-13.8
1,6,-13.6
2,BB_40,-13.4
3,AGAN 245-df-2-CN,-13.4
4,BB_38,-13.3


(21, 2)
