<div style="display: flex; align-items: center;">
    <span style="font-size: 24px; color: #003366; font-weight: 500;">Uni-Dock Pipeline</span>
    <img src="z_docking_files/logo.jpg" style="height: 50px; width: auto; margin-left: auto;"/>
</div>

#### Import Necessary Libraries and Modules

In [None]:
import os
import re
import csv 
import sys
import time
import math
import torch
import random
import shutil
import psutil
import string
import logging
import subprocess
import pandas as pd
import concurrent.futures
import ipywidgets as widgets
import multiprocessing as mp

from glob import glob
from typing import Optional, List
from IPython.display import display
from rdkit import Chem, DataStructs
from z_docking_files.utils import *
from openbabel import openbabel, pybel
from multiprocessing import Pool, cpu_count
from concurrent.futures import ThreadPoolExecutor
from rdkit.Chem import AllChem, Descriptors, Draw 

In [None]:
# check_availability is not defined in this notebook; it presumably comes from the
# star import of z_docking_files.utils.
# NOTE(review): looks like it reports/returns the compute device (CPU vs GPU) - confirm in utils.
device = check_availability()

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 1. Load Data </h2>
</div>

In [None]:
# Wall-clock start of the whole run; the final cell subtracts this to report total time.
start_time = time.time()

# --- Run configuration -------------------------------------------------------
# Project folder: inputs are read from it and pipeline outputs are written below it.
folder_name = "brd4_hll"

# Input CSV (joined with folder_name when loaded; must provide a 'SMILES' column).
input_csv = "input.csv"

# Receptor files: the PDBQT is handed to unidock, the PDB to the PoseBusters script.
pdbqt_file = "brd4.pdbqt"
pdb_file = "brd4.pdb"

# Threshold argument forwarded to extraction_based_on_threshold.
threshold = "dynamic"

In [None]:
# Create <folder_name>/pipeline_files/execution_time up front: every later step writes
# its timing report into this directory. exist_ok=True keeps the cell safe to re-run.
os.makedirs(os.path.join(folder_name, "pipeline_files", "execution_time"), exist_ok=True)

In [None]:
# Load the input table; this cell only relies on its 'SMILES' column.
input_smiles = os.path.join(folder_name, input_csv)
df = pd.read_csv(input_smiles)

# Rows without a SMILES value cannot be classified; keep them out of both subsets
# instead of letting NaN poison the boolean masks below.
missing_smiles = df['SMILES'].isna()
if missing_smiles.any():
    print(f"\033[1m\033[34mNumber of rows with missing SMILES (skipped): \033[91m{int(missing_smiles.sum())}\033[0m")

# A '.' in the SMILES marks a multi-component entry (treated here as a "salted" compound).
# Match it literally (regex=False): the previous '\.' pattern in a non-raw string is an
# invalid escape sequence. na=False keeps the mask strictly boolean.
has_salt = df['SMILES'].str.contains('.', regex=False, na=False)

df_salt = df[has_salt].copy()
print(f"\033[1m\033[34mNumber of salted compounds: \033[91m{len(df_salt)}\033[0m")
df_salt.to_csv(f'{folder_name}/salted_compounds.csv', index=False)

# .copy() so downstream code works on an independent frame rather than a slice of df.
df_no_salt = df[~has_salt & ~missing_smiles].copy()
display(df_no_salt.tail())
print(df_no_salt.shape)

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 2. SMILES to SDF </h2>
</div>

In [None]:
# Step timer for the SMILES -> SDF conversion.
start_time1 = time.time()

# Output directory for the generated SDF files; created if it does not exist yet.
output_sdf = os.path.join(folder_name, "pipeline_files/1_sdf")
os.makedirs(output_sdf, exist_ok=True)

# Convert the salt-free compounds, requesting 10 conformations from the helper.
convert_smiles_to_sdf_parallel(df_no_salt, output_sdf, num_conformations=10)

# Persist the elapsed time (in minutes, 2 decimals) for this step.
end_time1 = time.time()
elapsed_time_minutes = round((end_time1 - start_time1) / 60, 2)
file_path = os.path.join(folder_name, "pipeline_files/execution_time/1_smiles_to_sdf.txt")
with open(file_path, "w") as timing_file:
    timing_file.write(f"{elapsed_time_minutes} minutes")

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 3. SDF to Mol2 and Mol2 to PDBQT </h2>
</div>

In [None]:
# Step timer for the SDF -> PDBQT conversion script.
start_time2 = time.time()

script_path = os.path.join("z_docking_files", "1_sdf_to_pdbqt.sh")
# stderr has to be piped explicitly: without it result.stderr is always None and the
# cell would just print "None". stdout is deliberately left untouched.
result = subprocess.run(
    ["/bin/bash", script_path, folder_name],
    stderr=subprocess.PIPE,
    text=True,
)
if result.stderr:
    print(result.stderr)
# Surface a failing script instead of silently continuing with incomplete outputs.
if result.returncode != 0:
    print(f"{script_path} exited with code {result.returncode}")

# Persist the elapsed time (in minutes, 2 decimals) for this step.
end_time2 = time.time()
elapsed_time_minutes = round((end_time2 - start_time2)/60, 2)
file_path = os.path.join(folder_name, "pipeline_files/execution_time/2_sdf_to_pdbqt.txt")
with open(file_path, "w") as file:
    file.write(f"{elapsed_time_minutes} minutes")

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 4. PDBQT Filter  </h2>
</div>

In [None]:
# Step timer for the PDBQT filter; the next cell reads start_time3 to write the timing file.
start_time3 = time.time()

script_path = os.path.join("z_docking_files", "2_pdbqt_to_smiles.sh")
result = subprocess.run(
    ["/bin/bash", script_path, folder_name],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
print(result.stdout)
# stderr is captured above, so it must be shown explicitly on failure; otherwise a
# broken script run is indistinguishable from a successful one.
if result.returncode != 0:
    print(f"{script_path} exited with code {result.returncode}")
    if result.stderr:
        print(result.stderr)

In [None]:
# Validate the generated PDBQT files against the input CSV, then copy the ones that pass.
# Both helpers receive the project folder and the path of the input CSV.
for pdbqt_step in (check_pdbqt_files, copy_correct_pdbqt_files):
    pdbqt_step(folder_name, input_smiles)

# Close the timer opened in the previous cell (start_time3) and persist it in minutes.
end_time3 = time.time()
elapsed_time_minutes = round((end_time3 - start_time3) / 60, 2)
file_path = os.path.join(folder_name, "pipeline_files/execution_time/3_pdbqt_filter_using_tanimoto.txt")
with open(file_path, "w") as timing_file:
    timing_file.write(f"{elapsed_time_minutes} minutes")

<div style="background-color:#8E44AD; color:#ECF0F1; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 5. Uni-Dock Docking </h2>
</div>

In [None]:
# Step timer for the docking run.
start_time4 = time.time()

# Receptor and output directory are identical for every batch, so resolve them once.
protein = os.path.join(folder_name, pdbqt_file)
num_batches = create_ligands_path_batchwise(folder_name)
ligand_batches = [f"unidock_pdbqt_batch_{i+1}.txt" for i in range(num_batches)]
output_result_base = os.path.join(folder_name, "pipeline_files/6_pdbqt_out")
os.makedirs(output_result_base, exist_ok=True)
# Bound before the loop so the summary print below also works when there are no batches.
output_result_pdbqt = output_result_base

for i, ligands_batch_file in enumerate(ligand_batches):
    ligands_path = os.path.join(folder_name, "pipeline_files", ligands_batch_file)
    batch_output_logs = os.path.join(folder_name, f"pipeline_files/unidock_output_batch_{i+1}.txt")

    # The batch file holds whitespace-separated ligand paths; split() mirrors the word
    # splitting the former `$(cat file)` shell substitution performed.
    with open(ligands_path, "r") as batch_file:
        ligands = batch_file.read().split()

    if not ligands:
        print(f"Batch_{i+1} has no ligands listed in {ligands_path}; skipped")
        continue

    # Argument list instead of a shell string: paths containing spaces or shell
    # metacharacters are passed through verbatim and no shell is involved.
    unidock_command = [
        "unidock",
        "--receptor", protein,
        "--gpu_batch", *ligands,
        "--search_mode", "detail",
        "--scoring", "vina",
        "--center_x", "86.79",
        "--center_y", "43.89",
        "--center_z", "65.43",
        "--size_x", "20.00",
        "--size_y", "20.00",
        "--size_z", "20.00",
        "--num_modes", "10",
        "--dir", output_result_pdbqt,
    ]

    # "w" truncates any log from a previous run; stdout and stderr both go to the log,
    # matching the former `>> log 2>&1` redirection.
    with open(batch_output_logs, "w") as log_file:
        completed = subprocess.run(unidock_command, stdout=log_file, stderr=subprocess.STDOUT)

    # Report the real outcome instead of unconditionally claiming success.
    if completed.returncode == 0:
        print(f"Docking Process completed for Batch_{i+1}")
    else:
        print(f"Docking Process FAILED for Batch_{i+1} (exit code {completed.returncode}); see {batch_output_logs}")

print("\033[1m\033[34mDocking Process completed for all Batches and files saved in folder: \033[91m{}\033[0m".format(output_result_pdbqt))

# Persist the elapsed time (in minutes, 2 decimals) for this step.
end_time4 = time.time()
elapsed_time_minutes = round((end_time4 - start_time4)/60, 2)
file_path = os.path.join(folder_name, "pipeline_files/execution_time/4_docking.txt")
with open(file_path, "w") as file:
    file.write(f"{elapsed_time_minutes} minutes")

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 6. Extract Affinity values </h2>
</div>

In [None]:
# Helper is not defined in this notebook (presumably from z_docking_files.utils).
# NOTE(review): judging by the step heading it extracts affinity values from the docked
# PDBQT outputs under folder_name - confirm what it reads and writes in utils.
affinity_from_pdbqt_files(folder_name)

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 7. Extract Compounds based on Affinity threshold </h2>
</div>

In [None]:
# `threshold` comes from the configuration cell ('dynamic' there).
# NOTE(review): both helpers are defined outside this notebook; presumably the first
# selects compounds by affinity threshold and the second keeps the first docking model
# of each - confirm against z_docking_files.utils.
extraction_based_on_threshold(folder_name, threshold)
extract_model1(folder_name)

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 8. PDBQT to Mol2 and Mol2 to SDF</h2>
</div>

In [None]:
# Step timer for the PDBQT -> SDF conversion script.
start_time5 = time.time()

script_path = os.path.join("z_docking_files", "3_pdbqt_to_sdf.sh")
# stderr has to be piped explicitly: without it result.stderr is always None and the
# cell would just print "None". stdout is deliberately left untouched.
result = subprocess.run(
    ["/bin/bash", script_path, folder_name],
    stderr=subprocess.PIPE,
    text=True,
)
if result.stderr:
    print(result.stderr)
# Surface a failing script instead of silently continuing with incomplete outputs.
if result.returncode != 0:
    print(f"{script_path} exited with code {result.returncode}")

# Persist the elapsed time (in minutes, 2 decimals) for this step.
end_time5 = time.time()
elapsed_time_minutes = round((end_time5 - start_time5)/60, 2)
file_path = os.path.join(folder_name, "pipeline_files/execution_time/5_pdbqt_to_sdf.txt")
with open(file_path, "w") as file:
    file.write(f"{elapsed_time_minutes} minutes")

<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 9. PoseBusters Filter </h2>
</div>

In [None]:
# Step timer for the PoseBusters filter script.
start_time6 = time.time()

script_path = os.path.join("z_docking_files", "4_posebusters_filter.sh")
# stdout has to be piped explicitly: without it result.stdout is always None and the
# cell would just print "None". stderr is deliberately left untouched.
result = subprocess.run(
    ["/bin/bash", script_path, folder_name, pdb_file],
    stdout=subprocess.PIPE,
    text=True,
)
if result.stdout:
    print(result.stdout)
# Surface a failing script instead of silently continuing with incomplete outputs.
if result.returncode != 0:
    print(f"{script_path} exited with code {result.returncode}")

# Persist the elapsed time (in minutes, 4 decimals) for this step.
end_time6 = time.time()
elapsed_time_minutes = round((end_time6 - start_time6)/60, 4)
file_path = os.path.join(folder_name, "pipeline_files/execution_time/6_posebusters_filter.txt")
with open(file_path, "w") as file:
    file.write(f"{elapsed_time_minutes} minutes")

In [None]:
# Post-process the PoseBusters results and build the final output for this project folder.
# NOTE(review): both helpers are defined outside this notebook. `passes=19` is presumably
# the number of PoseBusters checks a pose must pass to be kept - confirm in
# z_docking_files.utils and consider moving it to the configuration cell.
process_pb_csv(folder_name)
final_output(folder_name, input_smiles, passes=19)

In [None]:
# Total wall-clock time since the configuration cell set start_time, in minutes (1 decimal).
end_time = time.time()
elapsed_time_minutes = round((end_time - start_time) / 60, 1)

# Store the total next to the per-step timing files, then echo it in the notebook.
file_path = os.path.join(folder_name, "pipeline_files/execution_time/total_execution_time.txt")
with open(file_path, "w") as timing_file:
    timing_file.write(f"{elapsed_time_minutes} minutes")

print(f"\033[1m\033[34mTotal execution time (minutes): \033[91m{elapsed_time_minutes}\033[0m")

.

<div style="background-color:#4B6587; color:#F0E5CF; padding: 3px; border-radius: 10px;">
</div>

#### Check SDF File

In [None]:
# sdf_file_path = os.path.join(folder_name, "pipeline_files/ligands_sdf/CHEMBL1958347.sdf")
# process_sdf_file(sdf_file_path)

#### Check PDBQT File

In [None]:
# file_path = os.path.join(folder_name, "pipeline_files/ligands_pdbqt_out/BB_44_out.pdbqt")
# print(''.join(open(file_path, 'r').readlines()))

#### Check Molecule Structure File

In [None]:
# mol = Chem.MolFromSmiles('C[C@@H]1CNC(=O)C2(CNC2)C/C=C/CN(C)S(=O)(=O)c2cccc(c2)C(=O)N1.OC(=O)C(F)(F)F')
# img = Draw.MolToImage(mol, size=(600, 600))
# display(img)

In [None]:
# df11 = pd.read_csv('minD_antibact_obabel/output.csv')
# df11.head()

In [None]:
# df12 = df11[df11['SMILES'].str.startswith('Cl.')]
# df12.head()