In [1]:
import os
import re
import csv 
import sys
import time
import math
import torch
import random
import shutil
import psutil
import string
import logging
import subprocess
import pandas as pd
import concurrent.futures
import ipywidgets as widgets
import multiprocessing as mp

from glob import glob
from typing import Optional, List
from IPython.display import display
from rdkit import Chem, DataStructs
from z_docking_files.utils import *
from openbabel import openbabel, pybel
from multiprocessing import Pool, cpu_count
from concurrent.futures import ThreadPoolExecutor
from rdkit.Chem import AllChem, Descriptors, Draw 

In [2]:
# Probe the host before docking. check_availability is not defined in this
# notebook (presumably it arrives via the star import of z_docking_files.utils);
# the recorded output shows it printing GPU and CPU availability percentages.
# NOTE(review): the meaning/type of the returned `device` is not visible here - confirm in utils.
device = check_availability()

[1m[34mGPU availability in own5: [91m95.00%[0m
[1m[34mCPU availability in own5: [91m72.40%[0m


<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 1. Load Data </h2>
</div>

In [3]:
# Run configuration: every value a reader may need to change lives in this cell.
# Folder holding the previous run's inputs (receptor files and the input CSV).
folder_name = "brd4_hll"
# Receptor structure file, joined with folder_name and handed to the PoseBusters script.
pdb_file = "brd4.pdb"  
# Receptor PDBQT passed to unidock as --receptor.
pdbqt_rigid = "brd4_rigid.pdbqt"
# Second receptor PDBQT; its path is built in the docking cell.
pdbqt_flex = "brd4_flex.pdbqt"
# CSV inside folder_name; the load cell reads it and filters on its 'Affinity' column.
input_csv = "output.csv"
# Only rows with Affinity strictly below this value are kept for this run.
threshold = -9.5

In [4]:
# Working directory for this run: "<folder_name>_flexible", created if it
# does not exist yet.
flex_folder_name = f"{folder_name}_flexible"
os.makedirs(flex_folder_name, exist_ok=True)

In [5]:
# Read the input CSV, keep only the rows whose Affinity is strictly below
# `threshold`, and store that subset as output.csv in the flexible-run folder.
input_smiles = os.path.join(folder_name, input_csv)
flex_csv_path = os.path.join(flex_folder_name, 'output.csv')

df = pd.read_csv(input_smiles).loc[lambda hits: hits['Affinity'] < threshold]
df.to_csv(flex_csv_path, index=False)

# Preview the selection and report how many compounds were kept.
display(df.head())
print(df.shape)

Unnamed: 0,Name,SMILES,Affinity
0,brd4_chemdiv4079,c1(c(c2c(o1)c(c1c(c(cc(=O)o1)c1ccccc1)c2)C)c1c...,-10.977
1,brd4_chemdiv1637,c12c(S(=O)(=O)c3ccc(cc3)Br)nnn1c1c(c(n2)Nc2cc(...,-10.937
2,brd4_chemdiv1638,c12c(S(=O)(=O)c3ccc(cc3)Br)nnn1c1c(c(n2)Nc2cc(...,-10.903
3,brd4_chemdiv1636,c12c(S(=O)(=O)c3ccc(cc3)Br)nnn1c1c(c(n2)Nc2c(c...,-10.872
4,brd4_chemdiv1318,n12c(nnc2C)c2c(c(n1)c1cc(S(=O)(=O)N3CCOCC3)c(c...,-10.812


(292, 3)


<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 2. Copy PDBQT files and Create ligands path </h2>
</div>

In [6]:
# Both helpers are defined outside this notebook, so their exact behavior is not visible here.
# Presumably copies the ligand PDBQT files of the compounds in `df` from
# folder_name into flex_folder_name - TODO confirm against the helper.
copy_pdbqt_files_flex(df, folder_name, flex_folder_name)
# Presumably writes the ligand path list that the docking cell reads from
# pipeline_files/unidock_pdbqt_path.txt - TODO confirm against the helper.
create_ligands_path_flex(flex_folder_name)

<div style="background-color:#8E44AD; color:#ECF0F1; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 3. Uni-Dock Flexible Docking </h2>
</div>

In [7]:
# Dock every ligand listed in unidock_pdbqt_path.txt against the receptor with
# Uni-Dock. All unidock output (stdout and stderr) is appended to output_logs.
protein_rigid = os.path.join(folder_name, pdbqt_rigid)
# NOTE(review): protein_flex is built but never passed to unidock (no --flex
# argument), so only the rigid receptor is used - confirm this is intended.
protein_flex = os.path.join(folder_name, pdbqt_flex)

ligands_path = os.path.join(flex_folder_name, "pipeline_files/unidock_pdbqt_path.txt")

output_result = os.path.join(flex_folder_name, "pipeline_files/2_pdbqt_out_flex")
os.makedirs(output_result, exist_ok=True)

output_logs = os.path.join(flex_folder_name, "pipeline_files/unidock_output.txt")

# Read the whitespace-separated ligand paths in Python rather than through a
# shell `$(cat ...)` substitution, so the command can run without a shell.
with open(ligands_path) as ligands_file:
    ligand_files = ligands_file.read().split()
if not ligand_files:
    raise ValueError(f"No ligand paths found in {ligands_path}")

# Argument vector (no shell): paths with spaces or shell metacharacters are
# passed through verbatim instead of being re-interpreted.
unidock_command = [
    "unidock",
    "--receptor", protein_rigid,
    "--gpu_batch", *ligand_files,
    "--search_mode", "detail",
    "--scoring", "vina",
    "--center_x", "86.79",
    "--center_y", "43.89",
    "--center_z", "65.43",
    "--size_x", "20.00",
    "--size_y", "20.00",
    "--size_z", "20.00",
    "--num_modes", "10",
    "--dir", output_result,
]

print(f"\033[1m\033[34mCheck Docking Progress... \033[91m{output_logs}\033[0m")

# Append mode creates the log if it is missing and keeps earlier runs' output.
with open(output_logs, "a") as log_file:
    docking = subprocess.run(unidock_command, stdout=log_file, stderr=subprocess.STDOUT)

# Stop here on failure: the following cells read the docking output directory.
if docking.returncode != 0:
    raise RuntimeError(
        f"unidock exited with status {docking.returncode}; see {output_logs}"
    )
print("\033[1m\033[34mDocking Process completed\033[0m")

[1m[34mCheck Docking Progress... [91m$brd4_hll_flexible/pipeline_files/unidock_output.txt[0m
[1m[34mDocking Process completed[0m


<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 4. Extract Affinity values and Model1 Content </h2>
</div>

In [8]:
# Post-process the docking results; both helpers are defined outside this notebook.
# Per the recorded output, affinity values are saved to
# pipeline_files/1_extract_affinity_from_pdbqt.csv.
affinity_from_pdbqt_files_flex(flex_folder_name)
# Per the recorded output, the Model_1 content is saved under
# pipeline_files/3_pdbqt_out_flex_m1.
extract_model1_flex(flex_folder_name)

[1m[34mAffnity values extracted and saved in folder: [91mbrd4_hll_flexible/pipeline_files/1_extract_affinity_from_pdbqt.csv[0m
[1m[34mExtracted Model_1 content and saved in folder: [91mbrd4_hll_flexible/pipeline_files/3_pdbqt_out_flex_m1[0m


<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 5. PDBQT to Mol2 and Mol2 to SDF</h2>
</div>

In [9]:
# Run the PDBQT -> SDF conversion script on the flexible-run folder.
script_path = os.path.join("z_docking_files", "3_pdbqt_to_sdf_flex.sh")

# Output is not captured, so result.stdout / result.stderr are always None and
# printing them only shows "None". check=True raises CalledProcessError when
# the script exits non-zero instead of silently continuing to the next step.
result = subprocess.run(["/bin/bash", script_path, flex_folder_name], check=True)

PDBQT to SDF conversion for batch 1 is completed.
[1m[34mPDBQT to SDF conversion completed and files saved in folder: [91mbrd4_hll_flexible/pipeline_files/5_sdf_out[0m
None


<div style="background-color:#4B6587; color:#F0E5CF; padding: 1px; border-radius: 10px;">
    <h2 style="font-size: 16px; margin-left: 10px;"> Step 6. PoseBusters Filter </h2>
</div>

In [10]:
# Run the PoseBusters filter script on the flexible-run folder, giving it the
# receptor PDB file from the original run folder.
script_path = os.path.join("z_docking_files", "4_posebusters_filter_flex.sh")
pdb = os.path.join(folder_name, pdb_file)

# Output is not captured, so result.stdout / result.stderr are always None and
# printing them only shows "None". check=True raises CalledProcessError when
# the script exits non-zero instead of silently continuing to the next step.
result = subprocess.run(["/bin/bash", script_path, flex_folder_name, pdb], check=True)

[1m[34mCheck PoseBusters Progress... [91mbrd4_hll_flexible/pipeline_files/2_pb_out.csv[0m
[1m[34mPoseBusters Filtration Completed[0m
None


In [12]:
# Both helpers are defined outside this notebook, so their exact behavior is not visible here.
# Presumably tidies the PoseBusters CSV produced by the previous step; the
# recorded output shows a table of per-check booleans with a `passes` column.
process_pb_csv_flex(flex_folder_name)
# NOTE(review): passes=17 looks like the number of PoseBusters checks a pose
# must pass to be kept - confirm against the helper and keep it in the config cell if so.
final_output_flex(flex_folder_name, input_smiles, passes=17)

Unnamed: 0,Name,mol_pred_loaded,mol_cond_loaded,sanitization,all_atoms_connected,bond_lengths,bond_angles,internal_steric_clash,aromatic_ring_flatness,double_bond_flatness,...,protein-ligand_maximum_distance,minimum_distance_to_protein,minimum_distance_to_organic_cofactors,minimum_distance_to_inorganic_cofactors,minimum_distance_to_waters,volume_overlap_with_protein,volume_overlap_with_organic_cofactors,volume_overlap_with_inorganic_cofactors,volume_overlap_with_waters,passes
0,hll_enamine250296,True,True,True,True,True,True,True,True,True,...,True,False,True,True,True,False,True,True,True,17
1,hll_enamine234757,True,True,True,True,True,True,True,True,True,...,True,False,True,True,True,False,True,True,True,17
2,brd_enamine5604,True,True,True,True,True,True,True,True,True,...,True,False,True,True,True,False,True,True,True,17
3,brd_enamine13050,True,True,True,True,True,True,True,True,True,...,True,False,True,True,True,False,True,True,True,17
4,brd_enamine9035,True,True,True,True,True,True,True,True,True,...,True,False,True,True,True,False,True,True,True,17


[1m[34mCompounds filtered out by PoseBusters: [91m0[0m
