In [1]:
import numpy
from combind_utils.bounding_box_utils import read_bounding_box, is_within_bounding_box
from combind_utils.atoms_utils import get_atoms_coordinates, get_ligand_protein_path
from schrodinger.structutils import rmsd

In [6]:
import glob
protein_path = glob.glob("/home/pc/Documents/combind_fragment/combind_fragment/fragment_dataset/*")
import os

from collections import defaultdict
ligands_dict = defaultdict(list)
for each_protein_path in protein_path:
    reference_protein_path = os.path.join(each_protein_path, "structures", "grids")
    #list folders under the path
    reference_protein_path = os.listdir(reference_protein_path)
    if len(reference_protein_path) > 1:
        assert False, "More than one reference protein path found"
    
    reference_protein_name = reference_protein_path[0].split("/")[-1]
    ligands_path = glob.glob(f"{each_protein_path}/structures/ligands/*.mae")
    ligands_dict[(reference_protein_name,os.path.join(each_protein_path))] = ligands_path






In [8]:
import schrodinger
#try to read the mae file
from schrodinger.structure import StructureReader
from tqdm import tqdm
from schrodinger.structutils import rmsd

protein_to_ligands = {}

for reference_ligand_name, ligand_base_path in tqdm(ligands_dict):
    
    # Path to bounding box file: <ligand_base_path>/structures/grids/<reference_ligand_name>/<reference_ligand_name>.in
    boxing_box_path = os.path.join(ligand_base_path, "structures", "grids", reference_ligand_name,f"{reference_ligand_name}.in")
    bounding_box = read_bounding_box(boxing_box_path)
    
    # Path to reference protein structure: <ligand_base_path>/structures/proteins/<reference_ligand_name>_prot.mae
    reference_protein_path = os.path.join(ligand_base_path, "structures", "proteins", f"{reference_ligand_name}_prot.mae")
    pocket_atoms = []
    
    # Read protein structure from MAE file and extract atoms within the binding pocket
    # This code reads a protein structure from a MAE file and identifies atoms that lie within 
    # a predefined bounding box region representing the binding pocket. For each atom in the protein,
    # it checks if the atom's coordinates fall within the box boundaries. If an atom is within the 
    # bounding box, it is considered part of the binding pocket and added to the pocket_atoms list.
    with StructureReader(reference_protein_path) as reader:
        for model in reader:
            # Iterate through atoms in the protein model
            for atom in model.atom:
                # Get x,y,z coordinates of atom
                x, y, z = get_atoms_coordinates(atom)
                # Check if atom is within the defined bounding box region
                if is_within_bounding_box([x, y, z], bounding_box, box_type = "outer_box"):
                    pocket_atoms.append(atom)
        
                
    


100%|██████████| 44/44 [00:00<00:00, 108.10it/s]


In [69]:
# This code reads predicted ligand poses from Glide docking and stores their docking scores
# The poses are read from a MAE file containing multiple docked conformations
# Each pose (except the reference structure) is stored in a dictionary with its docking score
# Lower docking scores indicate better predicted binding affinity

# Path to the predicted poses from Glide docking
poses_pred_path = "/home/pc/Documents/combind_fragment/combind_fragment/fragment_dataset/A5H660/docking/4BZ6_lig-to-4bz6/4BZ6_lig-to-4bz6_pv.maegz"
poses_true_path = "/home/pc/Documents/combind_fragment/combind_fragment/fragment_dataset/A5H660/structures/ligands/4bz6_lig.mae"
poses_true = next(StructureReader(poses_true_path))


# Create a StructureReader object to read the poses file
poses_pred = StructureReader(poses_pred_path)

# Base name to identify the reference structure
base_name = "4bz6"

# Dictionary to store docking poses and their scores
poses_pred_structure_glide = {}

# Read each model from the poses file
with StructureReader(poses_pred_path) as reader:
    for model in reader:
        # Skip the reference structure
        if base_name in model.property["s_m_title"]:
            continue
        else:
            # Extract the docking score for this pose
            docking_score = model.property["r_i_docking_score"]
            # Store the pose model and its docking score
            poses_pred_structure_glide[model] = docking_score
            rmsd_value = rmsd.ConformerRmsd(reference_structure = poses_true, 
                                     test_structure = model, 
                                     asl_expr = "atom.element C").calculate()
            #break
            print(rmsd_value)


7.7503078065360524
7.255003402828024
7.207054875278265
7.424845533169471
7.0316075320475875
7.00833871623721
6.918062651160391
7.275681393939268
7.033971924286396
6.78700666164446
6.759924502583199
7.372259150549506
4.263829107248956
5.116802653108489
7.099081569086092
7.132743455640043
6.894822432685418
7.104385726163662
5.422017893272169
5.37824722683377
4.200972002950669
6.511971674746247
5.71469903197165
7.749410986395666
7.541278544808348
5.10703051441637
6.916857421131557
6.192557585900329
7.269044617488576
3.40948591640579
5.1656454176756075
5.291335346964258
5.249058620098208
7.475494150334262
4.180943914395878
6.207734653865536
7.615786294989863
7.126293346500274
7.112786241630148
5.4463929553137795
3.4108732885366657
6.911144522486452
7.405700286277773
7.425222768536011
7.149182722890718
6.880017018411333
7.039472096522863
6.069199264086143
7.329962975893286
7.071986089896509
7.023880332124839
3.3559154909099584
6.580086548217882
6.920035519201612
7.38131419678737
7.472262106

In [67]:
model_atom_list = list(model.atom)
pose_true_atom_list = list(poses_true.atom)
#save model and pose_true to pdb file
model.write("model.pdb")
poses_true.write("pose_true.pdb")

In [64]:
for i in range(len(pose_true_atom_list)):
    print("123")
    print(model_atom_list[i].element)
    print(pose_true_atom_list[i].element)


123
O
C
123
C
N
123
C
O
123
C
C
123
C
N
123
C
O
123
C
C
123
C
O
123
C
C
123
O
C
123
N
C
123
C
C
123
C
C
123
C
C
123
C
C
123
C
C
123
C
C
123
N
C
123
O
C
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123
H
H
123


IndexError: list index out of range