In [1]:
import os
import Bio
from Bio.PDB import PDBParser, PDBIO, Select, NeighborSearch, Selection
#from Exception import LookupError

# https://stackoverflow.com/questions/61390035/how-to-save-each-ligand-from-a-pdb-file-separately-with-bio-pdb

def is_het(residue):
    """Return True when the residue is a hetero residue other than water.

    The decision is based on the first field of ``residue.id``: a blank
    (" ") or "W" in that field yields False, any other value yields True.
    """
    hetero_flag = residue.id[0]
    return hetero_flag not in (" ", "W")

class LigandSelect(Select):
    """Selector that lets exactly one hetero residue of one chain through."""

    def __init__(self, chain, residue):
        # Remember the single chain/residue pair that should be written out.
        self.residue = residue
        self.chain = chain

    def accept_chain(self, chain):
        """Accept only chains whose id matches the id of the target chain."""
        wanted_id = self.chain.id
        return chain.id == wanted_id

    def accept_residue(self, residue):
        """Accept only the target residue, and only if it is a non-water hetero residue."""
        if not (residue == self.residue):
            return False
        return is_het(residue)
    
class ProteinSelect(Select):
    """
    Selector that keeps only residues named like one of the 20 standard
    amino acids.

    Every residue with any other name is rejected (e.g. water, ligands, or
    modified residues such as MSE, none of which are in the list below).
    """

    # Built once when the class is created instead of on every
    # accept_residue call; a frozenset gives constant-time membership tests.
    _STANDARD_RESIDUES = frozenset((
        "ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN",
        "GLY", "HIS", "ILE", "LEU", "LYS", "MET", "PHE",
        "PRO", "SER", "THR", "TRP", "TYR", "VAL",
    ))

    def accept_residue(self, residue):
        """Return True if the residue name is one of the 20 standard amino acids."""
        return residue.get_resname() in self._STANDARD_RESIDUES
    
def decouple_pdb(path_in="../pdb/train_coupled/", path_out="../pdb/train_decoupled/"):
    """Split each coupled PDB entry into a protein file and one file per ligand.

    For every file in ``path_in`` whose name ends with ``.ent`` this writes:

    * ``<code>_prot.pdb``    -- the residues accepted by ``ProteinSelect``
    * ``<code>_lig_<i>.pdb`` -- one file per residue for which ``is_het`` is
      true, numbered from 0 in the order the residues are met in model 0

    Parameters
    ----------
    path_in : str
        Directory containing the input ``.ent`` files.
    path_out : str
        Directory receiving the output files. It is created when missing.

    Returns
    -------
    None
        All results are written to disk. A message is printed for every
        entry that yields more than one ligand file.
    """
    # Saving would fail on a missing output directory; an empty string means
    # "current directory" and needs no creation.
    if path_out:
        os.makedirs(path_out, exist_ok=True)

    # One parser and one writer are enough for the whole run.
    parser = PDBParser()
    io = PDBIO()

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for file_name in sorted(os.listdir(path_in)):
        if not file_name.endswith('.ent'):
            continue

        # assumes file names look like "pdbXXXX.ent", so characters 3..6 are
        # the four-letter PDB code -- TODO confirm for other naming schemes
        pdb_code = file_name[3:7]

        # os.path.join keeps the paths valid even when the directory
        # arguments are passed without a trailing slash.
        structure = parser.get_structure(pdb_code, os.path.join(path_in, file_name))
        io.set_structure(structure)
        io.save(os.path.join(path_out, f"{pdb_code}_prot.pdb"), ProteinSelect())

        # NOTE(review): ligands are collected from model 0 only, while the
        # protein file above is written without any model filter -- confirm
        # this is intended for multi-model entries.
        ligand_count = 0
        for chain in structure[0]:
            for residue in chain:
                if not is_het(residue):
                    continue
                ligand_path = os.path.join(path_out, f"{pdb_code}_lig_{ligand_count}.pdb")
                io.save(ligand_path, LigandSelect(chain, residue))
                ligand_count += 1

        if ligand_count > 1:
            print(f"Multiple ligands found for {pdb_code} ({ligand_count})!")
            
        

In [144]:
# Run the split with the default input and output directories; one line is
# printed for every entry that yields more than one ligand file.
decouple_pdb()



Multiple ligands found for 11as (2)!
Multiple ligands found for 12as (4)!
Multiple ligands found for 1a27 (2)!
Multiple ligands found for 1a28 (2)!




Multiple ligands found for 1a2c (5)!
Multiple ligands found for 1a4w (4)!




Multiple ligands found for 1a61 (4)!
Multiple ligands found for 1a78 (5)!




Multiple ligands found for 1a82 (3)!




Multiple ligands found for 1a8i (2)!




Multiple ligands found for 1a8r (15)!
Multiple ligands found for 1aax (3)!
Multiple ligands found for 1ae8 (3)!




Multiple ligands found for 1afe (3)!




Multiple ligands found for 1afq (3)!




Multiple ligands found for 1agw (2)!




Multiple ligands found for 1alw (10)!




Multiple ligands found for 1aoe (4)!
Multiple ligands found for 1aqb (2)!




Multiple ligands found for 1aqw (8)!




Multiple ligands found for 1aqx (8)!




Multiple ligands found for 1arg (2)!
Multiple ligands found for 1aua (2)!
Multiple ligands found for 1ax0 (10)!




Multiple ligands found for 1ax2 (11)!
Multiple ligands found for 1axr (4)!




Multiple ligands found for 1aym (3)!
Multiple ligands found for 1b0u (4)!




Multiple ligands found for 1b16 (2)!
Multiple ligands found for 1b1c (2)!




Multiple ligands found for 1b3d (11)!
Multiple ligands found for 1b4d (3)!




Multiple ligands found for 1b7y (2)!
Multiple ligands found for 1b9t (4)!




Multiple ligands found for 1bb0 (4)!
Multiple ligands found for 1bf3 (2)!
Multiple ligands found for 1bgn (2)!




Multiple ligands found for 1bht (5)!




Multiple ligands found for 1biw (11)!
Multiple ligands found for 1bj5 (5)!




Multiple ligands found for 1bji (11)!
Multiple ligands found for 1bju (4)!
Multiple ligands found for 1bjv (5)!
Multiple ligands found for 1bkw (2)!
Multiple ligands found for 1bky (2)!
Multiple ligands found for 1bl5 (3)!




Multiple ligands found for 1bm7 (2)!
Multiple ligands found for 1boz (2)!




Multiple ligands found for 1bq1 (6)!
Multiple ligands found for 1bs1 (4)!




Multiple ligands found for 1bu5 (4)!




Multiple ligands found for 1bvy (8)!




Multiple ligands found for 1bwc (3)!




Multiple ligands found for 1bwu (14)!
Multiple ligands found for 1c0l (2)!




Multiple ligands found for 1c14 (4)!
Multiple ligands found for 1c23 (4)!




Multiple ligands found for 1c3v (5)!
Multiple ligands found for 1c4q (15)!




Multiple ligands found for 1c4u (3)!




Multiple ligands found for 1c4v (3)!
Multiple ligands found for 1c50 (2)!




Multiple ligands found for 1c5o (3)!




Multiple ligands found for 1c5w (4)!




Multiple ligands found for 1c7o (8)!
Multiple ligands found for 1c80 (4)!




Multiple ligands found for 1c8k (2)!




Multiple ligands found for 1c8u (2)!




Multiple ligands found for 1ca8 (4)!
Multiple ligands found for 1cg6 (2)!




Multiple ligands found for 1chw (2)!




Multiple ligands found for 1ckm (2)!
Multiple ligands found for 1ckp (2)!
Multiple ligands found for 1cly (5)!




Multiple ligands found for 1cml (3)!
Multiple ligands found for 1cnq (6)!




Multiple ligands found for 1cpu (9)!
Multiple ligands found for 1cqf (18)!




Multiple ligands found for 1cyd (8)!
Multiple ligands found for 1cza (5)!
Multiple ligands found for 1czi (3)!




Multiple ligands found for 1d0h (3)!
Multiple ligands found for 1d1q (3)!




Multiple ligands found for 1d2a (4)!
Multiple ligands found for 1d2s (3)!




Multiple ligands found for 1d5z (4)!




Multiple ligands found for 1d6s (4)!
Multiple ligands found for 1d6w (4)!




Multiple ligands found for 1d7u (3)!




Multiple ligands found for 1d8a (4)!




Multiple ligands found for 1d9i (4)!
Multiple ligands found for 1db4 (3)!




Multiple ligands found for 1dbt (3)!




Multiple ligands found for 1dbv (12)!
Multiple ligands found for 1dek (6)!




Multiple ligands found for 1del (5)!




Multiple ligands found for 1dfo (8)!




Multiple ligands found for 1dgl (10)!




Multiple ligands found for 1djr (11)!




Multiple ligands found for 1dkf (2)!
Multiple ligands found for 1dkp (5)!




Multiple ligands found for 1dll (6)!
Multiple ligands found for 1dm2 (5)!




Multiple ligands found for 1dmk (16)!
Multiple ligands found for 1dnl (6)!
Multiple ligands found for 1doh (4)!




Multiple ligands found for 1dp2 (2)!




Multiple ligands found for 1dqa (12)!
Multiple ligands found for 1dqp (4)!




Multiple ligands found for 1dtl (6)!




Multiple ligands found for 1dv2 (2)!




Multiple ligands found for 1dvj (4)!




Multiple ligands found for 1dxr (25)!
Multiple ligands found for 1dy4 (6)!




Multiple ligands found for 1dz8 (9)!
Multiple ligands found for 1e1k (2)!
Multiple ligands found for 1e1x (2)!
Multiple ligands found for 1e1y (4)!
Multiple ligands found for 1e20 (3)!




Multiple ligands found for 1e2i (6)!




Multiple ligands found for 1e2j (4)!
Multiple ligands found for 1e3r (2)!




Multiple ligands found for 1e3v (2)!




Multiple ligands found for 1e40 (9)!
Multiple ligands found for 1e4i (2)!




Multiple ligands found for 1e4n (2)!




Multiple ligands found for 1e56 (4)!




Multiple ligands found for 1e5j (8)!




Multiple ligands found for 1e5q (16)!




Multiple ligands found for 1e6e (9)!




Multiple ligands found for 1e6r (10)!




Multiple ligands found for 1e6z (9)!




Multiple ligands found for 1e7a (4)!




Multiple ligands found for 1e7b (6)!
Multiple ligands found for 1e7f (8)!
Multiple ligands found for 1e7y (3)!




Multiple ligands found for 1e9h (4)!
Multiple ligands found for 1ecs (3)!




Multiple ligands found for 1ecv (4)!




Multiple ligands found for 1eef (14)!




Multiple ligands found for 1egh (6)!




Multiple ligands found for 1eix (4)!




Multiple ligands found for 1el5 (9)!
Multiple ligands found for 1eqa (2)!




Multiple ligands found for 1eqy (5)!
Multiple ligands found for 1esv (6)!




Multiple ligands found for 1ewk (11)!
Multiple ligands found for 1exa (2)!




Multiple ligands found for 1ey3 (6)!




Multiple ligands found for 1ez1 (13)!




Multiple ligands found for 1ez9 (8)!
Multiple ligands found for 1f06 (3)!




Multiple ligands found for 1f0y (4)!




Multiple ligands found for 1f17 (2)!
Multiple ligands found for 1f3a (2)!




Multiple ligands found for 1f3b (4)!




Multiple ligands found for 1f3t (8)!
Multiple ligands found for 1f4e (10)!
Multiple ligands found for 1f4l (2)!
Multiple ligands found for 1f5v (2)!




Multiple ligands found for 1f7k (5)!
Multiple ligands found for 1f7p (2)!




Multiple ligands found for 1f8g (63)!
Multiple ligands found for 1f9d (10)!




Multiple ligands found for 1fae (3)!




Multiple ligands found for 1fbo (3)!
Multiple ligands found for 1fbw (10)!




Multiple ligands found for 1fby (2)!
Multiple ligands found for 1fcx (2)!
Multiple ligands found for 1fd0 (2)!
Multiple ligands found for 1ffq (3)!




Multiple ligands found for 1fgi (2)!




Multiple ligands found for 1fj4 (4)!
Multiple ligands found for 1fjj (6)!




Multiple ligands found for 1fk8 (2)!




Multiple ligands found for 1fkn (2)!




Multiple ligands found for 1flm (2)!




Multiple ligands found for 1fm7 (5)!
Multiple ligands found for 1fmj (12)!
Multiple ligands found for 1fqa (4)!




Multiple ligands found for 1fqo (4)!




Multiple ligands found for 1frz (2)!
Multiple ligands found for 1fs4 (2)!
Multiple ligands found for 1ftq (3)!
Multiple ligands found for 1fu4 (2)!
Multiple ligands found for 1fv0 (10)!




Multiple ligands found for 1fwu (3)!
Multiple ligands found for 1fy7 (2)!




Multiple ligands found for 1g0n (3)!




Multiple ligands found for 1g0o (8)!
Multiple ligands found for 1g13 (7)!




Multiple ligands found for 1g5y (2)!
Multiple ligands found for 1g6n (2)!




Multiple ligands found for 1g6o (19)!
Multiple ligands found for 1g74 (2)!
Multiple ligands found for 1g76 (4)!
Multiple ligands found for 1g86 (2)!
Multiple ligands found for 1g8i (14)!




Multiple ligands found for 1g98 (2)!




Multiple ligands found for 1g9q (2)!




Multiple ligands found for 1g9v (8)!




Multiple ligands found for 1ga1 (4)!
Multiple ligands found for 1gan (4)!




Multiple ligands found for 1gbn (6)!




Multiple ligands found for 1gck (4)!
Multiple ligands found for 1gfz (3)!




Multiple ligands found for 1gg6 (9)!




Multiple ligands found for 1ghw (3)!




Multiple ligands found for 1gi8 (3)!




Multiple ligands found for 1gj7 (3)!




Multiple ligands found for 1gj8 (3)!




Multiple ligands found for 1gjb (3)!




Multiple ligands found for 1gjd (3)!




Multiple ligands found for 1gkl (20)!




Multiple ligands found for 1gm8 (2)!
Multiple ligands found for 1goo (2)!
Multiple ligands found for 1goy (4)!




Multiple ligands found for 1gp6 (6)!
Multiple ligands found for 1gsa (5)!




Multiple ligands found for 1gth (32)!
Multiple ligands found for 1gw2 (5)!
Multiple ligands found for 1gz8 (2)!
Multiple ligands found for 1h01 (5)!
Multiple ligands found for 1h08 (4)!
Multiple ligands found for 1h0s (7)!
Multiple ligands found for 1h16 (15)!




Multiple ligands found for 1h1r (4)!
Multiple ligands found for 1h3n (7)!
Multiple ligands found for 1h46 (3)!
Multiple ligands found for 1h5u (3)!
Multiple ligands found for 1h61 (2)!




Multiple ligands found for 1h6c (6)!
Multiple ligands found for 1h78 (3)!
Multiple ligands found for 1h9z (7)!
Multiple ligands found for 1ha2 (7)!
Multiple ligands found for 1he3 (2)!




Multiple ligands found for 1hg4 (6)!
Multiple ligands found for 1hj6 (5)!
Multiple ligands found for 1hk4 (8)!
Multiple ligands found for 1hlf (2)!




Multiple ligands found for 1hm2 (13)!




Multiple ligands found for 1hmu (11)!
Multiple ligands found for 1hnn (4)!




Multiple ligands found for 1ho4 (7)!




Multiple ligands found for 1hop (2)!




Multiple ligands found for 1hox (2)!
Multiple ligands found for 1hp1 (7)!




Multiple ligands found for 1hv6 (4)!




Multiple ligands found for 1hw8 (6)!




Multiple ligands found for 1hwi (7)!




Multiple ligands found for 1hzz (2)!
Multiple ligands found for 1i00 (4)!




Multiple ligands found for 1i0z (4)!




Multiple ligands found for 1i10 (24)!
Multiple ligands found for 1i2c (4)!
Multiple ligands found for 1i5r (4)!
Multiple ligands found for 1i7g (3)!




Multiple ligands found for 1i9h (2)!
Multiple ligands found for 1ia1 (6)!




Multiple ligands found for 1ia9 (5)!




Multiple ligands found for 1iah (6)!
Multiple ligands found for 1ib0 (2)!




Multiple ligands found for 1icq (4)!
Multiple ligands found for 1iep (8)!




Multiple ligands found for 1iex (9)!




Multiple ligands found for 1ig3 (16)!




Multiple ligands found for 1iin (4)!
Multiple ligands found for 1iiu (2)!




Multiple ligands found for 1ik4 (6)!
Multiple ligands found for 1inf (4)!
Multiple ligands found for 1ipf (4)!




Multiple ligands found for 1iri (4)!
Multiple ligands found for 1is4 (2)!




Multiple ligands found for 1iuc (4)!
Multiple ligands found for 1iut (2)!




Multiple ligands found for 1iwe (6)!
Multiple ligands found for 1iwh (5)!
Multiple ligands found for 1ix7 (2)!




Multiple ligands found for 1ixn (8)!




Multiple ligands found for 1iyk (4)!
Multiple ligands found for 1iz2 (2)!




Multiple ligands found for 1j0d (4)!




Multiple ligands found for 1j0i (6)!
Multiple ligands found for 1j16 (9)!
Multiple ligands found for 1j17 (3)!
Multiple ligands found for 1j1g (2)!
Multiple ligands found for 1j4r (6)!




Multiple ligands found for 1j6z (9)!
Multiple ligands found for 1j84 (5)!
Multiple ligands found for 1j8a (8)!




Multiple ligands found for 1j8v (12)!
Multiple ligands found for 1ja9 (2)!
Multiple ligands found for 1jbw (5)!
Multiple ligands found for 1jc9 (2)!
Multiple ligands found for 1jdj (2)!




Multiple ligands found for 1jdt (6)!




Multiple ligands found for 1jdv (10)!




Multiple ligands found for 1je1 (12)!
Multiple ligands found for 1jep (7)!




Multiple ligands found for 1jjv (3)!




Multiple ligands found for 1jkx (4)!




Multiple ligands found for 1jlx (8)!
Multiple ligands found for 1jmo (12)!




Multiple ligands found for 1jqi (4)!




Multiple ligands found for 1js3 (8)!




Multiple ligands found for 1jsh (12)!
Multiple ligands found for 1jtv (2)!
Multiple ligands found for 1jzs (3)!
Multiple ligands found for 1k06 (3)!
Multiple ligands found for 1k0j (7)!




Multiple ligands found for 1k0y (8)!
Multiple ligands found for 1k12 (3)!




Multiple ligands found for 1k3l (2)!




Multiple ligands found for 1k4m (6)!




Multiple ligands found for 1k5s (2)!
Multiple ligands found for 1k6x (7)!
Multiple ligands found for 1k97 (2)!




Multiple ligands found for 1k9j (8)!




Multiple ligands found for 1k9s (12)!




Multiple ligands found for 1k9t (4)!
Multiple ligands found for 1ka0 (2)!




Multiple ligands found for 1kbi (10)!




Multiple ligands found for 1kdt (5)!




Multiple ligands found for 1ki3 (4)!
Multiple ligands found for 1ki6 (4)!




Multiple ligands found for 1kj1 (14)!
Multiple ligands found for 1kkp (2)!
Multiple ligands found for 1kl1 (2)!




Multiple ligands found for 1kn2 (8)!




Multiple ligands found for 1koj (2)!
Multiple ligands found for 1kpm (5)!




Multiple ligands found for 1kpv (8)!
Multiple ligands found for 1kqr (3)!




Multiple ligands found for 1kqz (4)!
Multiple ligands found for 1kr0 (5)!
Multiple ligands found for 1kti (2)!
Multiple ligands found for 1kuj (4)!




Multiple ligands found for 1kxh (5)!




Multiple ligands found for 1kz8 (8)!
Multiple ligands found for 1kzk (11)!




Multiple ligands found for 1l2t (6)!




Multiple ligands found for 1l3i (54)!




Multiple ligands found for 1lev (8)!




Multiple ligands found for 1lf9 (8)!
Multiple ligands found for 1lgt (4)!




Multiple ligands found for 1lh0 (4)!
Multiple ligands found for 1lhu (2)!
Multiple ligands found for 1lkd (5)!




Multiple ligands found for 1llq (2)!




Multiple ligands found for 1los (4)!




Multiple ligands found for 1lot (9)!




Multiple ligands found for 1lp6 (2)!




Multiple ligands found for 1lti (5)!




Multiple ligands found for 1lua (3)!




Multiple ligands found for 1lw5 (73)!




Multiple ligands found for 1lwj (4)!
Multiple ligands found for 1lwn (3)!
Multiple ligands found for 1lwo (4)!
Multiple ligands found for 1lx6 (4)!




Multiple ligands found for 1m0u (3)!
Multiple ligands found for 1m1d (2)!




Multiple ligands found for 1m26 (8)!




Multiple ligands found for 1m3u (20)!
Multiple ligands found for 1m48 (2)!




Multiple ligands found for 1m4d (5)!




Multiple ligands found for 1m5b (8)!




Multiple ligands found for 1m5w (8)!
Multiple ligands found for 1m67 (2)!




Multiple ligands found for 1m78 (4)!
Multiple ligands found for 1m7q (2)!
Multiple ligands found for 1m7y (2)!




Multiple ligands found for 1m9n (6)!




Multiple ligands found for 1mfi (3)!




Multiple ligands found for 1mfp (5)!
Multiple ligands found for 1mgp (7)!




Multiple ligands found for 1mja (2)!
Multiple ligands found for 1mjl (2)!




Multiple ligands found for 1ml3 (6)!
Multiple ligands found for 1ml6 (6)!




Multiple ligands found for 1mly (6)!




Multiple ligands found for 1mo9 (4)!




Multiple ligands found for 1mp0 (11)!
Multiple ligands found for 1mq5 (5)!
Multiple ligands found for 1mqe (2)!
Multiple ligands found for 1mrq (3)!




Multiple ligands found for 1mrz (2)!




Multiple ligands found for 1ms8 (2)!




Multiple ligands found for 1ms9 (4)!
Multiple ligands found for 1mtv (2)!
Multiple ligands found for 1mvt (3)!




Multiple ligands found for 1mwe (16)!




Multiple ligands found for 1mxh (8)!




Multiple ligands found for 1mxu (8)!




Multiple ligands found for 1my2 (8)!
Multiple ligands found for 1mzs (3)!




Multiple ligands found for 1n1e (2)!
Multiple ligands found for 1n1g (4)!
Multiple ligands found for 1n1t (3)!




Multiple ligands found for 1n3o (6)!
Multiple ligands found for 1n5q (2)!




Multiple ligands found for 1n6b (4)!




Multiple ligands found for 1n71 (9)!
Multiple ligands found for 1n8u (3)!




Multiple ligands found for 1naa (9)!




Multiple ligands found for 1ndi (2)!




Multiple ligands found for 1ndj (4)!




Multiple ligands found for 1ne7 (31)!
Multiple ligands found for 1nep (2)!




Multiple ligands found for 1ney (4)!




Multiple ligands found for 1nf0 (4)!




Multiple ligands found for 1nhu (2)!




Multiple ligands found for 1nlm (6)!




Multiple ligands found for 1nm6 (2)!




Multiple ligands found for 1nmd (4)!
Multiple ligands found for 1nmk (2)!




Multiple ligands found for 1npl (11)!
Multiple ligands found for 1nq2 (7)!




Multiple ligands found for 1nq5 (8)!
Multiple ligands found for 1nr6 (4)!




Multiple ligands found for 1nt1 (2)!
Multiple ligands found for 1nuq (6)!




Multiple ligands found for 1nut (4)!
Multiple ligands found for 1nux (5)!




Multiple ligands found for 1nvq (2)!
Multiple ligands found for 1nwl (2)!
Multiple ligands found for 1nzd (4)!




Multiple ligands found for 1o0s (4)!




Multiple ligands found for 1o2g (3)!




Multiple ligands found for 1o3p (3)!
Multiple ligands found for 1o3w (2)!




Multiple ligands found for 1o57 (16)!
Multiple ligands found for 1o72 (2)!




Multiple ligands found for 1o94 (8)!




Multiple ligands found for 1o9b (27)!
Multiple ligands found for 1obh (11)!




Multiple ligands found for 1oc5 (11)!
Multiple ligands found for 1ocn (7)!
Multiple ligands found for 1od8 (10)!




Multiple ligands found for 1ofd (6)!
Multiple ligands found for 1ofs (8)!




Multiple ligands found for 1ogx (2)!
Multiple ligands found for 1oh0 (3)!




Multiple ligands found for 1oir (2)!




Multiple ligands found for 1oj4 (17)!




Multiple ligands found for 1oja (4)!




Multiple ligands found for 1oni (13)!
Multiple ligands found for 1opk (3)!
Multiple ligands found for 1oss (4)!
Multiple ligands found for 1ouk (2)!
Multiple ligands found for 1ove (2)!




Multiple ligands found for 1ox5 (5)!




Multiple ligands found for 1oyn (12)!




Multiple ligands found for 1oz0 (6)!




Multiple ligands found for 1p1q (8)!




Multiple ligands found for 1p2d (3)!




Multiple ligands found for 1p5e (4)!




Multiple ligands found for 1p5r (2)!




Multiple ligands found for 1p60 (4)!
Multiple ligands found for 1p61 (2)!




Multiple ligands found for 1p6x (8)!




Multiple ligands found for 1p72 (13)!




Multiple ligands found for 1p7c (3)!




Multiple ligands found for 1p84 (13)!




Multiple ligands found for 1p9l (5)!




Multiple ligands found for 1pbq (2)!
Multiple ligands found for 1pgt (4)!
Multiple ligands found for 1pj6 (3)!




Multiple ligands found for 1pkd (4)!
Multiple ligands found for 1pme (3)!




Multiple ligands found for 1pq6 (5)!




Multiple ligands found for 1pq9 (8)!




Multiple ligands found for 1pt8 (9)!
Multiple ligands found for 1pty (3)!




Multiple ligands found for 1pwz (4)!




Multiple ligands found for 1px0 (4)!
Multiple ligands found for 1pxh (5)!




Multiple ligands found for 1q3d (2)!




Multiple ligands found for 1q3w (2)!




Multiple ligands found for 1q41 (2)!
Multiple ligands found for 1q4s (4)!




Multiple ligands found for 1q6p (3)!
Multiple ligands found for 1q9d (12)!




Multiple ligands found for 1q9m (12)!
Multiple ligands found for 1qa0 (2)!
Multiple ligands found for 1qb6 (3)!
Multiple ligands found for 1qbn (3)!
Multiple ligands found for 1qbo (3)!
Multiple ligands found for 1qcf (2)!




Multiple ligands found for 1qdc (17)!
Multiple ligands found for 1qfv (2)!




Multiple ligands found for 1qha (16)!
Multiple ligands found for 1qhf (6)!




Multiple ligands found for 1qhi (4)!




Multiple ligands found for 1qi0 (7)!
Multiple ligands found for 1qi5 (6)!
Multiple ligands found for 1ql9 (3)!




Multiple ligands found for 1qmg (17)!
Multiple ligands found for 1qnf (2)!
Multiple ligands found for 1qpc (6)!
Multiple ligands found for 1qpe (5)!




Multiple ligands found for 1qpk (6)!
Multiple ligands found for 1qrr (4)!
Multiple ligands found for 1qwn (5)!




Multiple ligands found for 1qz6 (6)!
Multiple ligands found for 1r14 (3)!




Multiple ligands found for 1r31 (5)!
Multiple ligands found for 1ra8 (2)!




Multiple ligands found for 1rb3 (4)!




Multiple ligands found for 1rbo (4)!
Multiple ligands found for 1rc4 (2)!
Multiple ligands found for 1rf7 (4)!
Multiple ligands found for 1rh3 (2)!
Multiple ligands found for 1rx2 (4)!




Multiple ligands found for 1sfc (20)!




Multiple ligands found for 1swn (3)!
Multiple ligands found for 1tmk (3)!




Multiple ligands found for 1tox (2)!
Multiple ligands found for 1tsl (2)!




Multiple ligands found for 1ueh (10)!
Multiple ligands found for 1ugx (2)!
Multiple ligands found for 1v39 (2)!




Multiple ligands found for 1yef (9)!
Multiple ligands found for 1zeg (7)!




Multiple ligands found for 2ae2 (4)!




Multiple ligands found for 2ans (3)!




Multiple ligands found for 2arc (2)!




Multiple ligands found for 2bkj (3)!




Multiple ligands found for 2chb (23)!
Multiple ligands found for 2cmk (3)!
Multiple ligands found for 2dpm (2)!




Multiple ligands found for 2dub (4)!
Multiple ligands found for 2gpa (4)!




Multiple ligands found for 2ki5 (4)!




Multiple ligands found for 2man (2)!




Multiple ligands found for 2pgt (5)!
Multiple ligands found for 2skc (4)!
Multiple ligands found for 2src (2)!




Multiple ligands found for 2udp (6)!
Multiple ligands found for 2vp3 (2)!




Multiple ligands found for 3cpu (5)!
Multiple ligands found for 3mag (2)!
Multiple ligands found for 3man (3)!




Multiple ligands found for 3mct (2)!




Multiple ligands found for 3std (5)!




Multiple ligands found for 4dcg (2)!
Multiple ligands found for 6atj (4)!
Multiple ligands found for 7taa (2)!


In [156]:
import os
import numpy as np
import Bio
from Bio.PDB import PDBParser, PDBIO, Select, NeighborSearch, Selection


# One parser instance is reused for both files below.
parser = PDBParser()
# The trailing [0] keeps only model 0 of each parsed structure.
# NOTE(review): these files are presumably the ones written by decouple_pdb() -- confirm.
protein = parser.get_structure("protein", "../pdb/train_decoupled/1aaq_prot.pdb")[0]
ligand = parser.get_structure("ligand", "../pdb/train_decoupled/1aaq_lig_0.pdb")[0]

In [159]:
# Count (ligand atom name, protein atom name) pairs per distance shell for one complex.
import time

start = time.time()
# distances maps the lower edge of each shell to a dict
# {(lig_atom.id, prot_atom.id): number of such pairs found in that shell}.
distances = dict()
dr = 0.1
# Shell edges; shell i lies between distance_grid[i] and distance_grid[i+1].
distance_grid = np.arange(0.0001, 10+dr, dr)
# All protein atoms go into the search structure once; it is queried per ligand atom.
ns = NeighborSearch(Selection.unfold_entities(protein, "A"))

# Every shell gets an entry, so shells without any pair appear as empty dicts.
for dist in distance_grid[:-1]:
    distances[dist] = dict()

for lig_atom in ligand.get_atoms():
    lig_id = lig_atom.get_id()
    # Atoms inside the inner edge of the first shell. For every later shell
    # the inner edge equals the previous outer edge, so that search result
    # is carried over instead of being recomputed.
    lower_bound = set(ns.search(lig_atom.coord, radius=distance_grid[0]))
    for i in range(0, len(distance_grid)-1):
        lower_dist = distance_grid[i]
        upper_dist = distance_grid[i+1]
        upper_bound = set(ns.search(lig_atom.coord, radius=upper_dist))
        shell_counts = distances[lower_dist]
        # Atoms found within the outer radius but not within the inner one.
        for prot_atom in upper_bound - lower_bound:
            prot_id = prot_atom.get_id()
            shell_counts[(lig_id, prot_id)] = shell_counts.get((lig_id, prot_id), 0) + 1
        lower_bound = upper_bound
print(time.time()-start)
    

0.4378972053527832


In [157]:
distances

{0.0001: {},
 0.10010000000000001: {},
 0.2001: {},
 0.30010000000000003: {},
 0.4001: {},
 0.5001: {},
 0.6001000000000001: {},
 0.7001000000000001: {},
 0.8001: {},
 0.9001: {},
 1.0001: {},
 1.1001: {},
 1.2001000000000002: {},
 1.3001: {},
 1.4001000000000001: {},
 1.5001: {},
 1.6001: {},
 1.7001000000000002: {},
 1.8001: {},
 1.9001000000000001: {},
 2.0001: {},
 2.1001000000000003: {('O4', 'N'): 1},
 2.2001000000000004: {},
 2.3001000000000005: {},
 2.4001000000000006: {},
 2.5001: {('O4', 'CA'): 1},
 2.6001000000000003: {},
 2.7001000000000004: {('OS', 'OD2'): 1, ('N4', 'O'): 1, ('O4', 'O'): 1},
 2.8001000000000005: {('O', 'N'): 1, ('OS', 'OD1'): 1},
 2.9001000000000006: {('N1', 'O'): 1,
  ('OS', 'OD1'): 1,
  ('O3', 'OD2'): 1,
  ('N5', 'O'): 1,
  ('O4', 'C'): 1},
 3.0001: {('CA', 'O'): 1, ('OS', 'OD2'): 1, ('CE2', 'CG'): 1},
 3.1001000000000003: {('N', 'OD2'): 1,
  ('CA3', 'O'): 1,
  ('N2', 'O'): 1,
  ('CD1', 'O'): 1},
 3.2001000000000004: {('OS', 'CG'): 1, ('CB2', 'O'): 1},
 3

In [170]:
# Same counting as for the shell-keyed dictionary, but with the nesting
# switched: the atom-name pair is the outer key, the shell the inner key.
import time

start = time.time()
# pairs: key = (lig_atom.id, prot_atom.id); value = dict{shell centre r: count},
# where the shell around r lies between the radii r - dr/2 and r + dr/2.
pairs = dict()
dr = 0.1
distance_grid = np.arange(dr, 10+dr, dr)

# All protein atoms are stored in one NeighborSearch object, which is then
# queried with each ligand atom's coordinates.
ns = NeighborSearch(Selection.unfold_entities(protein, "A"))


for lig_atom in ligand.get_atoms():
    lig_id = lig_atom.get_id()
    for grid_value in distance_grid:
        # Rounding removes floating point noise from the grid value so the
        # shell centre is a clean dictionary key.
        r = round(grid_value, 2)
        inner_atoms = set(ns.search(lig_atom.coord, radius=r-(dr/2)))
        outer_atoms = set(ns.search(lig_atom.coord, radius=r+(dr/2)))
        # Atoms found within the outer radius but not within the inner one.
        for prot_atom in outer_atoms - inner_atoms:
            pair_key = (lig_id, prot_atom.get_id())
            shell_counts = pairs.setdefault(pair_key, dict())
            shell_counts[r] = shell_counts.get(r, 0) + 1

In [171]:
pairs

{('N', 'OD2'): {3.2: 1, 3.3: 1, 9.9: 1},
 ('N', 'CD1'): {4.0: 1, 8.0: 1, 8.9: 1, 9.7: 2, 9.8: 1, 10.0: 1},
 ('N', 'CG1'): {4.0: 1, 9.0: 1, 9.3: 1, 9.5: 1, 9.6: 1, 9.7: 1},
 ('N', 'CG'): {4.2: 1,
  4.4: 1,
  8.6: 1,
  8.7: 1,
  8.8: 1,
  9.1: 1,
  9.3: 1,
  9.9: 1,
  10.0: 2},
 ('N', 'O'): {4.3: 1,
  6.5: 1,
  6.6: 1,
  6.7: 2,
  6.8: 1,
  7.1: 1,
  8.5: 1,
  8.8: 1,
  9.4: 2,
  9.9: 1},
 ('N', 'CB'): {4.4: 1,
  5.2: 2,
  6.8: 1,
  8.8: 1,
  9.0: 1,
  9.2: 1,
  9.4: 2,
  9.8: 2,
  10.0: 1},
 ('N', 'N'): {4.7: 1,
  5.0: 1,
  5.1: 1,
  6.4: 1,
  6.7: 1,
  7.3: 1,
  8.0: 1,
  9.0: 1,
  9.2: 1,
  9.3: 2,
  9.5: 1,
  9.9: 1,
  10.0: 1},
 ('N', 'C'): {5.2: 1,
  5.5: 1,
  5.6: 1,
  6.0: 1,
  6.8: 1,
  7.1: 1,
  7.5: 1,
  8.2: 1,
  9.1: 1,
  9.8: 1,
  9.9: 1,
  10.0: 1},
 ('N', 'OD1'): {5.2: 2, 9.5: 1},
 ('N', 'CA'): {5.2: 1,
  5.3: 1,
  5.4: 1,
  5.8: 1,
  6.2: 1,
  7.0: 1,
  8.6: 1,
  8.9: 1,
  9.1: 1,
  9.3: 1,
  9.4: 1,
  9.6: 1,
  10.0: 1},
 ('N', 'NH2'): {5.4: 1, 9.5: 1},
 ('N', 'CG2'): {

In [None]:
import os
import glob
import numpy as np
import Bio
from Bio.PDB import PDBParser, PDBIO, Select, NeighborSearch, Selection


# Re-create the parser and reload the 1aaq protein file; [0] keeps only model 0.
parser = PDBParser()
protein = parser.get_structure("protein", "../pdb/train_decoupled/1aaq_prot.pdb")[0]


In [182]:
import os
import glob
import numpy as np
import Bio
import pickle
from Bio.PDB import PDBParser, PDBIO, Select, NeighborSearch, Selection

# Accumulate, over several complexes, how often each (protein atom name,
# ligand atom name) pair occurs in each distance shell.
# pairs: key = (prot_atom.id, lig_atom.id); value = dict{shell centre r: count},
# where the shell around r lies between the radii r - dr/2 and r + dr/2.
pairs = dict()
dr = 0.1
distance_grid = np.arange(dr, 10+dr, dr)

# Single place to change the location of the decoupled structure files.
decoupled_dir = "../pdb/train_decoupled/"

parser = PDBParser()
pdb_codes = ["1aaq", "1dxr"]
for pdb_code in pdb_codes:
    protein = parser.get_structure("protein", os.path.join(decoupled_dir, f"{pdb_code}_prot.pdb"))[0]
    # glob returns matches in arbitrary order; sorting (lexicographically, so
    # "lig_10" comes before "lig_2") makes the accumulation order reproducible.
    ligand_files = sorted(
        os.path.basename(i)
        for i in glob.glob(os.path.join(decoupled_dir, f"{pdb_code}_lig_*.pdb"))
    )
    # One search structure per protein, shared by all of its ligand files.
    ns = NeighborSearch(Selection.unfold_entities(protein, "A"))

    for ligand_file in ligand_files:
        ligand = parser.get_structure("ligand", os.path.join(decoupled_dir, ligand_file))[0]
        for lig_atom in ligand.get_atoms():
            lig_id = lig_atom.get_id()
            for r in distance_grid:
                # Rounding removes floating point noise so r is a clean key.
                r = round(r, 2)
                lower_bound = set(ns.search(lig_atom.coord, radius=r-(dr/2)))
                upper_bound = set(ns.search(lig_atom.coord, radius=r+(dr/2)))
                # Atoms found within the outer radius but not within the inner one.
                for prot_atom in upper_bound - lower_bound:
                    prot_id = prot_atom.get_id()
                    shell_counts = pairs.setdefault((prot_id, lig_id), dict())
                    shell_counts[r] = shell_counts.get(r, 0) + 1


9.832976341247559


In [183]:
pairs

{('OD2', 'N'): {3.2: 1, 3.3: 1, 9.9: 1},
 ('CD1', 'N'): {4.0: 1,
  8.0: 1,
  8.9: 1,
  9.7: 2,
  9.8: 1,
  10.0: 1,
  3.8: 1,
  5.7: 1,
  9.4: 1},
 ('CG1', 'N'): {4.0: 1, 9.0: 1, 9.3: 1, 9.5: 1, 9.6: 1, 9.7: 2, 5.8: 1},
 ('CG', 'N'): {4.2: 1,
  4.4: 1,
  8.6: 2,
  8.7: 1,
  8.8: 1,
  9.1: 1,
  9.3: 1,
  9.9: 2,
  10.0: 2,
  4.9: 1,
  5.3: 1},
 ('O', 'N'): {4.3: 1,
  6.5: 2,
  6.6: 1,
  6.7: 2,
  6.8: 3,
  7.1: 1,
  8.5: 1,
  8.8: 2,
  9.4: 2,
  9.9: 1,
  4.9: 1,
  8.9: 1,
  9.6: 1,
  9.8: 1},
 ('CB', 'N'): {4.4: 1,
  5.2: 2,
  6.8: 1,
  8.8: 1,
  9.0: 2,
  9.2: 1,
  9.4: 2,
  9.8: 2,
  10.0: 2,
  4.8: 1,
  5.7: 1,
  5.8: 1,
  8.0: 1,
  9.1: 1,
  9.6: 1,
  9.7: 1,
  9.9: 1},
 ('N', 'N'): {4.7: 1,
  5.0: 1,
  5.1: 1,
  6.4: 1,
  6.7: 1,
  7.3: 1,
  8.0: 1,
  9.0: 1,
  9.2: 1,
  9.3: 4,
  9.5: 1,
  9.9: 1,
  10.0: 1,
  3.7: 1,
  4.9: 1,
  6.8: 1,
  6.9: 2,
  8.7: 1,
  9.7: 1},
 ('C', 'N'): {5.2: 1,
  5.5: 1,
  5.6: 2,
  6.0: 2,
  6.8: 1,
  7.1: 1,
  7.5: 1,
  8.2: 1,
  9.1: 1,
  9.8: 1,
 

In [155]:
import glob
import os
 


['1dxr_lig_0.pdb',
 '1dxr_lig_1.pdb',
 '1dxr_lig_10.pdb',
 '1dxr_lig_11.pdb',
 '1dxr_lig_12.pdb',
 '1dxr_lig_13.pdb',
 '1dxr_lig_14.pdb',
 '1dxr_lig_15.pdb',
 '1dxr_lig_16.pdb',
 '1dxr_lig_17.pdb',
 '1dxr_lig_18.pdb',
 '1dxr_lig_19.pdb',
 '1dxr_lig_2.pdb',
 '1dxr_lig_20.pdb',
 '1dxr_lig_21.pdb',
 '1dxr_lig_22.pdb',
 '1dxr_lig_23.pdb',
 '1dxr_lig_24.pdb',
 '1dxr_lig_3.pdb',
 '1dxr_lig_4.pdb',
 '1dxr_lig_5.pdb',
 '1dxr_lig_6.pdb',
 '1dxr_lig_7.pdb',
 '1dxr_lig_8.pdb',
 '1dxr_lig_9.pdb']

In [99]:
# Inspect `distances`. NOTE(review): the cell that builds it is not shown here;
# judging by the displayed value it maps a distance bin to per-atom-pair
# counts -- confirm against the defining cell.
distances

{0.0001: {},
 0.10010000000000001: {},
 0.2001: {},
 0.30010000000000003: {},
 0.4001: {},
 0.5001: {},
 0.6001000000000001: {},
 0.7001000000000001: {},
 0.8001: {},
 0.9001: {},
 1.0001: {},
 1.1001: {},
 1.2001000000000002: {},
 1.3001: {},
 1.4001000000000001: {},
 1.5001: {},
 1.6001: {},
 1.7001000000000002: {},
 1.8001: {},
 1.9001000000000001: {},
 2.0001: {},
 2.1001000000000003: {('O4', 'N'): 1},
 2.2001000000000004: {},
 2.3001000000000005: {},
 2.4001000000000006: {},
 2.5001: {('O4', 'CA'): 1},
 2.6001000000000003: {},
 2.7001000000000004: {('OS', 'OD2'): 1, ('N4', 'O'): 1, ('O4', 'O'): 1},
 2.8001000000000005: {('O', 'N'): 1, ('OS', 'OD1'): 1},
 2.9001000000000006: {('N1', 'O'): 1,
  ('OS', 'OD1'): 1,
  ('O3', 'OD2'): 1,
  ('N5', 'O'): 1,
  ('O4', 'C'): 1},
 3.0001: {('CA', 'O'): 1, ('OS', 'OD2'): 1, ('CE2', 'CG'): 1},
 3.1001000000000003: {('N', 'OD2'): 1,
  ('CA3', 'O'): 1,
  ('N2', 'O'): 1,
  ('CD1', 'O'): 1},
 3.2001000000000004: {('OS', 'CG'): 1, ('CB2', 'O'): 1},
 3

In [124]:
# Download every PDB entry named in "zou_train_list" into the "train_zou"
# directory. The list file holds whitespace-separated PDB codes, possibly
# several per line.
#
# NOTE: `pdbl` is not created in this cell; it must already exist in the
# kernel (a `pdbl = PDBList()` cell appears further down in the notebook),
# so this cell fails on a fresh Restart & Run All unless that cell runs first.

# Never populated in this cell; kept because other cells may read it -- TODO confirm.
pdbs = []

# `with` guarantees the list file is closed, even if a download raises.
with open("zou_train_list") as fp:
    for line in fp:
        # split() without arguments already discards leading/trailing whitespace
        # and yields nothing for blank lines.
        for code in line.split():
            pdbl.retrieve_pdb_file(code, file_format="pdb", pdir="train_zou")
        

Downloading PDB structure '11as'...
Downloading PDB structure '12as'...
Downloading PDB structure '1a28'...
Downloading PDB structure '1a2c'...
Downloading PDB structure '1a4w'...
Downloading PDB structure '1a5x'...
Downloading PDB structure '1a61'...
Downloading PDB structure '1a78'...
Downloading PDB structure '1a80'...
Downloading PDB structure '1a82'...
Downloading PDB structure '1a8g'...
Downloading PDB structure '1a8i'...
Downloading PDB structure '1a8r'...
Downloading PDB structure '1a9u'...
Downloading PDB structure '1aax'...
Downloading PDB structure '1ae8'...
Downloading PDB structure '1afe'...
Downloading PDB structure '1afq'...
Downloading PDB structure '1agw'...
Downloading PDB structure '1alw'...
Downloading PDB structure '1amw'...
Downloading PDB structure '1aqb'...
Downloading PDB structure '1aqw'...
Downloading PDB structure '1aqx'...
Downloading PDB structure '1arg'...
Downloading PDB structure '1asu'...
Downloading PDB structure '1au4'...
Downloading PDB structure '1

Downloading PDB structure '1gm8'...
Downloading PDB structure '1goo'...
Downloading PDB structure '1goy'...
Downloading PDB structure '1gp2'...
Downloading PDB structure '1gsa'...
Downloading PDB structure '1gx8'...
Downloading PDB structure '1gxa'...
Downloading PDB structure '1gz8'...
Downloading PDB structure '1h01'...
Downloading PDB structure '1h08'...
Downloading PDB structure '1h0s'...
Downloading PDB structure '1h0v'...
Downloading PDB structure '1h1r'...
Downloading PDB structure '1h3n'...
Downloading PDB structure '1h46'...
Downloading PDB structure '1h5u'...
Downloading PDB structure '1h6c'...
Downloading PDB structure '1h78'...
Downloading PDB structure '1h94'...
Downloading PDB structure '1h9z'...
Downloading PDB structure '1ha2'...
Downloading PDB structure '1hdo'...
Downloading PDB structure '1hg4'...
Downloading PDB structure '1hg5'...
Downloading PDB structure '1hi3'...
Downloading PDB structure '1hj6'...
Downloading PDB structure '1hk4'...
Downloading PDB structure '1

Downloading PDB structure '1ogx'...
Downloading PDB structure '1oh0'...
Downloading PDB structure '1oiq'...
Downloading PDB structure '1oir'...
Downloading PDB structure '1oit'...
Downloading PDB structure '1oja'...
Downloading PDB structure '1oni'...
Downloading PDB structure '1ony'...
Downloading PDB structure '1onz'...
Downloading PDB structure '1opk'...
Downloading PDB structure '1oss'...
Downloading PDB structure '1oth'...
Downloading PDB structure '1ouk'...
Downloading PDB structure '1ouy'...
Downloading PDB structure '1ove'...
Downloading PDB structure '1ox5'...
Downloading PDB structure '1oyn'...
Downloading PDB structure '1oz0'...
Downloading PDB structure '1p1q'...
Downloading PDB structure '1p2d'...
Downloading PDB structure '1p5e'...
Downloading PDB structure '1p5r'...
Downloading PDB structure '1p60'...
Downloading PDB structure '1p61'...
Downloading PDB structure '1p6x'...
Downloading PDB structure '1p72'...
Downloading PDB structure '1p84'...
Downloading PDB structure '1

In [103]:
from Bio.PDB import PDBList

In [122]:
# Create the PDBList instance whose retrieve_pdb_file() is called by the
# download loop over "zou_train_list".
pdbl = PDBList()



Structure exists: 'train_zou\pdb1aaq.ent' 


'train_zou\\pdb1aaq.ent'

In [114]:
# Parse the entry file for 1aaq into a structure object named "1aaq".
# NOTE(review): this reads from "aa/", while the download cells write to
# "train_zou" -- confirm the intended directory.
pdb = PDBParser().get_structure("1aaq", "aa/pdb1aaq.ent")

In [119]:
# Walk model 0 of `pdb` chain by chain and print each residue's repr
# (one line per residue, in file order).
for chain in pdb[0]:
    for residue in chain:
        print(residue)

<Residue PRO het=  resseq=1 icode= >
<Residue GLN het=  resseq=2 icode= >
<Residue ILE het=  resseq=3 icode= >
<Residue THR het=  resseq=4 icode= >
<Residue LEU het=  resseq=5 icode= >
<Residue TRP het=  resseq=6 icode= >
<Residue GLN het=  resseq=7 icode= >
<Residue ARG het=  resseq=8 icode= >
<Residue PRO het=  resseq=9 icode= >
<Residue LEU het=  resseq=10 icode= >
<Residue VAL het=  resseq=11 icode= >
<Residue THR het=  resseq=12 icode= >
<Residue ILE het=  resseq=13 icode= >
<Residue LYS het=  resseq=14 icode= >
<Residue ILE het=  resseq=15 icode= >
<Residue GLY het=  resseq=16 icode= >
<Residue GLY het=  resseq=17 icode= >
<Residue GLN het=  resseq=18 icode= >
<Residue LEU het=  resseq=19 icode= >
<Residue LYS het=  resseq=20 icode= >
<Residue GLU het=  resseq=21 icode= >
<Residue ALA het=  resseq=22 icode= >
<Residue LEU het=  resseq=23 icode= >
<Residue LEU het=  resseq=24 icode= >
<Residue ASP het=  resseq=25 icode= >
<Residue THR het=  resseq=26 icode= >
<Residue GLY het=  re

In [126]:
# Sanity check: characters 3..6 of a "pdbXXXX.ent" file name are the
# four-character PDB code (the same [3:7] slice decouple_pdb applies to file names).
s = "pdb1a2c.ent"
s[3:7]

'1a2c'