In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from rdkit import Chem

from cbiprep.atomtyper import AtomTyper, HybAtomTyper
from cbiprep.ligand_expo import LigandExpo
from cbiprep.pdbatoms import PDBAtoms

# When NumPy abbreviates a large array, show 8 items at each edge of every
# dimension so the adjacency matrices displayed in this notebook stay readable.
np.set_printoptions(edgeitems=8)



In [2]:
# Build a PDBAtoms object from the gzipped PDB entry 4jn2; the path is relative
# to the notebook's working directory.
pdb_atoms = PDBAtoms('pdb/4jn2.pdb.gz')

In [3]:
# Display the ligand names available in this structure.
pdb_atoms.get_ligand_names()

['4CC', 'GOL']

In [4]:
# Select the atoms of ligand '4CC', one of the names reported by get_ligand_names().
ligand_atoms = pdb_atoms.get_ligand('4CC')

In [5]:
# Sanity check: number of atoms in the selected ligand.
len(ligand_atoms)

35

In [6]:
# Protein atoms that PDBAtoms considers relevant to this ligand.
# NOTE(review): the selection rule lives in get_relevant_protein — confirm the criteria there.
protein_atoms = pdb_atoms.get_relevant_protein(ligand_atoms)

In [7]:
# Sanity check: number of protein atoms kept for this ligand.
len(protein_atoms)

3336

In [8]:
# Reduce the protein to the pocket around the ligand.
# thres=5 is presumably a distance cutoff in angstroms — TODO confirm in get_pocket.
pocket_atoms = protein_atoms.get_pocket(ligand_atoms, thres=5)
# Save the pocket's string form to pocket.pdb. Path.write_text opens, writes and
# closes the file in a single call (the previous bare open(...).write(...) never
# closed the handle explicitly) and still returns the number of characters
# written, which is what this cell displays.
Path('pocket.pdb').write_text(str(pocket_atoms)+'\n')

15228

In [9]:
# Sanity check: number of atoms in the extracted pocket.
len(pocket_atoms)

188

### Test distance-based adjacency matrix

In [10]:
# Ligand-vs-ligand adjacency matrix derived from inter-atomic distances with a
# 4.0 threshold (presumably angstroms — TODO confirm). diagzero=True is passed
# so that an atom is not marked adjacent to itself; the displayed diagonal is 0.
lig_adjmat = ligand_atoms.get_distance_based_adjacency_matrix(ligand_atoms, thres=4.0, diagzero=True)
lig_adjmat

array([[0, 1, 1, 1, 1, 0, 0, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 1, 1, 1, 1, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 1, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 0, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 0, 0, 0, 1, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, ..., 1, 0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 1, 0, 0, 0, 0, ..., 0, 1, 0, 0, 0, 1, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0, 1, 0, ..., 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, ..., 0, 0, 0, 0, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 0, ..., 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, ..., 0, 1, 0, 0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 0, 0, 0, 1, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 0, 0, 0, 1, 1, 0]])

In [11]:
# Pocket-vs-pocket adjacency matrix, using the same 4.0 threshold and
# diagzero=True as the ligand matrix.
pocket_adjmat = pocket_atoms.get_distance_based_adjacency_matrix(pocket_atoms, thres=4.0, diagzero=True)
pocket_adjmat

array([[0, 1, 1, 1, 1, 1, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 1, 1, 1, 1, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 1, 1, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 0, 1, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 0, 1, 1, 0, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 0, 1, 1, 1, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 1, 0, 1, 1, 1, 1, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 0, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 1, 0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 1, 1, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 1, 1, 1, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 1, 1, 1, 1, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 1, 1, 1, 1, 1, 0]])

In [12]:
# Ligand-vs-pocket adjacency with the same 4.0 threshold. diagzero is not passed
# here, since the two atom sets differ.
# NOTE(review): presumably rows index ligand atoms and columns index pocket atoms — confirm.
complex_adjmat = ligand_atoms.get_distance_based_adjacency_matrix(pocket_atoms, thres=4.0)
complex_adjmat

array([[0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 

### Test atom typings

In [13]:
# Turn the ligand atoms into an RDKit molecule using the Ligand Expo entry '4CC'.
# NOTE(review): presumably assign() takes bond orders from the reference entry —
# confirm in LigandExpo.assign.
expo_dic = LigandExpo()
ligand_mol = expo_dic.assign(ligand_atoms, '4CC')
# Print the SMILES as a quick check of the assigned chemistry.
print(Chem.MolToSmiles(ligand_mol))

Cn1c(CNc2ccc(C(=N)N)cc2)nc2cc(C(=O)N(CCC(=O)O)c3ccccn3)ccc21


In [14]:
# Apply HybAtomTyper to the ligand molecule. The displayed result is an integer
# array with 35 entries, i.e. one per ligand atom.
# NOTE(review): the meaning of each type code is defined in cbiprep.atomtyper.
atomtyper = HybAtomTyper()
v = atomtyper(ligand_mol)
v

array([ 8,  4,  4,  4,  1,  4,  4,  4,  4,  4,  4,  2,  4,  4,  8,  4,  1,
        2,  6,  6, 10,  4,  1,  4,  8,  1,  4,  4,  4,  4, 10, 10,  2,  6,
        6])

In [15]:
# Integer codes shown in the "hyb=" column. Built once, outside the loop,
# because the mapping is identical for every atom.
HYB_CODES = {
    Chem.HybridizationType.SP: 1,
    Chem.HybridizationType.SP2: 2,
    Chem.HybridizationType.SP3: 3,
}
AROMATIC_CODE = 5  # aromatic atoms report this code instead of their hybridization

# Print one summary line per ligand atom: index, element, formal-charge sign,
# attached hydrogen count, hybridization/aromaticity code, total valence and
# total degree.
for atom in ligand_mol.GetAtoms():
    idx = atom.GetIdx()
    el = atom.GetSymbol()
    hyb = atom.GetHybridization()
    nH = atom.GetTotalNumHs()
    nH = f'({nH}H)' if nH != 0 else ''  # omit the annotation when no H is attached
    ch = atom.GetFormalCharge()
    ch = '(+)' if 0 < ch else '(-)' if ch < 0 else ''  # only the sign is shown
    val = atom.GetTotalValence()
    deg = atom.GetTotalDegree()
    atomf = f'{idx:3d} {el}{ch}{nH}'
    # Hybridizations missing from HYB_CODES yield None (printed as "hyb=None").
    typ = AROMATIC_CODE if atom.GetIsAromatic() else HYB_CODES.get(hyb)
    print(f'{atomf:<14} hyb={typ} v,d={val},{deg}')

  0 N          hyb=5 v,d=3,3
  1 C(1H)      hyb=5 v,d=4,3
  2 C(1H)      hyb=5 v,d=4,3
  3 C          hyb=5 v,d=4,3
  4 C(3H)      hyb=3 v,d=4,4
  5 C          hyb=5 v,d=4,3
  6 C          hyb=5 v,d=4,3
  7 C(1H)      hyb=5 v,d=4,3
  8 C(1H)      hyb=5 v,d=4,3
  9 C(1H)      hyb=5 v,d=4,3
 10 C(1H)      hyb=5 v,d=4,3
 11 C          hyb=2 v,d=4,3
 12 C          hyb=5 v,d=4,3
 13 C          hyb=5 v,d=4,3
 14 N          hyb=5 v,d=3,2
 15 C(1H)      hyb=5 v,d=4,3
 16 C(2H)      hyb=3 v,d=4,4
 17 C          hyb=2 v,d=4,3
 18 N(1H)      hyb=2 v,d=3,3
 19 N          hyb=2 v,d=3,3
 20 O          hyb=2 v,d=2,1
 21 C          hyb=5 v,d=4,3
 22 C(2H)      hyb=3 v,d=4,4
 23 C(1H)      hyb=5 v,d=4,3
 24 N          hyb=5 v,d=3,2
 25 C(2H)      hyb=3 v,d=4,4
 26 C(1H)      hyb=5 v,d=4,3
 27 C(1H)      hyb=5 v,d=4,3
 28 C          hyb=5 v,d=4,3
 29 C(1H)      hyb=5 v,d=4,3
 30 O          hyb=2 v,d=2,1
 31 O(1H)      hyb=2 v,d=2,2
 32 C          hyb=2 v,d=4,3
 33 N(1H)      hyb=2 v,d=3,2
 34 N(2H)     

### Run Flare