In [1]:
# Load IPython's autoreload extension so edits to imported project modules are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from rdkit import Chem
from cbiprep.pdbatoms import PDBAtoms
from cbiprep.ligand_expo import LigandExpo
from cbiprep.atomtyper import AtomTyper, HybAtomTyper

# Show up to 10 items at the start and end of each dimension before NumPy abbreviates
# array output with "...".
np.set_printoptions(edgeitems=10)

In [3]:
# Build a PDBAtoms object from the gzipped PDB file (path is relative to the working directory).
pdb_atoms = PDBAtoms('pdb/4jn2.pdb.gz')

In [4]:
# Display the ligand names reported for this structure; one of them is selected in the next cell.
pdb_atoms.get_ligand_names()

['4CC', 'GOL']

In [5]:
# Pick the ligand named '4CC' (one of the names returned by get_ligand_names()).
ligand_atoms = pdb_atoms.get_ligand('4CC')

In [6]:
# Size of the ligand selection (presumably its atom count -- confirm against cbiprep).
len(ligand_atoms)

35

In [7]:
# Select the protein atoms considered relevant to this ligand; the selection criteria are
# implemented in get_relevant_protein (not shown in this notebook).
protein_atoms = pdb_atoms.get_relevant_protein(ligand_atoms)

In [8]:
# Size of the protein selection (presumably its atom count -- confirm against cbiprep).
len(protein_atoms)

3336

In [9]:
# Select the binding pocket from the protein atoms around the ligand
# (thres=5 is presumably a distance cutoff in angstroms -- TODO confirm against cbiprep).
pocket_atoms = protein_atoms.get_pocket(ligand_atoms, thres=5)

# Write the pocket's text form to pocket.pdb. The context manager ensures the file is
# flushed and closed deterministically rather than whenever the handle is garbage-collected.
with open('pocket.pdb', 'wt') as pocket_file:
    n_written = pocket_file.write(str(pocket_atoms) + '\n')

# Keep the number of characters written as the cell's displayed output.
n_written

15228

In [10]:
# Size of the pocket selection (presumably its atom count -- confirm against cbiprep).
len(pocket_atoms)

188

### Test distance-based adjacency matrix

In [11]:
# Ligand-vs-ligand adjacency matrix of 0/1 entries.
# thres=4.0 is presumably the distance cutoff (angstroms?) and diagzero=True presumably
# clears the self-pairs on the diagonal -- TODO confirm against cbiprep.
lig_adjmat = ligand_atoms.get_distance_based_adjacency_matrix(
    ligand_atoms,
    thres=4.0,
    diagzero=True,
)
lig_adjmat

array([[0, 1, 1, 1, 1, 0, 0, 1, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 1, 1, 1, 1, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 1, 0, 0, 1, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 0, 1, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 1, 0, ..., 1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 1, 0, 0, 0, 0, 0, 1, ..., 0, 1, 0, 1, 0, 0, 0, 1, 0, 0],
       [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, ..., 1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..., 0, 1, 0, 1, 0, 0, 0, 1, 1, 1],
       ...,
       [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ..., 0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, ..., 0, 0, 0, 1, 0, 0, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ..., 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 

In [12]:
# Pocket-vs-pocket adjacency matrix, built with the same settings as the ligand one
# (thres/diagzero semantics are defined in cbiprep -- TODO confirm).
pocket_adjmat = pocket_atoms.get_distance_based_adjacency_matrix(
    pocket_atoms,
    thres=4.0,
    diagzero=True,
)
pocket_adjmat

array([[0, 1, 1, 1, 1, 1, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 1, 1, 1, 1, 1, 1, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 1, 1, 1, 1, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 1, 1, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 0, 1, 1, 0, 1, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 0, 1, 1, 1, 0, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 1, 1, 0, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 1, 1, 1, 1, 1, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 1, 0, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 1, 1, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 

In [13]:
# Ligand-vs-pocket adjacency matrix; it is later placed in the ligand-rows / pocket-columns
# block of the padded matrix. diagzero is left at its default because the two atom sets differ.
complex_adjmat = ligand_atoms.get_distance_based_adjacency_matrix(
    pocket_atoms,
    thres=4.0,
)
complex_adjmat

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 

### Padding

<img src="padding.png" width="40%" align="left" />

In [None]:
# Padded sizes: M slots are reserved for ligand atoms and N slots for pocket atoms.
M, N = 50, 500

In [None]:
# Actual sizes: number of rows in the ligand and pocket adjacency matrices.
m = len(lig_adjmat)
n = len(pocket_adjmat)
# Show actual and padded sizes side by side.
m, M, n, N

In [None]:
# Zero-filled square integer matrix with one row/column per padded slot (ligand slots first,
# then pocket slots).
mat = np.zeros((M + N,) * 2, dtype=int)
mat.shape

In [None]:
# Layout of the padded (M+N) x (M+N) matrix:
#   rows/cols [0, M)    -> ligand slots (first m used, the rest stay zero)
#   rows/cols [M, M+N)  -> pocket slots (first n used, the rest stay zero)
# If m > M the ligand block would spill into the pocket region and be silently overwritten;
# if n > N the slice assignment fails with an opaque shape error. Check sizes up front.
assert m <= M, f'ligand has {m} atoms but only M={M} slots are reserved'
assert n <= N, f'pocket has {n} atoms but only N={N} slots are reserved'

mat[0:m, 0:m] = lig_adjmat           # ligand-ligand block
mat[M:M+n, M:M+n] = pocket_adjmat    # pocket-pocket block
mat[0:m, M:M+n] = complex_adjmat     # ligand-pocket block
mat[M:M+n, 0:m] = complex_adjmat.T   # pocket-ligand block (mirror of the ligand-pocket block)
mat

### Test atom typing

In [None]:
# Ligand Expo helper object (its construction is implemented in cbiprep; not shown here).
expo_dic = LigandExpo()
# Build an RDKit molecule for the ligand atoms using the '4CC' entry -- presumably this assigns
# the chemistry from the Ligand Expo definition; TODO confirm against cbiprep.
ligand_mol = expo_dic.assign(ligand_atoms, '4CC')
# Print the SMILES as a quick sanity check of the resulting molecule.
print(Chem.MolToSmiles(ligand_mol))

In [None]:
# Atom typer instance; it is called directly on an RDKit molecule. Its result is later written
# into an integer vector (presumably one type code per atom -- confirm against cbiprep).
atomtyper = HybAtomTyper()
ligand_types = atomtyper(ligand_mol)
ligand_types

In [None]:
# Parse the pocket's PDB text with RDKit so it can be typed with the same atom typer as the ligand.
pocket_mol = Chem.MolFromPDBBlock(str(pocket_atoms))
# RDKit parsers signal failure by returning None instead of raising, so fail here with a
# clear message rather than passing None into the atom typer.
if pocket_mol is None:
    raise ValueError('RDKit could not parse the pocket PDB block')
# NOTE(review): MolFromPDBBlock defaults to removeHs=True; if the pocket contains explicit
# hydrogens the number of typed atoms may differ from len(pocket_atoms) -- confirm.
pocket_types = atomtyper(pocket_mol)
pocket_types

In [None]:
# Padded type vector using the same slot layout as the padded adjacency matrix:
# ligand types go in the first m entries, pocket types start at offset M; unused slots stay 0.
types_vec = np.zeros(M + N, dtype=int)
types_vec[:m] = ligand_types
types_vec[M:M + n] = pocket_types
types_vec

### Type vector to one-hot encoding

In [None]:
# One-hot encode the type vector: for slot i with type t > 0, row (t - 1) of column i is set to 1.
# Slots whose type is not positive (e.g. the zero padding) stay all-zero columns.
onehot = np.zeros((atomtyper.MAX, N+M), dtype=int)
# Vectorised equivalent of looping over every slot: pick the typed slots once, then set
# all (type - 1, slot) positions in a single indexed assignment.
typed_slots = np.flatnonzero(types_vec > 0)
onehot[types_vec[typed_slots] - 1, typed_slots] = 1
onehot

### Supplement: per-atom RDKit properties, for future extensions

In [None]:
# Integer codes for the hybridization states of interest; any other state maps to None.
# Built once, outside the loop, because it is the same for every atom.
hyb_to_code = {
    Chem.HybridizationType.SP: 1,
    Chem.HybridizationType.SP2: 2,
    Chem.HybridizationType.SP3: 3,
}
aromatic_code = 5  # aromatic atoms override the hybridization-based code

# Print one summary line per ligand atom: index, element with charge/hydrogen labels,
# type code, and total valence / total degree.
for atom in ligand_mol.GetAtoms():
    idx = atom.GetIdx()
    el = atom.GetSymbol()
    # atom.GetAtomicNum() is also available should an extension need the atomic number.
    hyb = atom.GetHybridization()
    val = atom.GetTotalValence()
    deg = atom.GetTotalDegree()

    # Hydrogen label, e.g. "(2H)"; empty when the atom carries no hydrogens.
    n_hs = atom.GetTotalNumHs()
    h_label = f'({n_hs}H)' if n_hs != 0 else ''

    # Charge label shows only the sign of the formal charge, not its magnitude.
    charge = atom.GetFormalCharge()
    charge_label = '(+)' if 0 < charge else '(-)' if charge < 0 else ''

    typ = aromatic_code if atom.GetIsAromatic() else hyb_to_code.get(hyb)

    atomf = f'{idx:3d} {el}{charge_label}{h_label}'
    print(f'{atomf:<14} hyb={typ} v,d={val},{deg}')

### Run Flare