/
serial.py
158 lines (138 loc) · 5.73 KB
/
serial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""
This is a class I use to _apply_ PLIP to a pd.Series of molecules.
It is not built for the project, but works.
Note I have not dumped the methods that are not needed for the project.
"""
import os
from pathlib import Path
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import PandasTools
from functools import singledispatchmethod
from typing import Tuple, Dict, List, Union
from collections import Counter, defaultdict
from plip.structure.preparation import PDBComplex, PLInteraction
from openbabel.pybel import Atom, Residue
from openbabel.pybel import ob
from fragmenstein.victor import MinimalPDBParser
import warnings
class SerialPLIPper:
"""
Calling the instance will return a ``Dict[Tuple[str, str, int], int]``,
where the key is interaction type, residue 3-letter name, residue index
and the value is the count of interactions.
Basically, applying Plip to a pd.Series of Chem.Mol.
Unplacking it is kind of wierd, the best way I reckon is a brutal for-loop:
.. code-block:: python
import pandas as pd
import pandera.typing as pdt
intxndexes: pdt.Series[Dict[Tuple[str, str, int], int]] = hits.ROMol.apply(SerialPLIPper(pdb_filename))
# columns will still be a tuple...:
intxn_df = pd.DataFrame(intxndexes.to_list()).fillna(0).astype(int)
hits['N_interactions'] = intxn_df.sum(axis='columns')
for c in sorted(intxn_df.columns, key=lambda kv: kv[2]):
# columns will be a colon-separated string:
hits[':'.join(map(str, c))] = intxn_df[c]
"""
def __init__(self, pdb_block: str, resn='LIG', chain='B'):
assert 'ATOM' in pdb_block, f'No ATOM entry in block provided: {pdb_block}'
self.pdb_block = pdb_block
self.resn = resn
self.chain = chain
@classmethod
def from_filename(cls, pdb_filename: str, *args, **kwargs):
"""
The main constructor is from PDB block, this is from PDB file
"""
with open(pdb_filename, 'r') as f:
pdb_block = f.read()
return cls(pdb_block, *args, **kwargs)
def __call__(self, mol) -> Dict[Tuple[str, str, int], int]:
if mol is None or not isinstance(mol, Chem.Mol) or mol.GetNumAtoms() == 0:
return {}
holo: str = self.plonk(mol)
interaction_set: PLInteraction = self.get_interaction_set(holo)
return self.get_interaction_counts(interaction_set)
def assign_pdb(self, mol: Chem.Mol):
"""
Fix the PDB info for the molecule, in place
"""
counts = defaultdict(int)
atom: Chem.Atom
for atom in mol.GetAtoms():
element: str = atom.GetSymbol()
counts[element] += 1
info = Chem.AtomPDBResidueInfo(atomName=f'{element: >2}{counts[element]: <2}',
residueName=self.resn,
residueNumber=1, chainId=self.chain)
atom.SetPDBResidueInfo(info)
def plonk(self, mol):
"""
Temporarily here. Do not copy.
There likely is a way to do this in OBabel
This is using Fragmenstein ``MinimalPDBParser``.
:param mol:
:return:
"""
pdbdata = MinimalPDBParser(self.pdb_block, remove_other_hetatms=True, ligname=self.resn)
self.assign_pdb(mol)
moldata = MinimalPDBParser(Chem.MolToPDBBlock(mol))
pdbdata.append(moldata)
return str(pdbdata)
@singledispatchmethod
def get_interaction_set(self) -> PLInteraction:
"""
Overloaded method: block or mol return the iternaction set
:return:
"""
raise NotImplementedError
@get_interaction_set.register
def _(self, block: str) -> PLInteraction:
holo = PDBComplex()
holo.load_pdb(block, as_string=True)
holo.analyze()
return holo.interaction_sets[':'.join([self.resn, self.chain, str(1)])]
@get_interaction_set.register
def _(self, mol: Chem.Mol) -> PLInteraction:
if mol.GetNumAtoms() == 0:
raise ValueError('Molecule has no atoms')
holo = PDBComplex()
holo.load_pdb(self.plonk(mol), as_string=True)
holo.analyze()
return holo.interaction_sets[':'.join([self.resn, self.chain, str(1)])]
def get_atomname(self, atom: Union[Atom, ob.OBAtom]) -> str:
"""
Given an atom, return its name.
"""
if isinstance(atom, Atom):
res: ob.OBResidue = atom.residue.OBResidue
obatom = atom.OBAtom
elif isinstance(atom, ob.OBAtom):
obatom: ob.OBAtom = atom
res: ob.OBResidue = obatom.GetResidue()
else:
raise TypeError
return res.GetAtomID(obatom)
def get_atom_by_atomname(self, residue: Union[ob.OBResidue, Residue], atomname: str) -> ob.OBAtom:
"""
Get an atom by its name in a residue.
"""
if isinstance(residue, Residue):
residue = residue.OBResidue
obatom: ob.OBAtom
for obatom in ob.OBResidueAtomIter(residue):
if residue.GetAtomID(obatom).strip() == atomname:
return obatom
else:
raise ValueError(f'No atom with name {atomname} in residue {residue.GetName()}')
def get_interaction_counts(self, interaction_set: PLInteraction) -> Dict[Tuple[str, str, int], int]:
"""
Count the number of interactions of each type for each residue
"""
intxns: List = interaction_set.all_itypes
intxn_dex = defaultdict(int)
for intxn in intxns:
key = (intxn.__class__.__name__, intxn.restype, intxn.resnr)
intxn_dex[key] += 1
return dict(sorted(intxn_dex.items(), key=lambda kv: kv[0][2]))