Skip to content

Commit

Permalink
Merge pull request #86 from martimunicoy/check_PDB
Browse files Browse the repository at this point in the history
PDB checking
  • Loading branch information
martimunicoy committed Nov 6, 2020
2 parents 7884144 + 8279e07 commit 6f43122
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/releasehistory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ This minor release integrates the parameterization of OBC charges and scale fact
New features
""""""""""""
- `PR #88 <https://github.com/martimunicoy/peleffy/pull/88>`_: New method to retrieve atom degrees with RDKit.
- `PR #86 <https://github.com/martimunicoy/peleffy/pull/86>`_: New method to check the input PDB prior building the molecule.

Tests added
"""""""""""
- `PR #88 <https://github.com/martimunicoy/peleffy/pull/88>`_: Adds tests to validate the atom degrees getter.
- `PR #86 <https://github.com/martimunicoy/peleffy/pull/86>`_: Adds tests to validate the PDB check up.


1.0.0 - Full compatibility for OPLS2005 and OpenFF dihedrals and rotamer library improvements
Expand Down
10 changes: 10 additions & 0 deletions peleffy/data/tests/ethylene_error1.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
HETATM 1 C1 UNK 1 0.634 -0.083 0.007 1.00 0.00 C
HETATM 2 C1 UNK 1 -0.671 0.098 -0.007 1.00 0.00 C
HETATM 3 H1 UNK 1 1.354 0.739 0.049 1.00 0.00 H
HETATM 4 H2 UNK 1 1.082 -1.071 -0.022 1.00 0.00 H
HETATM 5 H3 UNK 1 -1.096 1.088 0.022 1.00 0.00 H
HETATM 6 H4 UNK 1 -1.302 -0.770 -0.049 1.00 0.00 H
CONECT 1 2 2 3 4
CONECT 2 5 6
END

10 changes: 10 additions & 0 deletions peleffy/data/tests/ethylene_error2.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
HETATM 1 C1 UNK 1 0.634 -0.083 0.007 1.00 0.00 C
HETATM 2 C2 UNK 1 -0.671 0.098 -0.007 1.00 0.00 C
HETATM 3 H1 UNK 1 1.354 0.739 0.049 1.00 0.00 H
HETATM 4 H2 UNK 1 1.082 -1.071 -0.022 1.00 0.00 H
HETATM 5 H3 UNK 2 -1.096 1.088 0.022 1.00 0.00 H
HETATM 6 H4 UNK 1 -1.302 -0.770 -0.049 1.00 0.00 H
CONECT 1 2 2 3 4
CONECT 2 5 6
END

10 changes: 10 additions & 0 deletions peleffy/data/tests/ethylene_error3.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
HETATM 1 C1 UNK 1 0.634 -0.083 0.007 1.00 0.00 C
HETATM 2 C2 UNK 1 -0.671 0.098 -0.007 1.00 0.00 C
HETATM 3 H1 UNN 1 1.354 0.739 0.049 1.00 0.00 H
HETATM 4 H2 UNK 1 1.082 -1.071 -0.022 1.00 0.00 H
HETATM 5 H3 UNK 1 -1.096 1.088 0.022 1.00 0.00 H
HETATM 6 H4 UNK 1 -1.302 -0.770 -0.049 1.00 0.00 H
CONECT 1 2 2 3 4
CONECT 2 5 6
END

8 changes: 8 additions & 0 deletions peleffy/data/tests/ethylene_error4.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
HETATM 1 C1 UNK 1 0.634 -0.083 0.007 1.00 0.00 C
HETATM 2 C2 UNK 1 -0.671 0.098 -0.007 1.00 0.00 C
HETATM 3 H1 UNK 1 1.354 0.739 0.049 1.00 0.00 H
HETATM 4 H2 UNK 1 1.082 -1.071 -0.022 1.00 0.00 H
HETATM 5 H3 UNK 1 -1.096 1.088 0.022 1.00 0.00 H
HETATM 6 H4 UNK 1 -1.302 -0.770 -0.049 1.00 0.00 H
END

49 changes: 49 additions & 0 deletions peleffy/tests/test_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,52 @@ def check_residue_name(name):
assert molecule.tag == 'BNZ', 'Unexpected molecule tag'
molecule.to_pdb_file('molecule.pdb')
check_residue_name('BNZ')

def test_PDB_checkup(self):
"""It tests the safety check function for PDB files."""

LIGAND_GOOD = get_data_file_path('ligands/ethylene.pdb')
LIGAND_ERROR1 = get_data_file_path('tests/ethylene_error1.pdb')
LIGAND_ERROR2 = get_data_file_path('tests/ethylene_error2.pdb')
LIGAND_ERROR3 = get_data_file_path('tests/ethylene_error3.pdb')
LIGAND_ERROR4 = get_data_file_path('tests/ethylene_error4.pdb')

# This should work without any complain
_ = Molecule(LIGAND_GOOD)

# All atom names need to be unique
with pytest.raises(Exception):
_ = Molecule(LIGAND_ERROR1)

# All residue ids must match
with pytest.raises(Exception):
_ = Molecule(LIGAND_ERROR2)

# All residue names must match
with pytest.raises(Exception):
_ = Molecule(LIGAND_ERROR3)

# Check warning message in the logger when connectivity is missing
import io
from peleffy.utils import Logger
import logging
from importlib import reload
logging.shutdown()
reload(logging)

log = Logger()
log.set_level('WARNING')

# Catch logger messages to string buffer
with io.StringIO() as buf:
log_handler = logging.StreamHandler(buf)
log._logger.handlers = list()
log._logger.addHandler(log_handler)

_ = Molecule(LIGAND_ERROR4)

output = buf.getvalue()

assert output == "Input PDB has no information about the " \
+ "connectivity and this could result in an unexpected " \
+ "bond assignment\n"
41 changes: 41 additions & 0 deletions peleffy/topology/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,44 @@ def _initialize(self):
from peleffy.forcefield.parameters import BaseParameterWrapper
self._parameters = BaseParameterWrapper()

def _PDB_checkup(self, path):
"""
Safety check for PDB files in order to properly handle exceptions
related with its format prior running the parameterization.
Parameters
----------
path : str
The path to a PDB with the molecule structure
"""

# Parse PDB file
atom_id, res_name, res_id = ([] for i in range(3))
connectivity = False
for line in open(path):
if line.startswith('HETATM'):
atom_id.append(line[13:16])
res_name.append(line[18:20])
res_id.append(line[23:26])
if line.startswith('CONECT'):
connectivity = True

# Handle exceptions related with the PDB file format
if not res_id[:-1] == res_id[1:]:
raise Exception(
'A single ligand with immutable residue ids is expected')
if not res_name[:-1] == res_name[1:]:
raise Exception(
'A single ligand with immutable residue names is expected')
if not len(atom_id) == len(set(atom_id)):
raise Exception(
'Ligand in input PDB has no unique atom names')
if not connectivity:
log = Logger()
log.warning(
"Input PDB has no information about the connectivity and"
+ " this could result in an unexpected bond assignment")

def _initialize_from_pdb(self, path):
"""
It initializes a molecule with the molecule structure read from
Expand All @@ -585,6 +623,9 @@ def _initialize_from_pdb(self, path):
logger.info(' - Initializing molecule from PDB')
self._initialize()

# Validate PDB
self._PDB_checkup(path)

logger.info(' - Loading molecule from RDKit')
rdkit_toolkit = RDKitToolkitWrapper()
self._rdkit_molecule = rdkit_toolkit.from_pdb(path)
Expand Down

0 comments on commit 6f43122

Please sign in to comment.