Skip to content

Commit

Permalink
Merge pull request #113 from marrink-lab/fix-write-graph
Browse files Browse the repository at this point in the history
 Fix the -write-* debug options in martinize2
  • Loading branch information
pckroon committed Sep 19, 2018
2 parents 68cd9f9 + 6e694e7 commit b187757
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 26 deletions.
18 changes: 13 additions & 5 deletions bin/martinize2
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,15 @@ def pdb_to_universal(system, delete_unknown=False,
canonicalized.force_field = force_field
vermouth.MakeBonds().run_system(canonicalized)
if write_graph is not None:
vermouth.pdb.write_pdb(system, str(write_graph), omit_charges=True)
vermouth.pdb.write_pdb(canonicalized, str(write_graph), omit_charges=True)
vermouth.RepairGraph(delete_unknown=delete_unknown).run_system(canonicalized)
if write_repair is not None:
vermouth.pdb.write_pdb(system, str(write_repair), omit_charges=True)
vermouth.pdb.write_pdb(canonicalized, str(write_repair),
omit_charges=True, nan_missing_pos=True)
vermouth.CanonicalizeModifications().run_system(canonicalized)
if write_canon is not None:
vermouth.pdb.write_pdb(system, str(write_canon), omit_charges=True)
vermouth.pdb.write_pdb(canonicalized, str(write_canon),
omit_charges=True, nan_missing_pos=True)
vermouth.AttachMass(attribute='mass').run_system(canonicalized)
return canonicalized

Expand Down Expand Up @@ -286,10 +288,16 @@ def entry():
debug_group.add_argument('-write-graph', type=Path, default=None,
help='Write the graph as PDB after the MakeBonds step.')
debug_group.add_argument('-write-repair', type=Path, default=None,
help='Write the graph as PDB after the RepairGraph step.')
help=('Write the graph as PDB after the '
'RepairGraph step. The resulting file may '
'contain "nan" coordinates making it '
'unreadable by most softwares.'))
debug_group.add_argument('-write-canon', type=Path, default=None,
help=('Write the graph as PDB after the '
'CanonicalizeModifications step.'))
'CanonicalizeModifications step. The '
'resulting file may contain "nan" '
'coordinates making it unreadable by most '
'softwares.'))

args = parser.parse_args()

Expand Down
32 changes: 24 additions & 8 deletions vermouth/pdb/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
Provides functions for reading and writing PDB files.
"""


from functools import partial

import numpy as np
Expand Down Expand Up @@ -51,7 +50,7 @@ def get_not_none(node, attr, default):
return value


def write_pdb_string(system, conect=True, omit_charges=True):
def write_pdb_string(system, conect=True, omit_charges=True, nan_missing_pos=False):
"""
Describes `system` as a PDB formatted string. Will create CONECT records
from the edges in the molecules in `system` iff `conect` is True.
Expand All @@ -65,6 +64,12 @@ def write_pdb_string(system, conect=True, omit_charges=True):
omit_charges: bool
Whether charges should be omitted. This is usually a good idea since
the PDB format can only deal with integer charges.
nan_missing_pos: bool
Wether the writing should fail if an atom does not have a position.
When set to `True`, atoms without coordinates will be written
with 'nan' as coordinates; this will cause the output file to be
*invalid* for most uses.
for most use.
Returns
-------
Expand Down Expand Up @@ -100,7 +105,14 @@ def keyfunc(graph, node_idx):
chain = node['chain']
resid = node['resid']
insertion_code = get_not_none(node, 'insertioncode', '')
x, y, z = node['position'] * 10 # converting from nm to A # pylint: disable=invalid-name
try:
# converting from nm to A
x, y, z = node['position'] * 10 # pylint: disable=invalid-name
except KeyError:
if nan_missing_pos:
x = y = z = float('nan') # pylint: disable=invalid-name
else:
raise
occupancy = get_not_none(node, 'occupancy', 1)
temp_factor = get_not_none(node, 'temp_factor', 0)
element = get_not_none(node, 'element', '')
Expand Down Expand Up @@ -138,7 +150,7 @@ def keyfunc(graph, node_idx):
return '\n'.join(out)


def write_pdb(system, path, conect=True, omit_charges=True):
def write_pdb(system, path, conect=True, omit_charges=True, nan_missing_pos=False):
"""
Writes `system` to `path` as a PDB formatted string.
Expand All @@ -153,13 +165,19 @@ def write_pdb(system, path, conect=True, omit_charges=True):
omit_charges: bool
Whether charges should be omitted. This is usually a good idea since
the PDB format can only deal with integer charges.
nan_missing_pos: bool
Wether the writing should fail if an atom does not have a position.
When set to `True`, atoms without coordinates will be written
with 'nan' as coordinates; this will cause the output file to be
*invalid* for most uses.
for most use.
See Also
--------
:func:write_pdb_string
"""
with open(path, 'w') as out:
out.write(write_pdb_string(system, conect, omit_charges))
out.write(write_pdb_string(system, conect, omit_charges, nan_missing_pos))


def do_conect(mol, conectlist):
Expand Down Expand Up @@ -194,8 +212,6 @@ def do_conect(mol, conectlist):
dist = distance(mol.node[at0]['position'], mol.node[atom]['position'])
mol.add_edge(at0, atom, distance=dist)

return


def read_pdb(file_name, exclude=('SOL',), ignh=False, model=0):
"""
Expand Down Expand Up @@ -240,7 +256,7 @@ def read_pdb(file_name, exclude=('SOL',), ignh=False, model=0):
record = line[:6]
if record == 'ENDMDL':
models.append(Molecule())
elif record == 'ATOM ' or record == 'HETATM':
elif record in ('ATOM ', 'HETATM'):
properties = {}
for name, type_, slice_ in zip(field_names, field_types, slices):
properties[name] = type_(line[slice_].strip())
Expand Down
4 changes: 2 additions & 2 deletions vermouth/tests/gmx/test_gro.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def filter_molecule(molecule, exclude, ignh):
Remove nodes from a graph based on some criteria.
Nodes are removed if their resname matches one from
the `exclude` argument, or of `ighn` is :bool:`True` and the node is a
the `exclude` argument, or of `ighn` is `True` and the node is a
hydrogen.
The atoms are renumbered after the filtered atoms are removed. This will
Expand All @@ -295,7 +295,7 @@ def filter_molecule(molecule, exclude, ignh):
exclude: list
List of residue name to exclude.
ighn: bool
If :bool:`True`, the hydrogens are excluded.
If `True`, the hydrogens are excluded.
"""
to_remove = []
exclusions = [('resname', value) for value in exclude]
Expand Down
100 changes: 89 additions & 11 deletions vermouth/tests/pdb/test_write_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,47 @@
Unittests for the PDB writer.
"""

# Pylint is wrongly complaining about fixtures.
# pylint: disable=redefined-outer-name


import numpy as np
import networkx as nx

import pytest

import vermouth
from vermouth.molecule import Molecule
from vermouth.pdb import pdb


def test_conect():
@pytest.fixture
def dummy_system():
"""
Build a system with 3 dummy molecules.
All nodes have the following attributes:
* `atomname`
* `resname`
* `resid`
* `chain` set to `''`
* `element` set to the value of `atomname`
* `positions`
Some nodes have a `charge` attribute set to an integer charge. The
molecules are connected.
"""
nodes = (
{'atomname': 'A', 'resname': 'A', 'resid': 1, },
{'atomname': 'B', 'resname': 'A', 'resid': 1, 'charge': -1},
{'atomname': 'C', 'resname': 'A', 'resid': 2, 'charge': 1},
{'atomname': 'D', 'resname': 'A', 'resid': 3, },
{'atomname': 'E', 'resname': 'A', 'resid': 4, },
{'atomname': 'F', 'resname': 'B', 'resid': 4, },
{'atomname': 'G', 'resname': 'B', 'resid': 4, },
{'atomname': 'H', 'resname': 'B', 'resid': 4, },
)
{'atomname': 'A', 'resname': 'A', 'resid': 1, },
{'atomname': 'B', 'resname': 'A', 'resid': 1, 'charge': -1},
{'atomname': 'C', 'resname': 'A', 'resid': 2, 'charge': 1},
{'atomname': 'D', 'resname': 'A', 'resid': 3, },
{'atomname': 'E', 'resname': 'A', 'resid': 4, },
{'atomname': 'F', 'resname': 'B', 'resid': 4, },
{'atomname': 'G', 'resname': 'B', 'resid': 4, },
{'atomname': 'H', 'resname': 'B', 'resid': 4, },
)
edges = [(0, 1), (2, 3), (4, 5), (5, 6), (5, 7)]
graph = nx.Graph()
for idx, node in enumerate(nodes):
Expand All @@ -48,7 +70,26 @@ def test_conect():
for component in nx.connected_components(graph)]
system = vermouth.system.System()
system.molecules = molecules
pdb_found = pdb.write_pdb_string(system, conect=True, omit_charges=False)
return system


@pytest.fixture
def missing_pos_system(dummy_system):
"""
Build a system of dummy molecules with some nodes lacking a `position`
attribute.
"""
for node in dummy_system.molecules[1].nodes.values():
del node['position']
return dummy_system


def test_conect(dummy_system):
"""
Test that the CONECT record is written as expected. Test also that the
overall format is correct.
"""
pdb_found = pdb.write_pdb_string(dummy_system, conect=True, omit_charges=False)
expected = '''
ATOM 1 A A 1 10.000 20.000 -30.000 1.00 0.00 A
ATOM 2 B A 1 10.000 20.000 -30.000 1.00 0.00 B 1-
Expand All @@ -68,3 +109,40 @@ def test_conect():
END
'''
assert pdb_found.strip() == expected.strip()


def test_write_failure_missing_pos(missing_pos_system):
"""
Make sure the writing fails when coordinates are missing and
`nan_missing_pos` is not set. (Shall be `False` by default.)
"""
with pytest.raises(KeyError):
pdb.write_pdb_string(missing_pos_system)


def test_write_success_missing_pos(missing_pos_system):
"""
Make sure the writing succeed when coordinates are missing and
`nan_missing_pos` is `True`.
"""
pdb_found = pdb.write_pdb_string(
missing_pos_system,
conect=False,
omit_charges=False,
nan_missing_pos=True, # Argument of interest!
)
expected = '''
ATOM 1 A A 1 10.000 20.000 -30.000 1.00 0.00 A
ATOM 2 B A 1 10.000 20.000 -30.000 1.00 0.00 B 1-
TER 3 A 1
ATOM 4 C A 2 nan nan nan 1.00 0.00 C 1+
ATOM 5 D A 3 nan nan nan 1.00 0.00 D
TER 6 A 3
ATOM 7 E A 4 10.000 20.000 -30.000 1.00 0.00 E
ATOM 8 F B 4 10.000 20.000 -30.000 1.00 0.00 F
ATOM 9 G B 4 10.000 20.000 -30.000 1.00 0.00 G
ATOM 10 H B 4 10.000 20.000 -30.000 1.00 0.00 H
TER 11 B 4
END
'''
assert pdb_found.strip() == expected.strip()

0 comments on commit b187757

Please sign in to comment.