Skip to content

Commit

Permalink
Merge pull request #103 from marrink-lab/mol-compare
Browse files Browse the repository at this point in the history
Molecule comparison
  • Loading branch information
jbarnoud committed Feb 7, 2019
2 parents 811db0b + 6b18e2c commit 87a272e
Show file tree
Hide file tree
Showing 5 changed files with 1,397 additions and 45 deletions.
228 changes: 227 additions & 1 deletion vermouth/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
from collections import defaultdict, OrderedDict, namedtuple
import copy
from functools import partial
import itertools

import networkx as nx
import numpy as np

from . import graph_utils
from . import geometry
from . import utils


Interaction = namedtuple('Interaction', 'atoms parameters meta')
Expand Down Expand Up @@ -315,6 +317,20 @@ class Molecule(nx.Graph):
"""
Represents a molecule as per a specific force field. Consists of atoms
(nodes), bonds (edges) and interactions such as angle potentials.
Two molecules are equal if:
* the exclusion distance (nrexcl) are equal
* the force fields are equal (but may be different instances)
* the nodes are equal and in the same order
* the edges are equal (but order is not accounted for)
* the interactions are the same and in the same order within an interaction
type
When comparing molecules, the order of the nodes is considered as it
determines in what order atoms will be written in the output. Same goes for
the interactions within an interaction type. The order of edges is not
guaranteed anywhere in the code, and they are not writen in the output.
"""
# As the particles are stored as nodes, we want the nodes to stay
# ordered.
Expand All @@ -327,6 +343,15 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.interactions = defaultdict(list)

def __eq__(self, other):
return (
self.nrexcl == other.nrexcl
and self._force_field == other._force_field
and self.same_nodes(other)
and self.same_edges(other)
and self.same_interactions(other)
)

@property
def force_field(self):
"""
Expand Down Expand Up @@ -375,6 +400,7 @@ def subgraph(self, nodes):
Molecule
"""
subgraph = self.__class__()
subgraph.name = self.name
subgraph.meta = copy.copy(self.meta)
subgraph._force_field = self._force_field
subgraph.nrexcl = self.nrexcl
Expand Down Expand Up @@ -642,6 +668,127 @@ def share_moltype_with(self, other):
# the interactions.
return nx.is_isomorphic(self, other)

# TODO: Allow comparison of interactions betweem isomorphic molecules.
def same_interactions(self, other):
"""
Returns `True` if the interactions are the same.
To be equal, two interactions must share the same node key reference,
the same interaction parameters, and the same meta attributes. Empty
interaction categories are ignored.
Parameters
----------
other: Molecule
Returns
-------
bool
"""
keys_self = set(
interaction_type
for interaction_type, interactions
in self.interactions.items()
if interactions
)
keys_other = set(
interaction_type
for interaction_type, interactions
in other.interactions.items()
if interactions
)
# We first make sure that the two molecules share the same relevant
# interaction categories. A relevant category is one that actually has
# interactions. If the molecules share the relevant categories, then we
# can loop over the categories of one or the other, without issues with
# mismatches.
if keys_self != keys_other:
return False
return all(
self.interactions[interaction_type] == other.interactions[interaction_type]
for interaction_type in keys_self
)

# TODO: Allow default values for attributes.
# In most cases, we assume that an unspecified attribute is equivalent to
# the attribute being None; we should allow for this in the comparison. We
# should also allow an attribute to have an arbitrary default value. For
# instance, the assumed default for the `PTM_atom` attribute is False.
def same_nodes(self, other, ignore_attr=()):
"""
Returns `True` if the nodes are the same and in the same order.
The equality criteria used for the attribute values are those of
:func:`vermouth.utils.are_different`.
Parameters
----------
other: Molecule
ignore_attr: collections.abc.Container
Attribute keys that will not be considered in the comparison.
Returns
-------
bool
"""
# We first check that the node keys match between the two molecules.
# The order matters here, and we count on the fact that Molecules are
# OrderedDicts.
if list(self.nodes.keys()) != list(other.nodes.keys()):
return False
zipped_nodes = zip(self.nodes.values(), other.nodes.values())
for self_node, other_node in zipped_nodes:
# For each pair of nodes, we compare the attribute dictionaries.
# The order does not matter here.
self_keys = set(key for key in self_node if key not in ignore_attr)
other_keys = set(key for key in other_node if key not in ignore_attr)
if self_node.keys() != other_node.keys():
return False
# We can loop over the keys because we tested above that they were
# matching between the two dicts.
for key in self_keys:
self_value = self_node[key]
other_value = other_node[key]
if utils.are_different(self_value, other_value):
return False
return True

def same_edges(self, other):
"""
Compare the edges between this molecule and an other.
Edges are unordered and undirected, but they can have attributes.
Parameters
----------
other: networkx.Graph
The other molecule to compare the edges with.
Returns
-------
bool
"""
# The items of `graph.edges(data=True)` are formatted as
# (from, to, {dict of attributes}).
edges_self = {
frozenset(edge[:2]): edge[2]
for edge in self.edges(data=True)
}
edges_other = {
frozenset(edge[:2]): edge[2]
for edge in other.edges(data=True)
}
# We first start with the cheapest test: if the edges do not match
# regardless of attributes, we can save the more costly tests.
if set(edges_self.keys()) != set(edges_other.keys()):
return False

# We know that the keys in `edges_self` and `edges_other` are the same.
return all(
not utils.are_different(edges_self[edge], edges_other[edge])
for edge in edges_self
)

def iter_residues(self):
"""
Returns a generator over the nodes of this molecules residues.
Expand Down Expand Up @@ -723,6 +870,9 @@ class Block(Molecule):
"""
Residue topology template
Two blocks are equal if the underlying molecules are equal, and if the
block names are equal.
Parameters
----------
incoming_graph_data:
Expand Down Expand Up @@ -771,6 +921,9 @@ def _set_defaults(self, defaults):
if not hasattr(self, attribute):
setattr(self, attribute, default_value)

def __eq__(self, other):
return self.name == other.name and super().__eq__(other)

def __repr__(self):
name = self.name
if name is None:
Expand Down Expand Up @@ -981,10 +1134,26 @@ class Link(Block):
"""
Template link between two residues.
Two links are equal if:
* the underlying molecules are equal
* the names are equal
* the negative edges ("non-edges") are equal regardless of order
* the interactions to remove are the same and in the same order
* the meta variables are equal
* the pattern definitions are equal and in the same order
* the features are equals regardless of order
A link does not match if any of the non-edges match the target; their
order therefore is not important. Same goes for features that just need to
be present or not. The order does matter however for interactions to remove
as removing the interactions in a different order may lead to a different
set of remaining interactions.
Parameters
----------
incoming_graph_data:
Data to initialize graph. If None (default) an empty graph is created.
Data to initialize graph. If `None` (default) an empty graph is created.
attr:
Attributes to add to graph as key=value pairs.
"""
Expand All @@ -1005,6 +1174,63 @@ def __init__(self, incoming_graph_data=None, **attr):
self._set_defaults(defaults)
self._apply_to_all_nodes = {}

def __eq__(self, other):
# TODO: There is no good reason to care about the order of patterns
# except for the fact that order free comparison of non hashable things
# is a pain.
return (super().__eq__(other)
and self.same_non_edges(other)
and self.removed_interactions == other.removed_interactions
and self.molecule_meta == other.molecule_meta
and self.patterns == other.patterns
and set(self.features) == set(other.features)
)

def same_non_edges(self, other):
"""
Returns `True` if all the non-edges of an `other` link are equal to
those of this link. Returns `False` otherwise.
"""
# A non-edge is a list of two elements: the key to a node in the graph
# that is used as anchor, and an attribute dict that must match none of
# the atoms connected to the anchor.
# For the link to match, none of the non-edges must match. Therefore,
# their order do not matter. Though, because the attribute dicts are
# not hashable, non-edges cannot be put in a set.

# If the number of non-edges is not the same, we can save the hasle.
if len(self.non_edges) != len(other.non_edges):
return False
# If there is no non-edges, which is likely the most common case, we
# can also save some effort.
if not self.non_edges:
return True

# We sort the non-edges to get rid of the order. However, we cannot
# sort them on the attribute dict, and there may be more than one
# non-edge involving a given anchor. So for each anchor, we need to
# compare the dicts of that link with all the non already assigned
# dicts of the other link that share the same anchor.
sorted_self = sorted(self.non_edges, key=lambda x: x[0])
sorted_other = sorted(other.non_edges, key=lambda x: x[0])
zipped = zip(itertools.groupby(sorted_self, key=lambda x: x[0]),
itertools.groupby(sorted_other, key=lambda x: x[0]))
for (key_self, attrs_self), (key_other, attrs_other) in zipped:
if key_self != key_other:
return False
attrs_self = list(attrs_self)
attrs_other = list(attrs_other)
if len(attrs_self) != len(attrs_other):
return False
for attr_self in attrs_self:
for i, attr_other in enumerate(attrs_other):
if not utils.are_different(attr_self, attr_other):
del attrs_other[i]
break
else: # no break
return False
return True


def attributes_match(attributes, template_attributes, ignore_keys=()):
"""
Expand Down
45 changes: 1 addition & 44 deletions vermouth/tests/gmx/test_gro.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import hypothesis.extra.numpy as hnp

import vermouth
from vermouth.utils import are_different
from vermouth.molecule import Molecule
from vermouth.gmx import gro

Expand Down Expand Up @@ -418,31 +419,6 @@ def generate_equal_dict(draw):
return dict_a, dict_b


def are_different(left, right):
"""
Return True if two values are different from one another.
Values are considered different if they do not share the same type. In case
of numerical value, the comparison is done with :func:`numpy.isclose` to
account for rounding. In the context of this test, `nan` compares equal to
itself, which is not the default behavior.
"""
if not isinstance(left, right.__class__):
return True

left = np.asarray(left)
right = np.asarray(right)

if left.shape != right.shape:
return True

# For numbers, we want an approximate comparison to account for rounding
# errors; it only works for numbers, though.
try:
return not np.all(np.isclose(left, right, equal_nan=True))
except TypeError:
return np.any(left != right)


@st.composite
def generate_diff_dict(draw):
Expand Down Expand Up @@ -489,25 +465,6 @@ def test_compare_dict_diff(dict_a_and_b):
assert compare_dicts(dict_a, dict_b) # report is not empty


@pytest.mark.parametrize(
'left, right',
itertools.combinations(DIFFERENCE_USE_CASE, 2)
)
def test_are_different(left, right):
"""
Test that :func:`are_different` identify different values.
"""
assert are_different(left, right)


@pytest.mark.parametrize('left', DIFFERENCE_USE_CASE)
def test_not_are_different(left):
"""
Test that :func:`are_different` returns False for equal values.
"""
assert not are_different(left, left)


def test_filter_molecule_none(gro_reference): # pylint: disable=redefined-outer-name
"""
Test that :func:`filter_molecule` does not remove nodes when not asked for.
Expand Down

0 comments on commit 87a272e

Please sign in to comment.