Skip to content

Commit

Permalink
Merge pull request #122 from marrink-lab/minor-optim
Browse files Browse the repository at this point in the history
Minor performance optimizations
  • Loading branch information
pckroon committed Sep 27, 2018
2 parents 16b361c + 51dd146 commit 8f9fbe0
Show file tree
Hide file tree
Showing 9 changed files with 328 additions and 59 deletions.
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ env:
- VERMOUTH_TEST_DSSP=mkdssp
- SKIP_GENERATE_AUTHORS=1
- SKIP_WRITE_GIT_CHANGELOG=1
matrix:
- WITH_SCIPY=true
install:
- pip install --upgrade setuptools pip
- if [[ ${WITH_SCIPY} == true ]]; then pip install scipy; fi
- pip install --upgrade -r requirements-tests.txt
- pip install --upgrade .
script:
Expand Down Expand Up @@ -42,6 +45,7 @@ jobs:
sudo: true
- python: "3.8-dev"
sudo: true
env: WITH_SCIPY=false

- stage: docs
python: "3.5"
Expand Down
2 changes: 1 addition & 1 deletion bin/martinize2
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def pdb_to_universal(system, delete_unknown=False,
if write_graph is not None:
vermouth.pdb.write_pdb(canonicalized, str(write_graph), omit_charges=True)
LOGGER.info('Repairing the graph.', type='step')
vermouth.RepairGraph(delete_unknown=delete_unknown).run_system(canonicalized)
vermouth.RepairGraph(delete_unknown=delete_unknown, include_graph=False).run_system(canonicalized)
if write_repair is not None:
vermouth.pdb.write_pdb(canonicalized, str(write_repair),
omit_charges=True, nan_missing_pos=True)
Expand Down
72 changes: 48 additions & 24 deletions vermouth/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,30 +348,48 @@ def atoms(self):
node_attr = self.node[node]
yield node, node_attr

def copy(self, as_view=False):
'''
def copy(self):
"""
Creates a copy of the molecule.
See Also
--------
:meth:`networkx.Graph.copy`
'''
copy = super().copy(as_view)
if not as_view:
copy = self.__class__(copy)
copy._force_field = self.force_field
copy.meta = self.meta.copy()
return copy

def subgraph(self, *args, **kwargs):
'''
Returns
-------
Molecule
"""
return self.subgraph(self.nodes)

def subgraph(self, nodes):
"""
Creates a subgraph from the molecule.
See Also
--------
:meth:`networkx.Graph.subgraph`
'''
return self.__class__(super().subgraph(*args, **kwargs))
Returns
-------
Molecule
"""
subgraph = self.__class__()
subgraph.meta = copy.copy(self.meta)
subgraph._force_field = self._force_field
subgraph.nrexcl = self.nrexcl

node_copies = [(node, copy.copy(self.nodes[node])) for node in nodes]
subgraph.add_nodes_from(node_copies)

nodes = set(nodes)

#edges_to_add = [
# (node, node2)
# for node in nodes
# for node2 in set(self[node]) & nodes
#]
subgraph.add_edges_from(self.edges_between(nodes, nodes, data=True))

for interaction_type, interactions in self.interactions.items():
for interaction in interactions:
if all(atom in nodes for atom in interaction.atoms):
subgraph.interactions[interaction_type].append(interaction)

return subgraph

def add_interaction(self, type_, atoms, parameters, meta=None):
"""
Expand Down Expand Up @@ -628,7 +646,7 @@ def iter_residues(self):
residue_graph = graph_utils.make_residue_graph(self)
return (tuple(residue_graph.nodes[res]['graph'].nodes) for res in residue_graph.nodes)

def edges_between(self, n_bunch1, n_bunch2):
def edges_between(self, n_bunch1, n_bunch2, data=False):
"""
Returns all edges in this molecule between nodes in `n_bunch1` and
`n_bunch2`.
Expand All @@ -646,9 +664,15 @@ def edges_between(self, n_bunch1, n_bunch2):
A list of tuples of edges in this molecule. The first element of
the tuple will be in `n_bunch1`, the second element in `n_bunch2`.
"""
return [(node1, node2)
for node1, node2 in itertools.product(n_bunch1, n_bunch2)
if self.has_edge(node1, node2)]
set_1 = set(n_bunch1)
set_2 = set(n_bunch2)
for node1 in set_1:
cross = set_2 & set(self[node1])
for node2 in cross:
if not data:
yield (node1, node2)
else:
yield (node1, node2, self.edges[node1, node2])


class Block(Molecule):
Expand Down
47 changes: 36 additions & 11 deletions vermouth/processors/make_bonds.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def bonds_from_distance(system, fudge=1.1):
The possible distance between nodes is determined by values in
`VDW_RADII`.
Notes
-----
Elements that are not in `VDW_RADII` do not make bonds.
Parameters
----------
system: :class:`~vermouth.system.System`
Expand All @@ -76,25 +80,46 @@ def bonds_from_distance(system, fudge=1.1):
certain distance from each other. It is probably disconnected.
"""
system = nx.compose_all(system.molecules)
idx_to_nodenum = {idx: n for idx, n in enumerate(system)}
max_dist = max(VDW_RADII.get(node.get('element'), 0.2) for node in system.nodes.values())
positions = np.array([system.node[n]['position'] for n in system], dtype=float)
# We filter out the nodes for which we do not know the radius. Indeed, we
# consider these nodes cannot make bonds. The filtering is done before we
# enter the KDTree; we only provide to the KDTree the position of the nodes
# that could make a bond. `idx_to_nodenum` make the link between the
# indices in the `positions` array, and the node keys in the `system`
# graph.
idx_to_nodenum = {
idx: n
for idx, n in enumerate(
subn
for subn in system
if system.nodes[subn].get('element') in VDW_RADII
)
}
max_dist = max(
VDW_RADII[node.get('element')]
for node in system.nodes.values()
if node.get('element') in VDW_RADII
)
positions = np.array([
node['position']
for node in system.nodes.values()
if node.get('element') in VDW_RADII
], dtype=float)
tree = KDTree(positions)
pairs = tree.query_pairs(2*max_dist*fudge) # eps=fudge-1?
pairs = tree.sparse_distance_matrix(tree, max_dist * fudge)

for idx1, idx2 in pairs:
for (idx1, idx2), dist in pairs.items():
if idx1 >= idx2:
continue
node_idx1 = idx_to_nodenum[idx1]
node_idx2 = idx_to_nodenum[idx2]
atom1 = system.node[node_idx1]
atom2 = system.node[node_idx2]
element1 = atom1['element']
element2 = atom2['element']
if element1 in VDW_RADII and element2 in VDW_RADII:
# Elements we do not know never make bonds.
dist = distance(atom1['position'], atom2['position'])
bond_distance = 0.5 * (VDW_RADII[element1] + VDW_RADII[element2])
if dist <= bond_distance * fudge:
system.add_edge(node_idx1, node_idx2, distance=dist)

bond_distance = 0.5 * (VDW_RADII[element1] + VDW_RADII[element2])
if dist <= bond_distance * fudge:
system.add_edge(node_idx1, node_idx2, distance=dist)
return system


Expand Down
21 changes: 14 additions & 7 deletions vermouth/processors/repair_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def make_reference(mol):
return reference_graph


def repair_residue(molecule, ref_residue):
def repair_residue(molecule, ref_residue, include_graph):
"""
Rebuild missing atoms and canonicalize atomnames
"""
Expand All @@ -147,8 +147,8 @@ def repair_residue(molecule, ref_residue):
if ref_idx in match:
res_idx = match[ref_idx]
node = molecule.nodes[res_idx]
# Copy, because there are references everywhere.
node['graph'] = molecule.subgraph([res_idx]).copy()
if include_graph:
node['graph'] = molecule.subgraph([res_idx])
node.update(reference.nodes[ref_idx])
# Update found as well to keep found and molecule in line. It would
# be better to try and figure why found is not a reference, but meh
Expand Down Expand Up @@ -224,7 +224,7 @@ def repair_residue(molecule, ref_residue):
type='missing-atom')


def repair_graph(molecule, reference_graph):
def repair_graph(molecule, reference_graph, include_graph=True):
"""
Repairs a molecule graph produced based on the information in
``reference_graph``. Missing atoms will be added and atom- and residue-
Expand All @@ -235,6 +235,9 @@ def repair_graph(molecule, reference_graph):
are added, atom and residue names are canonicalized, and PTM atoms are
marked.
If `include_graph` is `True`, then the subgraph corresponding to each node
is included in the node under the "graph" attribute.
Parameters
----------
molecule : molecule.Molecule
Expand All @@ -256,10 +259,13 @@ def repair_graph(molecule, reference_graph):
:match: A dictionary describing how the reference corresponds
with the provided graph. Keys are node indices of the
reference, values are node indices of the provided graph.
include_graph: bool
Include the subgraph in the nodes.
"""
for residx in reference_graph:
residue = reference_graph.nodes[residx]
repair_residue(molecule, residue)
repair_residue(molecule, residue, include_graph=include_graph)
# Atomnames are canonized, and missing atoms added
found = reference_graph.nodes[residx]['found']
match = reference_graph.nodes[residx]['match']
Expand All @@ -280,14 +286,15 @@ def repair_graph(molecule, reference_graph):


class RepairGraph(Processor):
def __init__(self, delete_unknown=False):
def __init__(self, delete_unknown=False, include_graph=True):
super().__init__()
self.delete_unknown = delete_unknown
self.include_graph=include_graph

def run_molecule(self, molecule):
molecule = molecule.copy()
reference_graph = make_reference(molecule)
repair_graph(molecule, reference_graph)
repair_graph(molecule, reference_graph, include_graph=self.include_graph)
return molecule

def run_system(self, system):
Expand Down
38 changes: 38 additions & 0 deletions vermouth/redistributed/kdtree.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Copyright Anne M. Archibald 2008
# Released under the scipy license


# KDTree.sparse_distance_matrix has been added as part of the vermouth library.
# It does not exactly match the scipy corresponding function from the cKDTree
# module.

from __future__ import division, print_function, absolute_import

import sys
Expand Down Expand Up @@ -694,6 +700,38 @@ def traverse_no_checking(node1, node2):
other.tree, Rectangle(other.maxes, other.mins))
return results

def sparse_distance_matrix(self, other, max_distance, p=2.0):
"""
Emulate :meth:`scipy.spacial.cKDtree.sparse_distance_matrix` without scipy.
The return value is a dictionary instead of scipy sparse matrix.
Notes
-----
This is not the original scipy method!
Parameters
----------
other: KDTree
max_distance: positive float
p: float, optional
Returns
-------
dict
Pseudo `dok_matrix` where the keys are
`(index_in_self, index_in_other)` and the values are distances.
"""
result = {}
pairs = self.query_ball_tree(other, max_distance, p=p)
for idx1, neighbors in enumerate(pairs):
for idx2 in neighbors:
dist = minkowski_distance(self.data[idx1], other.data[idx2], p=p)
if dist <= max_distance:
result[(idx1, idx2)] = dist
return result


def query_pairs(self, r, p=2., eps=0):
"""
Find all pairs of points within a distance.
Expand Down

0 comments on commit 8f9fbe0

Please sign in to comment.