Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix isomorphic for molecular graphs #3221

Merged
merged 6 commits into from Aug 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
130 changes: 69 additions & 61 deletions pymatgen/analysis/graphs.py
Expand Up @@ -60,7 +60,7 @@ def _igraph_from_nxgraph(graph):
return new_igraph


def _isomorphic(frag1, frag2):
def _isomorphic(frag1: nx.Graph, frag2: nx.Graph) -> bool:
"""
Internal function to check if two graph objects are isomorphic, using igraph if
if is available and networkx if it is not.
Expand All @@ -69,8 +69,9 @@ def _isomorphic(frag1, frag2):
f2_nodes = frag2.nodes(data=True)
if len(f1_nodes) != len(f2_nodes):
return False
f1_edges = frag1.edges()
f2_edges = frag2.edges()
if len(f2_edges) != len(f2_edges):
if len(f1_edges) != len(f2_edges):
return False
f1_comp_dict = {}
f2_comp_dict = {}
Expand All @@ -96,34 +97,38 @@ def _isomorphic(frag1, frag2):

class StructureGraph(MSONable):
"""
This is a class for annotating a Structure with bond information, stored in the form
of a graph. A "bond" does not necessarily have to be a chemical bond, but can store
any kind of information that connects two Sites.
This is a class for annotating a Structure with
bond information, stored in the form of a graph. A "bond" does
not necessarily have to be a chemical bond, but can store any
kind of information that connects two Sites.
"""

def __init__(self, structure: Structure, graph_data=None):
"""
If constructing this class manually, use the with_empty_graph method or
with_local_env_strategy method (using an algorithm provided by the local_env
module, such as O'Keeffe).
If constructing this class manually, use the `with_empty_graph`
method or `with_local_env_strategy` method (using an algorithm
provided by the `local_env` module, such as O'Keeffe).

This class that contains connection information: relationships between sites
represented by a Graph structure, and an associated structure object.
This class that contains connection information:
relationships between sites represented by a Graph structure,
and an associated structure object.

This class uses the NetworkX package to store and operate
on the graph itself, but contains a lot of helper methods
to make associating a graph with a given crystallographic
structure easier.

This class uses the NetworkX package to store and operate on the graph itself, but
contains a lot of helper methods to make associating a graph with a given
crystallographic structure easier.
Use cases for this include storing bonding information,
NMR J-couplings, Heisenberg exchange parameters, etc.

Use cases for this include storing bonding information, NMR J-couplings,
Heisenberg exchange parameters, etc.
For periodic graphs, class stores information on the graph
edges of what lattice image the edge belongs to.

For periodic graphs, class stores information on the graph edges of what lattice
image the edge belongs to.
:param structure: a Structure object

Args:
structure (Structure): Structure object to be analyzed.
graph_data (dict): Dictionary containing graph information. Not intended to be
constructed manually see as_dict method for format.
:param graph_data: dict containing graph information in
dict format (not intended to be constructed manually,
see as_dict method for format)
"""
if isinstance(structure, StructureGraph):
# just make a copy from input
Expand All @@ -132,43 +137,46 @@ def __init__(self, structure: Structure, graph_data=None):
self.structure = structure
self.graph = nx.readwrite.json_graph.adjacency_graph(graph_data)

# tidy up edge attr dicts, reading to/from json duplicates information
for _, _, _, dct in self.graph.edges(keys=True, data=True):
# tidy up edge attr dicts, reading to/from json duplicates
# information
for _, _, _, d in self.graph.edges(keys=True, data=True):
for key in ("id", "key"):
dct.pop(key, None)
# ensure images are tuples (conversion to lists happens when serializing back
# from json), it's important images are hashable/immutable
if to_jimage := dct.get("to_jimage"):
dct["to_jimage"] = tuple(to_jimage)
if from_jimage := dct.get("from_jimage"):
dct["from_jimage"] = tuple(from_jimage)
d.pop(key, None)
# ensure images are tuples (conversion to lists happens
# when serializing back from json), it's important images
# are hashable/immutable
if "to_jimage" in d:
d["to_jimage"] = tuple(d["to_jimage"])
if "from_jimage" in d:
d["from_jimage"] = tuple(d["from_jimage"])

@classmethod
def with_empty_graph(
cls,
structure: Structure,
name: str = "bonds",
edge_weight_name: str | None = None,
edge_weight_units: str | None = None,
) -> StructureGraph:
name="bonds",
edge_weight_name=None,
edge_weight_units=None,
):
"""
Constructor for an empty StructureGraph, i.e. no edges, containing only nodes corresponding
to sites in Structure.

Args:
structure: A pymatgen Structure object.
name: Name of the graph, e.g. "bonds".
edge_weight_name: Name of the edge weights, e.g. "bond_length" or "exchange_constant".
edge_weight_units: Name of the edge weight units, e.g. "Å" or "eV".
Constructor for StructureGraph, returns a StructureGraph
object with an empty graph (no edges, only nodes defined
that correspond to Sites in Structure).

Returns:
StructureGraph: an empty graph with no edges, only nodes defined
that correspond to sites in Structure.
:param structure (Structure):
:param name (str): name of graph, e.g. "bonds"
:param edge_weight_name (str): name of edge weights,
e.g. "bond_length" or "exchange_constant"
:param edge_weight_units (str): name of edge weight units
e.g. "Å" or "eV"
:return (StructureGraph):
"""
if edge_weight_name and (edge_weight_units is None):
raise ValueError(
"Please specify units associated with your edge weights. Can be "
"empty string if arbitrary or dimensionless."
"Please specify units associated "
"with your edge weights. Can be "
"empty string if arbitrary or "
"dimensionless."
)

# construct graph with one node per site
Expand Down Expand Up @@ -237,7 +245,7 @@ def with_edges(structure, edges):
return sg

@staticmethod
def with_local_env_strategy(structure, strategy, weights=False, edge_properties=False) -> StructureGraph:
def with_local_env_strategy(structure, strategy, weights=False, edge_properties=False):
"""
Constructor for StructureGraph, using a strategy
from :class:`pymatgen.analysis.local_env`.
Expand All @@ -255,15 +263,15 @@ def with_local_env_strategy(structure, strategy, weights=False, edge_properties=

sg = StructureGraph.with_empty_graph(structure, name="bonds")

for idx, neighbors in enumerate(strategy.get_all_nn_info(structure)):
for n, neighbors in enumerate(strategy.get_all_nn_info(structure)):
for neighbor in neighbors:
# local_env will always try to add two edges
# for any one bond, one from site u to site v
# and another form site v to site u: this is
# harmless, so warn_duplicates=False
if edge_properties:
sg.add_edge(
from_index=idx,
from_index=n,
from_jimage=(0, 0, 0),
to_index=neighbor["site_index"],
to_jimage=neighbor["image"],
Expand All @@ -273,7 +281,7 @@ def with_local_env_strategy(structure, strategy, weights=False, edge_properties=
)
else:
sg.add_edge(
from_index=idx,
from_index=n,
from_jimage=(0, 0, 0),
to_index=neighbor["site_index"],
to_jimage=neighbor["image"],
Expand All @@ -285,8 +293,8 @@ def with_local_env_strategy(structure, strategy, weights=False, edge_properties=
return sg

@property
def name(self) -> str:
"""Name of graph"""
def name(self):
""":return: Name of graph"""
return self.graph.graph["name"]

@property
Expand Down Expand Up @@ -676,8 +684,8 @@ def map_indices(grp):
atoms = len(grp) - 1
offset = len(self.structure) - atoms

for idx in range(atoms):
grp_map[idx] = idx + offset
for i in range(atoms):
grp_map[i] = i + offset

return grp_map

Expand Down Expand Up @@ -1619,7 +1627,7 @@ def with_empty_graph(cls, molecule, name="bonds", edge_weight_name=None, edge_we
return cls(molecule, graph_data=graph_data)

@staticmethod
def with_edges(molecule, edges):
def with_edges(molecule: Molecule, edges: dict[tuple[int, int], dict]):
"""
Constructor for MoleculeGraph, using pre-existing or pre-defined edges
with optional edge parameters.
Expand All @@ -1643,7 +1651,7 @@ def with_edges(molecule, edges):
if props is not None:
weight = props.pop("weight", None)
if len(props.items()) == 0:
props = None
props = None # type: ignore
else:
weight = None

Expand Down Expand Up @@ -2090,11 +2098,11 @@ def build_unique_fragments(self):
comp = "".join(sorted(comp))
subgraph = nx.subgraph(graph, combination)
if nx.is_connected(subgraph):
key = f"{comp} {len(subgraph.edges())}"
if key not in frag_dict:
frag_dict[key] = [copy.deepcopy(subgraph)]
mykey = comp + str(len(subgraph.edges()))
if mykey not in frag_dict:
frag_dict[mykey] = [copy.deepcopy(subgraph)]
else:
frag_dict[key].append(copy.deepcopy(subgraph))
frag_dict[mykey].append(copy.deepcopy(subgraph))

# narrow to all unique fragments using graph isomorphism
unique_frag_dict = {}
Expand Down
33 changes: 18 additions & 15 deletions tests/analysis/test_graphs.py
Expand Up @@ -837,27 +837,30 @@ def test_find_rings(self):
def test_isomorphic(self):
ethyl_xyz_path = os.path.join(PymatgenTest.TEST_FILES_DIR, "graphs/ethylene.xyz")
ethylene = Molecule.from_file(ethyl_xyz_path)
# switch carbons
# swap carbons
ethylene[0], ethylene[1] = ethylene[1], ethylene[0]

eth_copy = MoleculeGraph.with_edges(
ethylene,
{
(0, 1): {"weight": 2},
(1, 2): {"weight": 1},
(1, 3): {"weight": 1},
(0, 4): {"weight": 1},
(0, 5): {"weight": 1},
},
)
edges = {
(0, 1): {"weight": 2},
(1, 2): {"weight": 1},
(1, 3): {"weight": 1},
(0, 4): {"weight": 1},
(0, 5): {"weight": 1},
}

ethylene_graph = MoleculeGraph.with_edges(ethylene, edges)
# If they are equal, they must also be isomorphic
eth_copy = copy.deepcopy(self.ethylene)
assert self.ethylene.isomorphic_to(eth_copy)
assert self.ethylene.isomorphic_to(ethylene_graph)
assert not self.butadiene.isomorphic_to(self.ethylene)

# check fix in https://github.com/materialsproject/pymatgen/pull/3221
# by comparing graph with equal nodes but different edges
edges[(1, 4)] = {"weight": 2}
assert not self.ethylene.isomorphic_to(MoleculeGraph.with_edges(ethylene, edges))

def test_substitute(self):
molecule = FunctionalGroups["methyl"]
molgraph = MoleculeGraph.with_edges(
mol_graph = MoleculeGraph.with_edges(
molecule,
{(0, 1): {"weight": 1}, (0, 2): {"weight": 1}, (0, 3): {"weight": 1}},
)
Expand All @@ -879,7 +882,7 @@ def test_substitute(self):

# Check that MoleculeGraph input is handled properly
eth_graph.substitute_group(5, molecule, MinimumDistanceNN, graph_dict=graph_dict)
eth_mg.substitute_group(5, molgraph, MinimumDistanceNN)
eth_mg.substitute_group(5, mol_graph, MinimumDistanceNN)
assert eth_graph.graph.get_edge_data(5, 6)[0]["weight"] == 1.0
assert eth_mg == eth_graph

Expand Down