Merge pull request #975 from openforcefield/gmx-unique-molecule-names

Ensure molecule names are unique at GROMACS export time
openforcefield · Apr 26, 2024 · c50450d · c50450d
2 parents f7efaed + 01c5803
commit c50450d
Show file tree

Hide file tree

Showing 6 changed files with 60 additions and 18 deletions.
diff --git a/devtools/conda-envs/examples_env.yaml b/devtools/conda-envs/examples_env.yaml
@@ -9,7 +9,7 @@ dependencies:
   - pydantic =2
   - openmm
   # OpenFF stack
-  - openff-toolkit =0.15.2
+  - openff-toolkit
   - openff-models
   - openff-nagl
   - openff-nagl-models

diff --git a/openff/interchange/_tests/unit_tests/interop/gromacs/export/test_export.py b/openff/interchange/_tests/unit_tests/interop/gromacs/export/test_export.py
@@ -401,6 +401,19 @@ def test_nonconsecutive_isomorphic_molecules(self, sage_unconstrained):
             {"Nonbonded": 0.5 * unit.kilojoule_per_mole},
         )
 
+    @pytest.mark.parametrize("name", ["MOL0", "MOL999", ""])
+    def test_exisiting_mol0_names_overwritten(self, name, sage, ethanol, cyclohexane):
+        pytest.importorskip("parmed")
+
+        ethanol.name = name
+        cyclohexane.name = name
+
+        sage.create_interchange(
+            Topology.from_molecules([ethanol, cyclohexane]),
+        ).to_top("tmp.top")
+
+        assert [*parmed.load_file("tmp.top").molecules.keys()] == ["MOL0", "MOL1"]
+
 
 class TestGROMACSMetadata:
     @skip_if_missing("openmm")

diff --git a/openff/interchange/_tests/unit_tests/interop/gromacs/models/test_models.py b/openff/interchange/_tests/unit_tests/interop/gromacs/models/test_models.py
@@ -19,7 +19,7 @@ def molecule1():
         "[H][O][c]1[c]([H])[c]([O][H])[c]([H])[c]([O][H])[c]1[H]",
     )
     molecule.generate_conformers(n_conformers=1)
-    molecule.name = "MOL1"
+    molecule.name = "MOL__1"
 
     return molecule
 
@@ -28,7 +28,7 @@ def molecule1():
 def molecule2():
     molecule = Molecule.from_smiles("C1=C(C=C(C=C1C(=O)O)C(=O)O)C(=O)O")
     molecule.generate_conformers(n_conformers=1)
-    molecule.name = "MOL2"
+    molecule.name = "MOL__2"
 
     molecule.conformers[0] += numpy.array([5, 0, 0]) * unit.angstrom
 
@@ -96,7 +96,7 @@ def test_massive_virtual_site_error(self):
 @pytest.mark.slow
 class TestAddRemoveMoleculeType:
     @needs_gmx
-    @pytest.mark.parametrize("molecule_name", ["MOL1", "MOL2"])
+    @pytest.mark.parametrize("molecule_name", ["MOL__1", "MOL__2"])
     def test_remove_basic(self, combined_system, molecule_name):
         combined_system.remove_molecule_type(molecule_name)
 
@@ -110,7 +110,7 @@ def test_remove_basic(self, combined_system, molecule_name):
         )
 
     @pytest.mark.slow
-    @pytest.mark.parametrize("molecule_name", ["MOL1", "MOL2"])
+    @pytest.mark.parametrize("molecule_name", ["MOL__1", "MOL__2"])
     def test_add_existing_molecule_type(self, combined_system, molecule_name):
         with pytest.raises(
             ValueError,
@@ -140,8 +140,8 @@ def test_different_force_field_different_energies(
         )
         molecule2_sage = _convert(Interchange.from_smirnoff(sage, [molecule2], box=box))
 
-        parsley_system.add_molecule_type(molecule2_parsley.molecule_types["MOL2"], 1)
-        sage_system.add_molecule_type(molecule2_sage.molecule_types["MOL2"], 1)
+        parsley_system.add_molecule_type(molecule2_parsley.molecule_types["MOL__2"], 1)
+        sage_system.add_molecule_type(molecule2_sage.molecule_types["MOL__2"], 1)
 
         parsley_system.positions = combined_system.positions
         sage_system.positions = combined_system.positions
@@ -163,10 +163,10 @@ def test_molecule_order_independent(self, system1, system2):
         positions1 = numpy.vstack([system1.positions, system2.positions])
         positions2 = numpy.vstack([system2.positions, system1.positions])
 
-        system1.add_molecule_type(system2.molecule_types["MOL2"], 1)
+        system1.add_molecule_type(system2.molecule_types["MOL__2"], 1)
         system1.positions = positions1
 
-        system2.add_molecule_type(system1.molecule_types["MOL1"], 1)
+        system2.add_molecule_type(system1.molecule_types["MOL__1"], 1)
         system2.positions = positions2
 
         system1.to_files(prefix="order1", decimal=8)
@@ -182,27 +182,27 @@ def test_molecule_order_independent(self, system1, system2):
     def test_clashing_atom_types(self, combined_system, system1, system2):
         with pytest.raises(
             ValueError,
-            match="The molecule type MOL1 is already present in this system.",
+            match="The molecule type MOL__1 is already present in this system.",
         ):
-            combined_system.add_molecule_type(system1.molecule_types["MOL1"], 1)
+            combined_system.add_molecule_type(system1.molecule_types["MOL__1"], 1)
 
         with pytest.raises(
             ValueError,
-            match="The molecule type MOL2 is already present in this system.",
+            match="The molecule type MOL__2 is already present in this system.",
         ):
-            combined_system.add_molecule_type(system2.molecule_types["MOL2"], 1)
+            combined_system.add_molecule_type(system2.molecule_types["MOL__2"], 1)
 
         with pytest.raises(
             ValueError,
-            match="The molecule type MOL1 is already present in this system.",
+            match="The molecule type MOL__1 is already present in this system.",
         ):
-            system1.add_molecule_type(system1.molecule_types["MOL1"], 1)
+            system1.add_molecule_type(system1.molecule_types["MOL__1"], 1)
 
         with pytest.raises(
             ValueError,
-            match="The molecule type MOL2 is already present in this system.",
+            match="The molecule type MOL__2 is already present in this system.",
         ):
-            system2.add_molecule_type(system2.molecule_types["MOL2"], 1)
+            system2.add_molecule_type(system2.molecule_types["MOL__2"], 1)
 
 
 class TestToFiles:

diff --git a/openff/interchange/components/interchange.py b/openff/interchange/components/interchange.py
@@ -441,6 +441,11 @@ def to_gromacs(
             The flag to define behaviour of GROMACSWriter. If True, then similar atom types will be merged.
             If False, each atom will have its own atom type.
 
+        Notes
+        -----
+        Molecule names in written files are not guaranteed to match the `Moleclue.name` attribute of the
+        molecules in the topology, especially if they are empty strings or not unique.
+
         """
         from openff.interchange.interop.gromacs.export._export import GROMACSWriter
         from openff.interchange.smirnoff._gromacs import _convert
@@ -475,6 +480,11 @@ def to_top(
             The flag to define behaviour of GROMACSWriter. If True, then similar atom types will be merged.
             If False, each atom will have its own atom type.
 
+        Notes
+        -----
+        Molecule names in written files are not guaranteed to match the `Moleclue.name` attribute of the
+        molecules in the topology, especially if they are empty strings or not unique.
+
         """
         from openff.interchange.interop.gromacs.export._export import GROMACSWriter
         from openff.interchange.smirnoff._gromacs import _convert

diff --git a/openff/interchange/operations/_combine.py b/openff/interchange/operations/_combine.py
@@ -106,6 +106,11 @@ def _combine(
                     {pot_key: handler.potentials[pot_key]},
                 )
 
+        # Ensure the charge cache is rebuilt
+        if handler_name == "Electrostatics":
+            self_handler._charges_cached = False
+            self_handler._get_charges()
+
         result.collections[handler_name] = self_handler
 
     if result.positions is not None and input2.positions is not None:

diff --git a/openff/interchange/smirnoff/_gromacs.py b/openff/interchange/smirnoff/_gromacs.py
@@ -1,4 +1,5 @@
 import itertools
+import re
 from collections import defaultdict
 from typing import Optional, TypeAlias, Union
 
@@ -116,9 +117,22 @@ def _convert(
     for unique_molecule_index in unique_molecule_map:
         unique_molecule = interchange.topology.molecule(unique_molecule_index)
 
-        if getattr(unique_molecule, "name", "") == "":
+        # If this molecule doesn't have a name ("^$", empty string), name it MOL0 incrementing
+        # Also rename it if it's already MOL\d+ since that was probably assigned by this function
+        # earlier in a pipeline. The molecule_types dict keys by molecule names and it's important
+        # that they are unique (in the same way that the unitand moleucle names are)
+        if re.match(
+            r"^$|MOL\d+",
+            getattr(unique_molecule, "name", ""),  # SimpleMolecule might not have .name
+        ):
             unique_molecule.name = "MOL" + str(unique_molecule_index)
 
+        if unique_molecule.name in system.molecule_types:
+            raise RuntimeError(
+                "Problem keeping molecule names unique. This should not be possible - please"
+                "raise an issue describing how this error occurred.",
+            )
+
         for atom in unique_molecule.atoms:
             atom_type_name = f"{unique_molecule.name}_{particle_map[unique_molecule.atom_index(atom)]}"
             _atom_atom_type_map[atom] = atom_type_name