From 0549d11c85600ecfcd916332c4d3c6332b9ad675 Mon Sep 17 00:00:00 2001
From: jmmshn <jmmshn@gmail.com>
Date: Tue, 23 Mar 2021 14:55:23 -0700
Subject: [PATCH] Rename StructureGroupDoc material_id/grouped_ids to group_id/material_ids

---
 .../emmet/builders/materials/electrodes.py    | 27 +++++++--------
 emmet-core/emmet/core/electrode.py            |  2 +-
 emmet-core/emmet/core/structure_group.py      | 34 ++++++++-----------
 tests/emmet-core/test_structure_group.py      |  9 ++---
 4 files changed, 32 insertions(+), 40 deletions(-)

diff --git a/emmet-builders/emmet/builders/materials/electrodes.py b/emmet-builders/emmet/builders/materials/electrodes.py
index 944efc12da..0fa9357436 100644
--- a/emmet-builders/emmet/builders/materials/electrodes.py
+++ b/emmet-builders/emmet/builders/materials/electrodes.py
@@ -189,9 +189,9 @@ def get_items(self):
                     self.sgroups.query(
                         criteria={"chemsys": chemsys},
                         properties=[
-                            "material_id",
+                            "group_id",
                             self.sgroups.last_updated_field,
-                            "grouped_ids",
+                            "material_ids",
                         ],
                     )
                 )
@@ -222,24 +222,24 @@ def get_items(self):
                 )
 
                 # If any material id is missing or if any material id has been updated
-                target_mat_ids = set()
+                target_ids = set()
                 for g_doc in all_target_docs:
-                    target_mat_ids |= set(g_doc["grouped_ids"])
+                    target_ids |= set(g_doc["material_ids"])
 
                 self.logger.debug(
-                    f"There are {len(mat_ids)} material ids in the source database vs {len(target_mat_ids)} in the target database."
+                    f"There are {len(mat_ids)} material ids in the source database vs {len(target_ids)} in the target database."
                 )
-                if mat_ids == target_mat_ids and max_mat_time < min_target_time:
+                if mat_ids == target_ids and max_mat_time < min_target_time:
                     yield None
-                elif len(target_mat_ids) == 0:
+                elif len(target_ids) == 0:
                     self.logger.info(
                         f"No documents in chemsys {chemsys} in the target database."
                     )
                 else:
                     self.logger.info(
-                        f"Nuking all {len(target_mat_ids)} documents in chemsys {chemsys} in the target database."
+                        f"Nuking all {len(target_ids)} documents in chemsys {chemsys} in the target database."
                     )
-                    self._remove_targets(list(target_mat_ids))
+                    self._remove_targets(list(target_ids))
             else:
                 yield {"chemsys": chemsys, "materials": all_mats_in_chemsys}
 
@@ -275,13 +275,10 @@ def process_item(self, item: Any) -> Any:
             stol=self.stol,
             angle_tol=self.angle_tol,
         )
-        # append the working_ion to the group ids
-        for sg in s_groups:
-            sg.material_id = f"{sg.material_id}_{self.working_ion}"
         return [sg.dict() for sg in s_groups]
 
     def _remove_targets(self, rm_ids):
-        self.sgroups.remove_docs({"material_id": {"$in": rm_ids}})
+        self.sgroups.remove_docs({"material_ids": {"$in": rm_ids}})
 
 
 class InsertionElectrodeBuilder(MapBuilder):
@@ -317,14 +314,14 @@ def get_working_ion_entry(working_ion):
 
         def modify_item(item):
             self.logger.debug(
-                f"Looking for {len(item['grouped_ids'])} material_id in the Thermo DB."
+                f"Looking for {len(item['material_ids'])} material_id in the Thermo DB."
             )
             with self.thermo as store:
                 thermo_docs = [
                     *store.query(
                         {
                             "$and": [
-                                {"material_id": {"$in": item["grouped_ids"]}},
+                                {"material_id": {"$in": item["material_ids"]}},
                             ]
                         },
                         properties=[
diff --git a/emmet-core/emmet/core/electrode.py b/emmet-core/emmet/core/electrode.py
index e9aeb9b6a0..0882050a47 100644
--- a/emmet-core/emmet/core/electrode.py
+++ b/emmet-core/emmet/core/electrode.py
@@ -21,7 +21,7 @@ class VoltagePairDoc(BaseModel):
         object to gain access to some basic statistics about the voltage step
     """
 
-    max_delta_volume: str = Field(
+    max_delta_volume: float = Field(
         None,
         description="Volume changes in % for a particular voltage step using: "
         "max(charge, discharge) / min(charge, discharge) - 1",
diff --git a/emmet-core/emmet/core/structure_group.py b/emmet-core/emmet/core/structure_group.py
index f7899b39ad..b5478906cd 100644
--- a/emmet-core/emmet/core/structure_group.py
+++ b/emmet-core/emmet/core/structure_group.py
@@ -50,23 +50,17 @@ class StructureGroupDoc(BaseModel):
     Group of structure
     """
 
-    material_id: Union[MPID, int] = Field(
+    group_id: str = Field(
         None,
-        description="The combined material_id of the grouped document is given by the numerically smallest task id ",
-    )
-
-    structure_matched: bool = Field(
-        None,
-        description="True if the structure matching was performed to group theses entries together."
-        "This is False for groups that contain all the left over entries like the ones that only "
-        "contain the ignored species.",
+        description="The id of the grouped document, given by the numerically smallest "
+        "material_id followed by the ignored species, joined with underscores.",
     )
 
     has_distinct_compositions: bool = Field(
         None, description="True if multiple compositions are present in the group."
     )
 
-    grouped_ids: list = Field(
+    material_ids: list = Field(
         None,
         description="A list of materials ids for all of the materials that were grouped together.",
     )
@@ -99,12 +93,12 @@ def from_grouped_entries(
         cls,
         entries: List[Union[ComputedEntry, ComputedStructureEntry]],
         ignored_species: List[str],
-        structure_matched: bool,
     ) -> "StructureGroupDoc":
         """ "
         Assuming a list of entries are already grouped together, create a StructureGroupDoc
         Args:
             entries: A list of entries that is already grouped together.
+            ignored_species: The species that are ignored during structure matching
         """
         all_atoms = set()
         all_comps = set()
@@ -121,11 +115,10 @@ def from_grouped_entries(
             framework_str = framework_comp.reduced_formula
         ids = [ient.entry_id for ient in entries]
         lowest_id = min(ids, key=_get_id_num)
-
+        sub_script = "_".join(ignored_species)
         fields = {
-            "material_id": lowest_id,
-            "grouped_ids": ids,
-            "structure_matched": structure_matched,
+            "group_id": f"{lowest_id}_{sub_script}",
+            "material_ids": ids,
             "framework_formula": framework_str,
             "ignored_species": sorted(ignored_species),
             "chemsys": "-".join(sorted(all_atoms | set(ignored_species))),
@@ -176,13 +169,13 @@ def from_ungrouped_structure_entries(
 
         cnt_ = 0
         for framework, f_group in framework_groups:
-            # if you only have ignored atoms put them into one "ignored" groupd
+            # if you only have ignored atoms put them into one "ignored" group
             f_group_l = list(f_group)
             if framework == "ignored":
                 struct_group = cls.from_grouped_entries(
-                    f_group_l, ignored_species=ignored_species, structure_matched=False
+                    f_group_l, ignored_species=ignored_species
                 )
-                cnt_ += len(struct_group.grouped_ids)
+                cnt_ += len(struct_group.material_ids)
                 continue
 
             logger.debug(
@@ -190,9 +183,9 @@ def from_ungrouped_structure_entries(
             )
             for g in group_entries_with_structure_matcher(f_group_l, sm):
                 struct_group = cls.from_grouped_entries(
-                    g, ignored_species=ignored_species, structure_matched=True
+                    g, ignored_species=ignored_species
                 )
-                cnt_ += len(struct_group.grouped_ids)
+                cnt_ += len(struct_group.material_ids)
                 results.append(struct_group)
         if cnt_ != len(entries):
             raise RuntimeError(
@@ -209,6 +202,7 @@ def group_entries_with_structure_matcher(
     Group the entries together based on similarity of the  primitive cells
     Args:
         g: a list of entries
+        struct_matcher: the StructureMatcher object used to aggregate structures
     Returns:
         subgroups: subgroups that are grouped together based on structure similarity
     """
diff --git a/tests/emmet-core/test_structure_group.py b/tests/emmet-core/test_structure_group.py
index 29231b0a2c..ad26865dd7 100644
--- a/tests/emmet-core/test_structure_group.py
+++ b/tests/emmet-core/test_structure_group.py
@@ -36,10 +36,11 @@ def entries_lfeo(test_dir):
 
 def test_StructureGroupDoc_from_grouped_entries(entries_lto):
     sgroup_doc = StructureGroupDoc.from_grouped_entries(
-        entries_lto, ignored_species=["Li"], structure_matched=True
+        entries_lto,
+        ignored_species=["Li"],
     )
-    assert sgroup_doc.material_id == "mp-0"
-    assert sgroup_doc.grouped_ids == ["mp-0", "mp-1", "mp-2", "mp-3", "mp-4", "mp-5"]
+    assert sgroup_doc.group_id == "mp-0_Li"
+    assert sgroup_doc.material_ids == ["mp-0", "mp-1", "mp-2", "mp-3", "mp-4", "mp-5"]
     assert sgroup_doc.framework_formula == "TiO2"
     assert sgroup_doc.ignored_species == ["Li"]
     assert sgroup_doc.chemsys == "Li-O-Ti"
@@ -56,7 +57,7 @@ def test_StructureGroupDoc_from_ungrouped_entries(entries_lfeo):
     for sgroup_doc in sgroup_docs:
         framework_ref = sgroup_doc.framework_formula
         ignored = sgroup_doc.ignored_species
-        for entry_id in sgroup_doc.grouped_ids:
+        for entry_id in sgroup_doc.material_ids:
             dd_ = entry_dict[entry_id].composition.as_dict()
             for k in ignored:
                 if k in dd_: