From 0549d11c85600ecfcd916332c4d3c6332b9ad675 Mon Sep 17 00:00:00 2001 From: jmmshn Date: Tue, 23 Mar 2021 14:55:23 -0700 Subject: [PATCH] changed group id name --- .../emmet/builders/materials/electrodes.py | 27 +++++++-------- emmet-core/emmet/core/electrode.py | 2 +- emmet-core/emmet/core/structure_group.py | 34 ++++++++----------- tests/emmet-core/test_structure_group.py | 9 ++--- 4 files changed, 32 insertions(+), 40 deletions(-) diff --git a/emmet-builders/emmet/builders/materials/electrodes.py b/emmet-builders/emmet/builders/materials/electrodes.py index 944efc12da..0fa9357436 100644 --- a/emmet-builders/emmet/builders/materials/electrodes.py +++ b/emmet-builders/emmet/builders/materials/electrodes.py @@ -189,9 +189,9 @@ def get_items(self): self.sgroups.query( criteria={"chemsys": chemsys}, properties=[ - "material_id", + "group_id", self.sgroups.last_updated_field, - "grouped_ids", + "material_ids", ], ) ) @@ -222,24 +222,24 @@ def get_items(self): ) # If any material id is missing or if any material id has been updated - target_mat_ids = set() + target_ids = set() for g_doc in all_target_docs: - target_mat_ids |= set(g_doc["grouped_ids"]) + target_ids |= set(g_doc["material_ids"]) self.logger.debug( - f"There are {len(mat_ids)} material ids in the source database vs {len(target_mat_ids)} in the target database." + f"There are {len(mat_ids)} material ids in the source database vs {len(target_ids)} in the target database." ) - if mat_ids == target_mat_ids and max_mat_time < min_target_time: + if mat_ids == target_ids and max_mat_time < min_target_time: yield None - elif len(target_mat_ids) == 0: + elif len(target_ids) == 0: self.logger.info( f"No documents in chemsys {chemsys} in the target database." ) else: self.logger.info( - f"Nuking all {len(target_mat_ids)} documents in chemsys {chemsys} in the target database." + f"Nuking all {len(target_ids)} documents in chemsys {chemsys} in the target database." ) - self._remove_targets(list(target_mat_ids)) + self._remove_targets(list(target_ids)) else: yield {"chemsys": chemsys, "materials": all_mats_in_chemsys} @@ -275,13 +275,10 @@ def process_item(self, item: Any) -> Any: stol=self.stol, angle_tol=self.angle_tol, ) - # append the working_ion to the group ids - for sg in s_groups: - sg.material_id = f"{sg.material_id}_{self.working_ion}" return [sg.dict() for sg in s_groups] def _remove_targets(self, rm_ids): - self.sgroups.remove_docs({"material_id": {"$in": rm_ids}}) + self.sgroups.remove_docs({"material_ids": {"$in": rm_ids}}) class InsertionElectrodeBuilder(MapBuilder): @@ -317,14 +314,14 @@ def get_working_ion_entry(working_ion): def modify_item(item): self.logger.debug( - f"Looking for {len(item['grouped_ids'])} material_id in the Thermo DB." + f"Looking for {len(item['material_ids'])} material_id in the Thermo DB." ) with self.thermo as store: thermo_docs = [ *store.query( { "$and": [ - {"material_id": {"$in": item["grouped_ids"]}}, + {"material_id": {"$in": item["material_ids"]}}, ] }, properties=[ diff --git a/emmet-core/emmet/core/electrode.py b/emmet-core/emmet/core/electrode.py index e9aeb9b6a0..0882050a47 100644 --- a/emmet-core/emmet/core/electrode.py +++ b/emmet-core/emmet/core/electrode.py @@ -21,7 +21,7 @@ class VoltagePairDoc(BaseModel): object to gain access to some basic statistics about the voltage step """ - max_delta_volume: str = Field( + max_delta_volume: float = Field( None, description="Volume changes in % for a particular voltage step using: " "max(charge, discharge) / min(charge, discharge) - 1", diff --git a/emmet-core/emmet/core/structure_group.py b/emmet-core/emmet/core/structure_group.py index f7899b39ad..b5478906cd 100644 --- a/emmet-core/emmet/core/structure_group.py +++ b/emmet-core/emmet/core/structure_group.py @@ -50,23 +50,17 @@ class StructureGroupDoc(BaseModel): Group of structure """ - material_id: Union[MPID, int] = Field( + group_id: str = Field( None, - description="The combined material_id of the grouped document is given by the numerically smallest task id ", - ) - - structure_matched: bool = Field( - None, - description="True if the structure matching was performed to group theses entries together." - "This is False for groups that contain all the left over entries like the ones that only " - "contain the ignored species.", + description="The combined material_id of the grouped document is given by the numerically smallest " + "material_id, you can also append the followed by the ignored species at the end.", ) has_distinct_compositions: bool = Field( None, description="True if multiple compositions are present in the group." ) - grouped_ids: list = Field( + material_ids: list = Field( None, description="A list of materials ids for all of the materials that were grouped together.", ) @@ -99,12 +93,12 @@ def from_grouped_entries( cls, entries: List[Union[ComputedEntry, ComputedStructureEntry]], ignored_species: List[str], - structure_matched: bool, ) -> "StructureGroupDoc": """ " Assuming a list of entries are already grouped together, create a StructureGroupDoc Args: entries: A list of entries that is already grouped together. + ignored_species: The species that are ignored during structure matching """ all_atoms = set() all_comps = set() @@ -121,11 +115,10 @@ def from_grouped_entries( framework_str = framework_comp.reduced_formula ids = [ient.entry_id for ient in entries] lowest_id = min(ids, key=_get_id_num) - + sub_script = "_".join(ignored_species) fields = { - "material_id": lowest_id, - "grouped_ids": ids, - "structure_matched": structure_matched, + "group_id": f"{lowest_id}_{sub_script}", + "material_ids": ids, "framework_formula": framework_str, "ignored_species": sorted(ignored_species), "chemsys": "-".join(sorted(all_atoms | set(ignored_species))), @@ -176,13 +169,13 @@ def from_ungrouped_structure_entries( cnt_ = 0 for framework, f_group in framework_groups: - # if you only have ignored atoms put them into one "ignored" groupd + # if you only have ignored atoms put them into one "ignored" group f_group_l = list(f_group) if framework == "ignored": struct_group = cls.from_grouped_entries( - f_group_l, ignored_species=ignored_species, structure_matched=False + f_group_l, ignored_species=ignored_species ) - cnt_ += len(struct_group.grouped_ids) + cnt_ += len(struct_group.material_ids) continue logger.debug( @@ -190,9 +183,9 @@ def from_ungrouped_structure_entries( ) for g in group_entries_with_structure_matcher(f_group_l, sm): struct_group = cls.from_grouped_entries( - g, ignored_species=ignored_species, structure_matched=True + g, ignored_species=ignored_species ) - cnt_ += len(struct_group.grouped_ids) + cnt_ += len(struct_group.material_ids) results.append(struct_group) if cnt_ != len(entries): raise RuntimeError( @@ -209,6 +202,7 @@ def group_entries_with_structure_matcher( Group the entries together based on similarity of the primitive cells Args: g: a list of entries + struct_matcher: the StructureMatcher object used to aggregate structures Returns: subgroups: subgroups that are grouped together based on structure similarity """ diff --git a/tests/emmet-core/test_structure_group.py b/tests/emmet-core/test_structure_group.py index 29231b0a2c..ad26865dd7 100644 --- a/tests/emmet-core/test_structure_group.py +++ b/tests/emmet-core/test_structure_group.py @@ -36,10 +36,11 @@ def entries_lfeo(test_dir): def test_StructureGroupDoc_from_grouped_entries(entries_lto): sgroup_doc = StructureGroupDoc.from_grouped_entries( - entries_lto, ignored_species=["Li"], structure_matched=True + entries_lto, + ignored_species=["Li"], ) - assert sgroup_doc.material_id == "mp-0" - assert sgroup_doc.grouped_ids == ["mp-0", "mp-1", "mp-2", "mp-3", "mp-4", "mp-5"] + assert sgroup_doc.group_id == "mp-0_Li" + assert sgroup_doc.material_ids == ["mp-0", "mp-1", "mp-2", "mp-3", "mp-4", "mp-5"] assert sgroup_doc.framework_formula == "TiO2" assert sgroup_doc.ignored_species == ["Li"] assert sgroup_doc.chemsys == "Li-O-Ti" @@ -56,7 +57,7 @@ def test_StructureGroupDoc_from_ungrouped_entries(entries_lfeo): for sgroup_doc in sgroup_docs: framework_ref = sgroup_doc.framework_formula ignored = sgroup_doc.ignored_species - for entry_id in sgroup_doc.grouped_ids: + for entry_id in sgroup_doc.material_ids: dd_ = entry_dict[entry_id].composition.as_dict() for k in ignored: if k in dd_: