From bd71564f8ef0a9ed7bf0fd5d22f1494f0258022b Mon Sep 17 00:00:00 2001 From: jmmshn Date: Wed, 17 Feb 2021 12:28:37 -0500 Subject: [PATCH 1/3] made structure grouper work with material_ids --- .../emmet/builders/materials/electrodes.py | 45 ++++++++++--------- emmet-core/emmet/core/electrode.py | 4 +- emmet-core/emmet/core/structure_group.py | 8 ++-- emmet-core/emmet/core/vasp/material.py | 2 +- tests/emmet-core/test_structure_group.py | 2 +- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git a/emmet-builders/emmet/builders/materials/electrodes.py b/emmet-builders/emmet/builders/materials/electrodes.py index d4b9e22033..ac9c2f398b 100644 --- a/emmet-builders/emmet/builders/materials/electrodes.py +++ b/emmet-builders/emmet/builders/materials/electrodes.py @@ -30,7 +30,7 @@ def s_hash(el): return el.data["comp_delith"] -# MatDoc = namedtuple("MatDoc", ["task_id", "structure", "formula_pretty", "framework"]) +# MatDoc = namedtuple("MatDoc", ["material_id", "structure", "formula_pretty", "framework"]) REDOX_ELEMENTS = [ "Ti", @@ -56,7 +56,7 @@ def s_hash(el): MAT_PROPS = [ "structure", - "task_id", + "material_id", "formula_pretty", ] @@ -171,12 +171,11 @@ def get_items(self): chemsys_query = { "$and": [ {"chemsys": {"$in": [chemsys_wo, chemsys]}}, - {"_sbxn": {"$in": ["core"]}}, self.query.copy(), ] } self.logger.debug(f"QUERY: {chemsys_query}") - + print(chemsys_query) all_mats_in_chemsys = list( self.materials.query( criteria=chemsys_query, @@ -191,7 +190,7 @@ def get_items(self): self.sgroups.query( criteria={"chemsys": chemsys}, properties=[ - "task_id", + "material_id", self.sgroups.last_updated_field, "grouped_ids", ], @@ -219,7 +218,9 @@ def get_items(self): f"The newest GROUP doc was generated at {min_target_time}." ) - mat_ids = set([mat_doc["task_id"] for mat_doc in all_mats_in_chemsys]) + mat_ids = set( + [mat_doc["material_id"] for mat_doc in all_mats_in_chemsys] + ) # If any material id is missing or if any material id has been updated target_mat_ids = set() @@ -245,7 +246,7 @@ def update_targets(self, items: List): self.logger.info("Updating {} sgroups documents".format(len(items))) for struct_group_dict in items: struct_group_dict[self.sgroups.last_updated_field] = datetime.utcnow() - self.sgroups.update(docs=items, key=["task_id"]) + self.sgroups.update(docs=items, key=["material_id"]) else: self.logger.info("No items to update") @@ -253,7 +254,7 @@ def _entry_from_mat_doc(self, mdoc): # Note since we are just structure grouping we don't need to be careful with energy or correction # All of the energy analysis is left to other builders d_ = { - "entry_id": mdoc["task_id"], + "entry_id": mdoc["material_id"], "structure": mdoc["structure"], "energy": -math.inf, "correction": -math.inf, @@ -271,11 +272,11 @@ def process_item(self, item: Any) -> Any: ) # append the working_ion to the group ids for sg in s_groups: - sg.task_id = f"{sg.task_id}_{self.working_ion}" + sg.material_id = f"{sg.material_id}_{self.working_ion}" return [sg.dict() for sg in s_groups] def _remove_targets(self, rm_ids): - self.sgroups.remove_docs({"task_id": {"$in": rm_ids}}) + self.sgroups.remove_docs({"material_id": {"$in": rm_ids}}) class InsertionElectrodeBuilder(MapBuilder): @@ -315,18 +316,17 @@ def get_working_ion_entry(working_ion): def modify_item(item): self.logger.debug( - f"Looking for {len(item['grouped_ids'])} task_ids in the Thermo DB." + f"Looking for {len(item['grouped_ids'])} material_id in the Thermo DB." ) with self.thermo as store: thermo_docs = [ *store.query( { "$and": [ - {"task_id": {"$in": item["grouped_ids"]}}, - {"_sbxn": {"$in": ["core"]}}, + {"material_id": {"$in": item["grouped_ids"]}}, ] }, - properties=["task_id", "_sbxn", "thermo"], + properties=["material_id", "_sbxn", "thermo"], ) ] @@ -335,11 +335,11 @@ def modify_item(item): *store.query( { "$and": [ - {"task_id": {"$in": item["grouped_ids"]}}, + {"material_id": {"$in": item["grouped_ids"]}}, {"_sbxn": {"$in": ["core"]}}, ] }, - properties=["task_id", "structure"], + properties=["material_id", "structure"], ) ] @@ -350,7 +350,7 @@ def modify_item(item): ) working_ion_doc = get_working_ion_entry(item["ignored_species"][0]) return { - "task_id": item["task_id"], + "material_id": item["material_id"], "working_ion_doc": working_ion_doc, "working_ion": item["ignored_species"][0], "thermo_docs": thermo_docs, @@ -371,10 +371,11 @@ def unary_function(self, item): ) working_ion = working_ion_entry.composition.reduced_formula decomp_energies = { - d_["task_id"]: d_["thermo"]["e_above_hull"] for d_ in item["thermo_docs"] + d_["material_id"]: d_["thermo"]["e_above_hull"] + for d_ in item["thermo_docs"] } mat_structures = { - mat_d_["task_id"]: Structure.from_dict(mat_d_["structure"]) + mat_d_["material_id"]: Structure.from_dict(mat_d_["structure"]) for mat_d_ in item["material_docs"] } @@ -383,7 +384,7 @@ def unary_function(self, item): ) mdoc_ = next( filter( - lambda x: x["task_id"] == least_wion_ent.entry_id, + lambda x: x["material_id"] == least_wion_ent.entry_id, item["material_docs"], ) ) @@ -393,7 +394,7 @@ def unary_function(self, item): for ient in entries: if mat_structures[ient.entry_id].composition != ient.composition: raise RuntimeError( - f"In {item['task_id']}: the compositions for task {ient.entry_id} are matched " + f"In {item['material_id']}: the compositions for task {ient.entry_id} are matched " "between the StructureGroup DB and the Thermo DB " ) ient.data["volume"] = mat_structures[ient.entry_id].volume @@ -402,7 +403,7 @@ def unary_function(self, item): ie = InsertionElectrodeDoc.from_entries( grouped_entries=entries, working_ion_entry=working_ion_entry, - task_id=item["task_id"], + task_id=item["material_id"], host_structure=host_structure, ) if ie is None: diff --git a/emmet-core/emmet/core/electrode.py b/emmet-core/emmet/core/electrode.py index 50dbe76b44..eafdf15b09 100644 --- a/emmet-core/emmet/core/electrode.py +++ b/emmet-core/emmet/core/electrode.py @@ -78,7 +78,7 @@ class InsertionElectrodeDoc(InsertionVoltagePairDoc): Insertion electrode """ - task_id: str = Field(None, description="The id for this battery document.") + battery_id: str = Field(None, description="The id for this battery document.") framework_formula: str = Field( None, description="The id for this battery document." @@ -158,7 +158,7 @@ class ConversionVoltagePairDoc(VoltagePairDoc): class ConversionElectrodeDoc(ConversionVoltagePairDoc): - task_id: str = Field(None, description="The id for this battery document.") + battery_id: str = Field(None, description="The id for this battery document.") adj_pairs: List[ConversionVoltagePairDoc] = Field( None, diff --git a/emmet-core/emmet/core/structure_group.py b/emmet-core/emmet/core/structure_group.py index a4c10812ff..c0ef80191f 100644 --- a/emmet-core/emmet/core/structure_group.py +++ b/emmet-core/emmet/core/structure_group.py @@ -49,9 +49,9 @@ class StructureGroupDoc(BaseModel): Group of structure """ - task_id: str = Field( + material_id: str = Field( None, - description="The combined task_id of the grouped document is given by the numerically smallest task id ", + description="The combined material_id of the grouped document is given by the numerically smallest task id ", ) structure_matched: bool = Field( @@ -122,7 +122,7 @@ def from_grouped_entries( lowest_id = min(ids, key=_get_id_num) fields = { - "task_id": lowest_id, + "material_id": lowest_id, "grouped_ids": ids, "structure_matched": structure_matched, "framework_formula": framework_str, @@ -223,7 +223,7 @@ def group_entries_with_structure_matcher( def _get_id_num(task_id) -> Union[int, str]: if isinstance(task_id, int): return task_id - if isinstance(task_id, str) and "-" in task_id: + if isinstance(task_id, str): return int(task_id.split("-")[-1]) else: raise ValueError("TaskID needs to be either a number or of the form xxx-#####") diff --git a/emmet-core/emmet/core/vasp/material.py b/emmet-core/emmet/core/vasp/material.py index 87feb02ac5..ac91441880 100644 --- a/emmet-core/emmet/core/vasp/material.py +++ b/emmet-core/emmet/core/vasp/material.py @@ -137,7 +137,7 @@ def _structure_eval(task: TaskDocument): if len(relevant_calcs) > 0: best_task_doc = relevant_calcs[0] entry = best_task_doc.structure_entry - entry.data["task_id"] = entry.entry_id + entry.data["material_id"] = entry.entry_id entry.entry_id = material_id entries[rt] = entry diff --git a/tests/emmet-core/test_structure_group.py b/tests/emmet-core/test_structure_group.py index ad554eb1a1..9a1bfa2a2d 100644 --- a/tests/emmet-core/test_structure_group.py +++ b/tests/emmet-core/test_structure_group.py @@ -38,7 +38,7 @@ def test_StructureGroupDoc_from_grouped_entries(entries_lto): sgroup_doc = StructureGroupDoc.from_grouped_entries( entries_lto, ignored_species=["Li"], structure_matched=True ) - assert sgroup_doc.task_id == "mp-0" + assert sgroup_doc.material_id == "mp-0" assert sgroup_doc.grouped_ids == ["mp-0", "mp-1", "mp-2", "mp-3", "mp-4", "mp-5"] assert sgroup_doc.framework_formula == "TiO2" assert sgroup_doc.ignored_species == ["Li"] From c99993c670ffec23aa4f7d28e97f994ece9ec993 Mon Sep 17 00:00:00 2001 From: jmmshn Date: Wed, 17 Feb 2021 13:08:54 -0500 Subject: [PATCH 2/3] typo --- emmet-core/emmet/core/vasp/material.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emmet-core/emmet/core/vasp/material.py b/emmet-core/emmet/core/vasp/material.py index ac91441880..87feb02ac5 100644 --- a/emmet-core/emmet/core/vasp/material.py +++ b/emmet-core/emmet/core/vasp/material.py @@ -137,7 +137,7 @@ def _structure_eval(task: TaskDocument): if len(relevant_calcs) > 0: best_task_doc = relevant_calcs[0] entry = best_task_doc.structure_entry - entry.data["material_id"] = entry.entry_id + entry.data["task_id"] = entry.entry_id entry.entry_id = material_id entries[rt] = entry From d588bd9635fc98b41cc0f301bf37c57ec8c76873 Mon Sep 17 00:00:00 2001 From: Shyam Dwaraknath Date: Sat, 20 Feb 2021 08:15:35 -0800 Subject: [PATCH 3/3] Update emmet-builders/emmet/builders/materials/electrodes.py --- emmet-builders/emmet/builders/materials/electrodes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/emmet-builders/emmet/builders/materials/electrodes.py b/emmet-builders/emmet/builders/materials/electrodes.py index ac9c2f398b..0fe24de01c 100644 --- a/emmet-builders/emmet/builders/materials/electrodes.py +++ b/emmet-builders/emmet/builders/materials/electrodes.py @@ -175,7 +175,6 @@ def get_items(self): ] } self.logger.debug(f"QUERY: {chemsys_query}") - print(chemsys_query) all_mats_in_chemsys = list( self.materials.query( criteria=chemsys_query,