Skip to content

Commit

Permalink
made structure grouper work with material_ids
Browse files Browse the repository at this point in the history
  • Loading branch information
jmmshn committed Feb 17, 2021
1 parent 2e64c09 commit bd71564
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 30 deletions.
45 changes: 23 additions & 22 deletions emmet-builders/emmet/builders/materials/electrodes.py
Expand Up @@ -30,7 +30,7 @@ def s_hash(el):
return el.data["comp_delith"]


# MatDoc = namedtuple("MatDoc", ["task_id", "structure", "formula_pretty", "framework"])
# MatDoc = namedtuple("MatDoc", ["material_id", "structure", "formula_pretty", "framework"])

REDOX_ELEMENTS = [
"Ti",
Expand All @@ -56,7 +56,7 @@ def s_hash(el):

MAT_PROPS = [
"structure",
"task_id",
"material_id",
"formula_pretty",
]

Expand Down Expand Up @@ -171,12 +171,11 @@ def get_items(self):
chemsys_query = {
"$and": [
{"chemsys": {"$in": [chemsys_wo, chemsys]}},
{"_sbxn": {"$in": ["core"]}},
self.query.copy(),
]
}
self.logger.debug(f"QUERY: {chemsys_query}")

print(chemsys_query)
all_mats_in_chemsys = list(
self.materials.query(
criteria=chemsys_query,
Expand All @@ -191,7 +190,7 @@ def get_items(self):
self.sgroups.query(
criteria={"chemsys": chemsys},
properties=[
"task_id",
"material_id",
self.sgroups.last_updated_field,
"grouped_ids",
],
Expand Down Expand Up @@ -219,7 +218,9 @@ def get_items(self):
f"The newest GROUP doc was generated at {min_target_time}."
)

mat_ids = set([mat_doc["task_id"] for mat_doc in all_mats_in_chemsys])
mat_ids = set(
[mat_doc["material_id"] for mat_doc in all_mats_in_chemsys]
)

# If any material id is missing or if any material id has been updated
target_mat_ids = set()
Expand All @@ -245,15 +246,15 @@ def update_targets(self, items: List):
self.logger.info("Updating {} sgroups documents".format(len(items)))
for struct_group_dict in items:
struct_group_dict[self.sgroups.last_updated_field] = datetime.utcnow()
self.sgroups.update(docs=items, key=["task_id"])
self.sgroups.update(docs=items, key=["material_id"])
else:
self.logger.info("No items to update")

def _entry_from_mat_doc(self, mdoc):
# Note: since we are only grouping structures here, the energy and correction values do not matter.
# All of the energy analysis is left to other builders.
d_ = {
"entry_id": mdoc["task_id"],
"entry_id": mdoc["material_id"],
"structure": mdoc["structure"],
"energy": -math.inf,
"correction": -math.inf,
Expand All @@ -271,11 +272,11 @@ def process_item(self, item: Any) -> Any:
)
# append the working_ion to the group ids
for sg in s_groups:
sg.task_id = f"{sg.task_id}_{self.working_ion}"
sg.material_id = f"{sg.material_id}_{self.working_ion}"
return [sg.dict() for sg in s_groups]

def _remove_targets(self, rm_ids):
    """Delete structure-group documents from the ``sgroups`` store.

    Args:
        rm_ids: Iterable of ``material_id`` values identifying the
            structure-group documents to remove.
    """
    # Structure-group documents are keyed on "material_id"; only that key is
    # queried. The ids are materialised as a list so the "$in" operand is
    # always an array, even if a caller passes a set or a generator.
    self.sgroups.remove_docs({"material_id": {"$in": list(rm_ids)}})


class InsertionElectrodeBuilder(MapBuilder):
Expand Down Expand Up @@ -315,18 +316,17 @@ def get_working_ion_entry(working_ion):

def modify_item(item):
self.logger.debug(
f"Looking for {len(item['grouped_ids'])} task_ids in the Thermo DB."
f"Looking for {len(item['grouped_ids'])} material_id in the Thermo DB."
)
with self.thermo as store:
thermo_docs = [
*store.query(
{
"$and": [
{"task_id": {"$in": item["grouped_ids"]}},
{"_sbxn": {"$in": ["core"]}},
{"material_id": {"$in": item["grouped_ids"]}},
]
},
properties=["task_id", "_sbxn", "thermo"],
properties=["material_id", "_sbxn", "thermo"],
)
]

Expand All @@ -335,11 +335,11 @@ def modify_item(item):
*store.query(
{
"$and": [
{"task_id": {"$in": item["grouped_ids"]}},
{"material_id": {"$in": item["grouped_ids"]}},
{"_sbxn": {"$in": ["core"]}},
]
},
properties=["task_id", "structure"],
properties=["material_id", "structure"],
)
]

Expand All @@ -350,7 +350,7 @@ def modify_item(item):
)
working_ion_doc = get_working_ion_entry(item["ignored_species"][0])
return {
"task_id": item["task_id"],
"material_id": item["material_id"],
"working_ion_doc": working_ion_doc,
"working_ion": item["ignored_species"][0],
"thermo_docs": thermo_docs,
Expand All @@ -371,10 +371,11 @@ def unary_function(self, item):
)
working_ion = working_ion_entry.composition.reduced_formula
decomp_energies = {
d_["task_id"]: d_["thermo"]["e_above_hull"] for d_ in item["thermo_docs"]
d_["material_id"]: d_["thermo"]["e_above_hull"]
for d_ in item["thermo_docs"]
}
mat_structures = {
mat_d_["task_id"]: Structure.from_dict(mat_d_["structure"])
mat_d_["material_id"]: Structure.from_dict(mat_d_["structure"])
for mat_d_ in item["material_docs"]
}

Expand All @@ -383,7 +384,7 @@ def unary_function(self, item):
)
mdoc_ = next(
filter(
lambda x: x["task_id"] == least_wion_ent.entry_id,
lambda x: x["material_id"] == least_wion_ent.entry_id,
item["material_docs"],
)
)
Expand All @@ -393,7 +394,7 @@ def unary_function(self, item):
for ient in entries:
if mat_structures[ient.entry_id].composition != ient.composition:
raise RuntimeError(
f"In {item['task_id']}: the compositions for task {ient.entry_id} are matched "
f"In {item['material_id']}: the compositions for task {ient.entry_id} are matched "
"between the StructureGroup DB and the Thermo DB "
)
ient.data["volume"] = mat_structures[ient.entry_id].volume
Expand All @@ -402,7 +403,7 @@ def unary_function(self, item):
ie = InsertionElectrodeDoc.from_entries(
grouped_entries=entries,
working_ion_entry=working_ion_entry,
task_id=item["task_id"],
task_id=item["material_id"],
host_structure=host_structure,
)
if ie is None:
Expand Down
4 changes: 2 additions & 2 deletions emmet-core/emmet/core/electrode.py
Expand Up @@ -78,7 +78,7 @@ class InsertionElectrodeDoc(InsertionVoltagePairDoc):
Insertion electrode
"""

task_id: str = Field(None, description="The id for this battery document.")
battery_id: str = Field(None, description="The id for this battery document.")

framework_formula: str = Field(
None, description="The id for this battery document."
Expand Down Expand Up @@ -158,7 +158,7 @@ class ConversionVoltagePairDoc(VoltagePairDoc):


class ConversionElectrodeDoc(ConversionVoltagePairDoc):
task_id: str = Field(None, description="The id for this battery document.")
battery_id: str = Field(None, description="The id for this battery document.")

adj_pairs: List[ConversionVoltagePairDoc] = Field(
None,
Expand Down
8 changes: 4 additions & 4 deletions emmet-core/emmet/core/structure_group.py
Expand Up @@ -49,9 +49,9 @@ class StructureGroupDoc(BaseModel):
Group of structure
"""

task_id: str = Field(
material_id: str = Field(
None,
description="The combined task_id of the grouped document is given by the numerically smallest task id ",
description="The combined material_id of the grouped document is given by the numerically smallest task id ",
)

structure_matched: bool = Field(
Expand Down Expand Up @@ -122,7 +122,7 @@ def from_grouped_entries(
lowest_id = min(ids, key=_get_id_num)

fields = {
"task_id": lowest_id,
"material_id": lowest_id,
"grouped_ids": ids,
"structure_matched": structure_matched,
"framework_formula": framework_str,
Expand Down Expand Up @@ -223,7 +223,7 @@ def group_entries_with_structure_matcher(
def _get_id_num(task_id) -> Union[int, str]:
    """Return the numeric part of an identifier.

    Args:
        task_id: Either an integer, which is returned unchanged, or a string
            whose last "-"-separated field is an integer (e.g. "mp-1234").
            A string containing no "-" is parsed as a whole (e.g. "1234").

    Returns:
        The integer value extracted from ``task_id``.

    Raises:
        ValueError: If ``task_id`` is neither an int nor a string, or if the
            trailing field of the string cannot be parsed as an integer.
    """
    error_msg = "TaskID needs to be either a number or of the form xxx-#####"

    if isinstance(task_id, int):
        return task_id

    if isinstance(task_id, str):
        # Only the field after the last "-" carries the number.
        suffix = task_id.split("-")[-1]
        try:
            return int(suffix)
        except ValueError as err:
            # Re-raise with the descriptive message instead of int()'s
            # generic "invalid literal" text; the exception type is unchanged.
            raise ValueError(error_msg) from err

    raise ValueError(error_msg)
Expand Down
2 changes: 1 addition & 1 deletion emmet-core/emmet/core/vasp/material.py
Expand Up @@ -137,7 +137,7 @@ def _structure_eval(task: TaskDocument):
if len(relevant_calcs) > 0:
best_task_doc = relevant_calcs[0]
entry = best_task_doc.structure_entry
entry.data["task_id"] = entry.entry_id
entry.data["material_id"] = entry.entry_id
entry.entry_id = material_id
entries[rt] = entry

Expand Down
2 changes: 1 addition & 1 deletion tests/emmet-core/test_structure_group.py
Expand Up @@ -38,7 +38,7 @@ def test_StructureGroupDoc_from_grouped_entries(entries_lto):
sgroup_doc = StructureGroupDoc.from_grouped_entries(
entries_lto, ignored_species=["Li"], structure_matched=True
)
assert sgroup_doc.task_id == "mp-0"
assert sgroup_doc.material_id == "mp-0"
assert sgroup_doc.grouped_ids == ["mp-0", "mp-1", "mp-2", "mp-3", "mp-4", "mp-5"]
assert sgroup_doc.framework_formula == "TiO2"
assert sgroup_doc.ignored_species == ["Li"]
Expand Down

0 comments on commit bd71564

Please sign in to comment.