From 7dd33fd04408f2da416a755ef13def579f1c2a6f Mon Sep 17 00:00:00 2001 From: jmmshn Date: Thu, 11 Mar 2021 14:02:45 -0800 Subject: [PATCH 1/8] working insertion electrode builder --- .../emmet/builders/materials/electrodes.py | 309 +----------------- .../builders/materials/structure_groups.py | 269 +++++++++++++++ emmet-builders/emmet/builders/vasp/thermo.py | 10 +- emmet-core/emmet/core/electrode.py | 7 +- 4 files changed, 292 insertions(+), 303 deletions(-) create mode 100644 emmet-builders/emmet/builders/materials/structure_groups.py diff --git a/emmet-builders/emmet/builders/materials/electrodes.py b/emmet-builders/emmet/builders/materials/electrodes.py index 4b43c1e287..73be677873 100644 --- a/emmet-builders/emmet/builders/materials/electrodes.py +++ b/emmet-builders/emmet/builders/materials/electrodes.py @@ -18,280 +18,23 @@ from pymatgen.analysis.structure_matcher import StructureMatcher, ElementComparator from pymatgen.apps.battery.insertion_battery import InsertionElectrode from pymatgen.core import Structure -from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.entries.computed_entries import ComputedStructureEntry, ComputedEntry __author__ = "Jimmy Shen" __email__ = "jmmshn@lbl.gov" -from pymatgen.entries.computed_entries import ComputedEntry - - -def s_hash(el): - return el.data["comp_delith"] - - -# MatDoc = namedtuple("MatDoc", ["material_id", "structure", "formula_pretty", "framework"]) - -REDOX_ELEMENTS = [ - "Ti", - "V", - "Cr", - "Mn", - "Fe", - "Co", - "Ni", - "Cu", - "Nb", - "Mo", - "Sn", - "Sb", - "W", - "Re", - "Bi", - "C", - "Hf", -] - -# WORKING_IONS = ["Li", "Be", "Na", "Mg", "K", "Ca", "Rb", "Sr", "Cs", "Ba"] - -MAT_PROPS = [ - "structure", - "material_id", - "formula_pretty", -] - -sg_fields = ["number", "hall_number", "international", "hall", "choice"] - - -def generic_groupby(list_in, comp=operator.eq): - """ - Group a list of unsortable objects - Args: - list_in: A list of generic objects - comp: (Default value = operator.eq) The comparator - Returns: - [int] list of labels for the input list - """ - list_out = [None] * len(list_in) - label_num = 0 - for i1, ls1 in enumerate(list_out): - if ls1 is not None: - continue - list_out[i1] = label_num - for i2, ls2 in list(enumerate(list_out))[i1 + 1 :]: - if comp(list_in[i1], list_in[i2]): - if list_out[i2] is None: - list_out[i2] = list_out[i1] - else: - list_out[i1] = list_out[i2] - label_num -= 1 - label_num += 1 - return list_out - - -class StructureGroupBuilder(Builder): - def __init__( - self, - materials: MongoStore, - sgroups: MongoStore, - working_ion: str, - query: dict = None, - ltol: float = 0.2, - stol: float = 0.3, - angle_tol: float = 5.0, - check_newer: bool = True, - **kwargs, - ): - """ - Aggregate materials entries into sgroups that are topotactically similar to each other. - This is an incremental builder that makes ensures that each materials id belongs to one StructureGroupDoc document - Args: - materials (Store): Store of materials documents that contains the structures - sgroups (Store): Store of grouped material ids - query (dict): dictionary to limit materials to be analyzed --- - only applied to the materials when we need to group structures - the phase diagram is still constructed with the entire set - """ - self.materials = materials - self.sgroups = sgroups - self.working_ion = working_ion - self.query = query if query else {} - self.ltol = ltol - self.stol = stol - self.angle_tol = angle_tol - self.check_newer = check_newer - super().__init__(sources=[materials], targets=[sgroups], **kwargs) - - def prechunk(self, number_splits: int) -> Iterable[Dict]: - """ - TODO can implement this for distributed runs by adding filters - """ - pass - - def get_items(self): - """ - Summary of the steps: - - query the materials database for different chemical systems that satisfies the base query - "contains redox element and working ion" - - Get the full chemsys list of interest - - The main loop is over all these chemsys. within the main loop: - - get newest timestamp for the material documents (max_mat_time) - - get the oldest timestamp for the target documents (min_target_time) - - if min_target_time is < max_mat_time then nuke all the target documents - """ - - # All potentially interesting chemsys must contain the working ion - base_query = { - "$and": [ - {"elements": {"$in": REDOX_ELEMENTS + [self.working_ion]}}, - self.query.copy(), - ] - } - self.logger.debug(f"Initial Chemsys QUERY: {base_query}") - - # get a chemsys that only contains the working ion since the working ion - # must be present for there to be voltage steps - all_chemsys = self.materials.distinct("chemsys", criteria=base_query) - # Contains the working ion but not ONLY the working ion - all_chemsys = [ - *filter( - lambda x: self.working_ion in x and len(x) > 1, - [chemsys_.split("-") for chemsys_ in all_chemsys], - ) - ] - - self.logger.debug( - f"Performing initial checks on {len(all_chemsys)} chemical systems containing redox elements with or without the Working Ion." - ) - self.total = len(all_chemsys) - - for chemsys_l in all_chemsys: - chemsys = "-".join(sorted(chemsys_l)) - chemsys_wo = "-".join(sorted(set(chemsys_l) - {self.working_ion})) - chemsys_query = { - "$and": [ - {"chemsys": {"$in": [chemsys_wo, chemsys]}}, - self.query.copy(), - ] - } - self.logger.debug(f"QUERY: {chemsys_query}") - all_mats_in_chemsys = list( - self.materials.query( - criteria=chemsys_query, - properties=MAT_PROPS + [self.materials.last_updated_field], - ) - ) - self.logger.debug( - f"Found {len(all_mats_in_chemsys)} materials in {chemsys_wo}" - ) - if self.check_newer: - all_target_docs = list( - self.sgroups.query( - criteria={"chemsys": chemsys}, - properties=[ - "material_id", - self.sgroups.last_updated_field, - "grouped_ids", - ], - ) - ) - self.logger.debug( - f"Found {len(all_target_docs)} Grouped documents in {chemsys_wo}" - ) - - mat_times = [ - mat_doc[self.materials.last_updated_field] - for mat_doc in all_mats_in_chemsys - ] - max_mat_time = max(mat_times, default=datetime.min) - self.logger.debug( - f"The newest material doc was generated at {max_mat_time}." - ) - - target_times = [ - g_doc[self.materials.last_updated_field] - for g_doc in all_target_docs - ] - min_target_time = min(target_times, default=datetime.max) - self.logger.debug( - f"The newest GROUP doc was generated at {min_target_time}." - ) - - mat_ids = set( - [mat_doc["material_id"] for mat_doc in all_mats_in_chemsys] - ) - - # If any material id is missing or if any material id has been updated - target_mat_ids = set() - for g_doc in all_target_docs: - target_mat_ids |= set(g_doc["grouped_ids"]) - - self.logger.debug( - f"There are {len(mat_ids)} material ids in the source database vs {len(target_mat_ids)} in the target database." - ) - if mat_ids == target_mat_ids and max_mat_time < min_target_time: - continue - else: - self.logger.info( - f"Nuking all {len(target_mat_ids)} documents in chemsys {chemsys} in the target database." - ) - self._remove_targets(list(target_mat_ids)) - - yield {"chemsys": chemsys, "materials": all_mats_in_chemsys} - - def update_targets(self, items: List): - items = list(filter(None, chain.from_iterable(items))) - if len(items) > 0: - self.logger.info("Updating {} sgroups documents".format(len(items))) - for struct_group_dict in items: - struct_group_dict[self.sgroups.last_updated_field] = datetime.utcnow() - self.sgroups.update(docs=items, key=["material_id"]) - else: - self.logger.info("No items to update") - - def _entry_from_mat_doc(self, mdoc): - # Note since we are just structure grouping we don't need to be careful with energy or correction - # All of the energy analysis is left to other builders - d_ = { - "entry_id": mdoc["material_id"], - "structure": mdoc["structure"], - "energy": -math.inf, - "correction": -math.inf, - } - return ComputedStructureEntry.from_dict(d_) - - def process_item(self, item: Any) -> Any: - entries = [*map(self._entry_from_mat_doc, item["materials"])] - s_groups = StructureGroupDoc.from_ungrouped_structure_entries( - entries=entries, - ignored_species=[self.working_ion], - ltol=self.ltol, - stol=self.stol, - angle_tol=self.angle_tol, - ) - # append the working_ion to the group ids - for sg in s_groups: - sg.material_id = f"{sg.material_id}_{self.working_ion}" - return [sg.dict() for sg in s_groups] - - def _remove_targets(self, rm_ids): - self.sgroups.remove_docs({"material_id": {"$in": rm_ids}}) - - class InsertionElectrodeBuilder(MapBuilder): def __init__( self, grouped_materials: MongoStore, insertion_electrode: MongoStore, thermo: MongoStore, - material: MongoStore, query: dict = None, **kwargs, ): self.grouped_materials = grouped_materials self.insertion_electrode = insertion_electrode self.thermo = thermo - self.material = material qq_ = {} if query is None else query qq_.update({"structure_matched": True, "has_distinct_compositions": True}) super().__init__( @@ -304,12 +47,12 @@ def __init__( def get_items(self): """""" - @lru_cache(None) + @lru_cache() def get_working_ion_entry(working_ion): with self.thermo as store: working_ion_docs = [*store.query({"chemsys": working_ion})] best_wion = min( - working_ion_docs, key=lambda x: x["thermo"]["energy_per_atom"] + working_ion_docs, key=lambda x: x["energy_per_atom"] ) return best_wion @@ -325,35 +68,23 @@ def modify_item(item): {"material_id": {"$in": item["grouped_ids"]}}, ] }, - properties=["material_id", "_sbxn", "thermo"], - ) - ] - - with self.material as store: - material_docs = [ - *store.query( - { - "$and": [ - {"material_id": {"$in": item["grouped_ids"]}}, - {"_sbxn": {"$in": ["core"]}}, - ] - }, - properties=["material_id", "structure"], + properties=["material_id", "_sbxn", "thermo", "entries", "energy_type", "energy_above_hull"], ) ] self.logger.debug(f"Found for {len(thermo_docs)} Thermo Documents.") + if len(item["ignored_species"]) != 1: raise ValueError( "Insertion electrode can only be defined for one working ion species" ) + working_ion_doc = get_working_ion_entry(item["ignored_species"][0]) return { "material_id": item["material_id"], "working_ion_doc": working_ion_doc, "working_ion": item["ignored_species"][0], "thermo_docs": thermo_docs, - "material_docs": material_docs, } yield from map(modify_item, super().get_items()) @@ -363,40 +94,26 @@ def unary_function(self, item): - Add volume information to each entry to create the insertion electrode document - Add the host structure """ - entries = [tdoc_["thermo"]["entry"] for tdoc_ in item["thermo_docs"]] - entries = list(map(ComputedEntry.from_dict, entries)) + entries = [tdoc_["entries"][tdoc_["energy_type"]] for tdoc_ in item["thermo_docs"]] + entries = list(map(ComputedStructureEntry.from_dict, entries)) working_ion_entry = ComputedEntry.from_dict( - item["working_ion_doc"]["thermo"]["entry"] + item["working_ion_doc"]["entries"][item["working_ion_doc"]['energy_type']] ) working_ion = working_ion_entry.composition.reduced_formula + decomp_energies = { - d_["material_id"]: d_["thermo"]["e_above_hull"] + d_["material_id"]: d_["energy_above_hull"] for d_ in item["thermo_docs"] } - mat_structures = { - mat_d_["material_id"]: Structure.from_dict(mat_d_["structure"]) - for mat_d_ in item["material_docs"] - } least_wion_ent = min( entries, key=lambda x: x.composition.get_atomic_fraction(working_ion) ) - mdoc_ = next( - filter( - lambda x: x["material_id"] == least_wion_ent.entry_id, - item["material_docs"], - ) - ) - host_structure = Structure.from_dict(mdoc_["structure"]) + host_structure = least_wion_ent.structure.copy() host_structure.remove_species([item["working_ion"]]) for ient in entries: - if mat_structures[ient.entry_id].composition != ient.composition: - raise RuntimeError( - f"In {item['material_id']}: the compositions for task {ient.entry_id} are matched " - "between the StructureGroup DB and the Thermo DB " - ) - ient.data["volume"] = mat_structures[ient.entry_id].volume + ient.data["volume"] = ient.structure.volume ient.data["decomposition_energy"] = decomp_energies[ient.entry_id] ie = InsertionElectrodeDoc.from_entries( diff --git a/emmet-builders/emmet/builders/materials/structure_groups.py b/emmet-builders/emmet/builders/materials/structure_groups.py new file mode 100644 index 0000000000..2cd463495e --- /dev/null +++ b/emmet-builders/emmet/builders/materials/structure_groups.py @@ -0,0 +1,269 @@ +import operator +import math +from datetime import datetime +from itertools import chain +from typing import Iterable, Dict, List, Any + +from emmet.core.structure_group import StructureGroupDoc +from maggma.builders import Builder +from maggma.stores import MongoStore +from pymatgen.entries.computed_entries import ComputedStructureEntry + +__author__ = "Jimmy Shen" +__email__ = "jmmshn@lbl.gov" + +from pymatgen.entries.computed_entries import ComputedEntry + +def s_hash(el): + return el.data["comp_delith"] + + +# MatDoc = namedtuple("MatDoc", ["material_id", "structure", "formula_pretty", "framework"]) + +REDOX_ELEMENTS = [ + "Ti", + "V", + "Cr", + "Mn", + "Fe", + "Co", + "Ni", + "Cu", + "Nb", + "Mo", + "Sn", + "Sb", + "W", + "Re", + "Bi", + "C", + "Hf", +] + +# WORKING_IONS = ["Li", "Be", "Na", "Mg", "K", "Ca", "Rb", "Sr", "Cs", "Ba"] + +MAT_PROPS = [ + "structure", + "material_id", + "formula_pretty", +] + +sg_fields = ["number", "hall_number", "international", "hall", "choice"] + + +def generic_groupby(list_in, comp=operator.eq): + """ + Group a list of unsortable objects + Args: + list_in: A list of generic objects + comp: (Default value = operator.eq) The comparator + Returns: + [int] list of labels for the input list + """ + list_out = [None] * len(list_in) + label_num = 0 + for i1, ls1 in enumerate(list_out): + if ls1 is not None: + continue + list_out[i1] = label_num + for i2, ls2 in list(enumerate(list_out))[i1 + 1 :]: + if comp(list_in[i1], list_in[i2]): + if list_out[i2] is None: + list_out[i2] = list_out[i1] + else: + list_out[i1] = list_out[i2] + label_num -= 1 + label_num += 1 + return list_out + + + +class StructureGroupBuilder(Builder): + def __init__( + self, + materials: MongoStore, + sgroups: MongoStore, + working_ion: str, + query: dict = None, + ltol: float = 0.2, + stol: float = 0.3, + angle_tol: float = 5.0, + check_newer: bool = True, + **kwargs, + ): + """ + Aggregate materials entries into sgroups that are topotactically similar to each other. + This is an incremental builder that makes ensures that each materials id belongs to one StructureGroupDoc document + Args: + materials (Store): Store of materials documents that contains the structures + sgroups (Store): Store of grouped material ids + query (dict): dictionary to limit materials to be analyzed --- + only applied to the materials when we need to group structures + the phase diagram is still constructed with the entire set + """ + self.materials = materials + self.sgroups = sgroups + self.working_ion = working_ion + self.query = query if query else {} + self.ltol = ltol + self.stol = stol + self.angle_tol = angle_tol + self.check_newer = check_newer + super().__init__(sources=[materials], targets=[sgroups], **kwargs) + + def prechunk(self, number_splits: int) -> Iterable[Dict]: + """ + TODO can implement this for distributed runs by adding filters + """ + pass + + def get_items(self): + """ + Summary of the steps: + - query the materials database for different chemical systems that satisfies the base query + "contains redox element and working ion" + - Get the full chemsys list of interest + - The main loop is over all these chemsys. within the main loop: + - get newest timestamp for the material documents (max_mat_time) + - get the oldest timestamp for the target documents (min_target_time) + - if min_target_time is < max_mat_time then nuke all the target documents + """ + + # All potentially interesting chemsys must contain the working ion + base_query = { + "$and": [ + {"elements": {"$in": REDOX_ELEMENTS + [self.working_ion]}}, + self.query.copy(), + ] + } + self.logger.debug(f"Initial Chemsys QUERY: {base_query}") + + # get a chemsys that only contains the working ion since the working ion + # must be present for there to be voltage steps + all_chemsys = self.materials.distinct("chemsys", criteria=base_query) + # Contains the working ion but not ONLY the working ion + all_chemsys = [ + *filter( + lambda x: self.working_ion in x and len(x) > 1, + [chemsys_.split("-") for chemsys_ in all_chemsys], + ) + ] + + self.logger.debug( + f"Performing initial checks on {len(all_chemsys)} chemical systems containing redox elements with or without the Working Ion." + ) + self.total = len(all_chemsys) + + for chemsys_l in all_chemsys: + chemsys = "-".join(sorted(chemsys_l)) + chemsys_wo = "-".join(sorted(set(chemsys_l) - {self.working_ion})) + chemsys_query = { + "$and": [ + {"chemsys": {"$in": [chemsys_wo, chemsys]}}, + self.query.copy(), + ] + } + self.logger.debug(f"QUERY: {chemsys_query}") + all_mats_in_chemsys = list( + self.materials.query( + criteria=chemsys_query, + properties=MAT_PROPS + [self.materials.last_updated_field], + ) + ) + self.logger.debug( + f"Found {len(all_mats_in_chemsys)} materials in {chemsys_wo}" + ) + if self.check_newer: + all_target_docs = list( + self.sgroups.query( + criteria={"chemsys": chemsys}, + properties=[ + "material_id", + self.sgroups.last_updated_field, + "grouped_ids", + ], + ) + ) + self.logger.debug( + f"Found {len(all_target_docs)} Grouped documents in {chemsys_wo}" + ) + + mat_times = [ + mat_doc[self.materials.last_updated_field] + for mat_doc in all_mats_in_chemsys + ] + max_mat_time = max(mat_times, default=datetime.min) + self.logger.debug( + f"The newest material doc was generated at {max_mat_time}." + ) + + target_times = [ + g_doc[self.materials.last_updated_field] + for g_doc in all_target_docs + ] + min_target_time = min(target_times, default=datetime.max) + self.logger.debug( + f"The newest GROUP doc was generated at {min_target_time}." + ) + + mat_ids = set( + [mat_doc["material_id"] for mat_doc in all_mats_in_chemsys] + ) + + # If any material id is missing or if any material id has been updated + target_mat_ids = set() + for g_doc in all_target_docs: + target_mat_ids |= set(g_doc["grouped_ids"]) + + self.logger.debug( + f"There are {len(mat_ids)} material ids in the source database vs {len(target_mat_ids)} in the target database." + ) + if mat_ids == target_mat_ids and max_mat_time < min_target_time: + continue + else: + self.logger.info( + f"Nuking all {len(target_mat_ids)} documents in chemsys {chemsys} in the target database." + ) + self._remove_targets(list(target_mat_ids)) + + yield {"chemsys": chemsys, "materials": all_mats_in_chemsys} + + def update_targets(self, items: List): + items = list(filter(None, chain.from_iterable(items))) + if len(items) > 0: + self.logger.info("Updating {} sgroups documents".format(len(items))) + for struct_group_dict in items: + struct_group_dict[self.sgroups.last_updated_field] = datetime.utcnow() + self.sgroups.update(docs=items, key=["material_id"]) + else: + self.logger.info("No items to update") + + def _entry_from_mat_doc(self, mdoc): + # Note since we are just structure grouping we don't need to be careful with energy or correction + # All of the energy analysis is left to other builders + d_ = { + "entry_id": mdoc["material_id"], + "structure": mdoc["structure"], + "energy": -math.inf, + "correction": -math.inf, + } + return ComputedStructureEntry.from_dict(d_) + + def process_item(self, item: Any) -> Any: + entries = [*map(self._entry_from_mat_doc, item["materials"])] + s_groups = StructureGroupDoc.from_ungrouped_structure_entries( + entries=entries, + ignored_species=[self.working_ion], + ltol=self.ltol, + stol=self.stol, + angle_tol=self.angle_tol, + ) + # append the working_ion to the group ids + for sg in s_groups: + sg.material_id = f"{sg.material_id}_{self.working_ion}" + return [sg.dict() for sg in s_groups] + + def _remove_targets(self, rm_ids): + self.sgroups.remove_docs({"material_id": {"$in": rm_ids}}) + + diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py index 4cc41b44f5..514e6fe957 100644 --- a/emmet-builders/emmet/builders/vasp/thermo.py +++ b/emmet-builders/emmet/builders/vasp/thermo.py @@ -116,17 +116,17 @@ def process_item(self, item: Tuple[List[str], List[ComputedEntry]]): self.logger.debug(f"Procesing {len(entries)} entries for {chemsys}") - material_entries = defaultdict(lambda: defaultdict(list)) + material_entries = defaultdict(dict) pd_entries = [] for entry in entries: - material_entries[entry.entry_id][entry.data["run_type"]].append(entry) + material_entries[entry.entry_id][entry.data["run_type"]] = entry # TODO: How to make this general and controllable via SETTINGS? for material_id in material_entries: if "GGA+U" in material_entries[material_id]: - pd_entries.extend(material_entries[material_id]["GGA+U"]) + pd_entries.append(material_entries[material_id]["GGA+U"]) elif "GGA" in material_entries[material_id]: - pd_entries.extend(material_entries[material_id]["GGA"]) + pd_entries.append(material_entries[material_id]["GGA"]) pd_entries = self.compatibility.process_entries(pd_entries) try: @@ -141,7 +141,7 @@ def process_item(self, item: Tuple[List[str], List[ComputedEntry]]): elsyms.extend([el.symbol for el in e.composition.elements]) self.logger.warning( - f"Phase diagram errorin chemsys {'-'.join(sorted(set(elsyms)))}: {p}" + f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}" ) return [] except Exception as e: diff --git a/emmet-core/emmet/core/electrode.py b/emmet-core/emmet/core/electrode.py index 26f9ce58d9..e1d643cff9 100644 --- a/emmet-core/emmet/core/electrode.py +++ b/emmet-core/emmet/core/electrode.py @@ -117,6 +117,8 @@ class InsertionElectrodeDoc(InsertionVoltagePairDoc): framework: Composition + electrode_object: Dict + # Make sure that the datetime field is properly formatted @validator("last_updated", pre=True) def last_updated_dict_ok(cls, v): @@ -132,7 +134,7 @@ def from_entries( ) -> Union["InsertionElectrodeDoc", None]: try: ie = InsertionElectrode.from_entries( - entries=grouped_entries, working_ion_entry=working_ion_entry + entries=grouped_entries, working_ion_entry=working_ion_entry, strip_structures=True ) except IndexError: return None @@ -140,9 +142,10 @@ def from_entries( d["num_steps"] = d.pop("nsteps", None) d["last_updated"] = datetime.utcnow() return cls( - task_id=task_id, + battery_id=task_id, host_structure=host_structure.as_dict(), framework=Composition(d["framework_formula"]), + electrode_object=ie.as_dict(), **d ) From c23b77b68bb2605f970e646ec26410f7fdec8ce9 Mon Sep 17 00:00:00 2001 From: jmmshn Date: Fri, 12 Mar 2021 07:21:21 -0800 Subject: [PATCH 2/8] combined all electrode builders into one file --- .../emmet/builders/materials/electrodes.py | 287 +++++++++++++++++- .../builders/materials/structure_groups.py | 269 ---------------- 2 files changed, 271 insertions(+), 285 deletions(-) delete mode 100644 emmet-builders/emmet/builders/materials/structure_groups.py diff --git a/emmet-builders/emmet/builders/materials/electrodes.py b/emmet-builders/emmet/builders/materials/electrodes.py index 73be677873..e8efefe193 100644 --- a/emmet-builders/emmet/builders/materials/electrodes.py +++ b/emmet-builders/emmet/builders/materials/electrodes.py @@ -1,36 +1,291 @@ -import operator import math +import operator from collections import namedtuple from datetime import datetime from functools import lru_cache -from itertools import groupby, chain +from itertools import chain, groupby from pprint import pprint -from typing import Iterable, Dict, List, Any +from typing import Any, Dict, Iterable, List -from emmet.core.electrode import InsertionElectrodeDoc -from emmet.core.structure_group import StructureGroupDoc -from emmet.core.utils import jsanitize from maggma.builders import Builder, MapBuilder from maggma.stores import MongoStore from monty.json import MontyEncoder from numpy import unique -from pymatgen.core import Composition -from pymatgen.analysis.structure_matcher import StructureMatcher, ElementComparator +from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher from pymatgen.apps.battery.insertion_battery import InsertionElectrode -from pymatgen.core import Structure -from pymatgen.entries.computed_entries import ComputedStructureEntry, ComputedEntry +from pymatgen.core import Composition, Structure +from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry + +from emmet.core.electrode import InsertionElectrodeDoc +from emmet.core.structure_group import StructureGroupDoc +from emmet.core.utils import jsanitize __author__ = "Jimmy Shen" __email__ = "jmmshn@lbl.gov" +from pymatgen.entries.computed_entries import ComputedEntry + + +def s_hash(el): + return el.data["comp_delith"] + + +# MatDoc = namedtuple("MatDoc", ["material_id", "structure", "formula_pretty", "framework"]) + +REDOX_ELEMENTS = [ + "Ti", + "V", + "Cr", + "Mn", + "Fe", + "Co", + "Ni", + "Cu", + "Nb", + "Mo", + "Sn", + "Sb", + "W", + "Re", + "Bi", + "C", + "Hf", +] + +# WORKING_IONS = ["Li", "Be", "Na", "Mg", "K", "Ca", "Rb", "Sr", "Cs", "Ba"] + +MAT_PROPS = [ + "structure", + "material_id", + "formula_pretty", +] + +sg_fields = ["number", "hall_number", "international", "hall", "choice"] + + +def generic_groupby(list_in, comp=operator.eq): + """ + Group a list of unsortable objects + Args: + list_in: A list of generic objects + comp: (Default value = operator.eq) The comparator + Returns: + [int] list of labels for the input list + """ + list_out = [None] * len(list_in) + label_num = 0 + for i1, ls1 in enumerate(list_out): + if ls1 is not None: + continue + list_out[i1] = label_num + for i2, ls2 in list(enumerate(list_out))[i1 + 1 :]: + if comp(list_in[i1], list_in[i2]): + if list_out[i2] is None: + list_out[i2] = list_out[i1] + else: + list_out[i1] = list_out[i2] + label_num -= 1 + label_num += 1 + return list_out + + + +class StructureGroupBuilder(Builder): + def __init__( + self, + materials: MongoStore, + sgroups: MongoStore, + working_ion: str, + query: dict = None, + ltol: float = 0.2, + stol: float = 0.3, + angle_tol: float = 5.0, + check_newer: bool = True, + **kwargs, + ): + """ + Aggregate materials entries into sgroups that are topotactically similar to each other. + This is an incremental builder that makes ensures that each materials id belongs to one StructureGroupDoc document + Args: + materials (Store): Store of materials documents that contains the structures + sgroups (Store): Store of grouped material ids + query (dict): dictionary to limit materials to be analyzed --- + only applied to the materials when we need to group structures + the phase diagram is still constructed with the entire set + """ + self.materials = materials + self.sgroups = sgroups + self.working_ion = working_ion + self.query = query if query else {} + self.ltol = ltol + self.stol = stol + self.angle_tol = angle_tol + self.check_newer = check_newer + super().__init__(sources=[materials], targets=[sgroups], **kwargs) + + def prechunk(self, number_splits: int) -> Iterable[Dict]: + """ + TODO can implement this for distributed runs by adding filters + """ + pass + + def get_items(self): + """ + Summary of the steps: + - query the materials database for different chemical systems that satisfies the base query + "contains redox element and working ion" + - Get the full chemsys list of interest + - The main loop is over all these chemsys. within the main loop: + - get newest timestamp for the material documents (max_mat_time) + - get the oldest timestamp for the target documents (min_target_time) + - if min_target_time is < max_mat_time then nuke all the target documents + """ + + # All potentially interesting chemsys must contain the working ion + base_query = { + "$and": [ + {"elements": {"$in": REDOX_ELEMENTS + [self.working_ion]}}, + self.query.copy(), + ] + } + self.logger.debug(f"Initial Chemsys QUERY: {base_query}") + + # get a chemsys that only contains the working ion since the working ion + # must be present for there to be voltage steps + all_chemsys = self.materials.distinct("chemsys", criteria=base_query) + # Contains the working ion but not ONLY the working ion + all_chemsys = [ + *filter( + lambda x: self.working_ion in x and len(x) > 1, + [chemsys_.split("-") for chemsys_ in all_chemsys], + ) + ] + + self.logger.debug( + f"Performing initial checks on {len(all_chemsys)} chemical systems containing redox elements with or without the Working Ion." + ) + self.total = len(all_chemsys) + + for chemsys_l in all_chemsys: + chemsys = "-".join(sorted(chemsys_l)) + chemsys_wo = "-".join(sorted(set(chemsys_l) - {self.working_ion})) + chemsys_query = { + "$and": [ + {"chemsys": {"$in": [chemsys_wo, chemsys]}}, + self.query.copy(), + ] + } + self.logger.debug(f"QUERY: {chemsys_query}") + all_mats_in_chemsys = list( + self.materials.query( + criteria=chemsys_query, + properties=MAT_PROPS + [self.materials.last_updated_field], + ) + ) + self.logger.debug( + f"Found {len(all_mats_in_chemsys)} materials in {chemsys_wo}" + ) + if self.check_newer: + all_target_docs = list( + self.sgroups.query( + criteria={"chemsys": chemsys}, + properties=[ + "material_id", + self.sgroups.last_updated_field, + "grouped_ids", + ], + ) + ) + self.logger.debug( + f"Found {len(all_target_docs)} Grouped documents in {chemsys_wo}" + ) + + mat_times = [ + mat_doc[self.materials.last_updated_field] + for mat_doc in all_mats_in_chemsys + ] + max_mat_time = max(mat_times, default=datetime.min) + self.logger.debug( + f"The newest material doc was generated at {max_mat_time}." + ) + + target_times = [ + g_doc[self.materials.last_updated_field] + for g_doc in all_target_docs + ] + min_target_time = min(target_times, default=datetime.max) + self.logger.debug( + f"The newest GROUP doc was generated at {min_target_time}." + ) + + mat_ids = set( + [mat_doc["material_id"] for mat_doc in all_mats_in_chemsys] + ) + + # If any material id is missing or if any material id has been updated + target_mat_ids = set() + for g_doc in all_target_docs: + target_mat_ids |= set(g_doc["grouped_ids"]) + + self.logger.debug( + f"There are {len(mat_ids)} material ids in the source database vs {len(target_mat_ids)} in the target database." + ) + if mat_ids == target_mat_ids and max_mat_time < min_target_time: + continue + else: + self.logger.info( + f"Nuking all {len(target_mat_ids)} documents in chemsys {chemsys} in the target database." + ) + self._remove_targets(list(target_mat_ids)) + + yield {"chemsys": chemsys, "materials": all_mats_in_chemsys} + + def update_targets(self, items: List): + items = list(filter(None, chain.from_iterable(items))) + if len(items) > 0: + self.logger.info("Updating {} sgroups documents".format(len(items))) + for struct_group_dict in items: + struct_group_dict[self.sgroups.last_updated_field] = datetime.utcnow() + self.sgroups.update(docs=items, key=["material_id"]) + else: + self.logger.info("No items to update") + + def _entry_from_mat_doc(self, mdoc): + # Note since we are just structure grouping we don't need to be careful with energy or correction + # All of the energy analysis is left to other builders + d_ = { + "entry_id": mdoc["material_id"], + "structure": mdoc["structure"], + "energy": -math.inf, + "correction": -math.inf, + } + return ComputedStructureEntry.from_dict(d_) + + def process_item(self, item: Any) -> Any: + entries = [*map(self._entry_from_mat_doc, item["materials"])] + s_groups = StructureGroupDoc.from_ungrouped_structure_entries( + entries=entries, + ignored_species=[self.working_ion], + ltol=self.ltol, + stol=self.stol, + angle_tol=self.angle_tol, + ) + # append the working_ion to the group ids + for sg in s_groups: + sg.material_id = f"{sg.material_id}_{self.working_ion}" + return [sg.dict() for sg in s_groups] + + def _remove_targets(self, rm_ids): + self.sgroups.remove_docs({"material_id": {"$in": rm_ids}}) + class InsertionElectrodeBuilder(MapBuilder): def __init__( - self, - grouped_materials: MongoStore, - insertion_electrode: MongoStore, - thermo: MongoStore, - query: dict = None, - **kwargs, + self, + grouped_materials: MongoStore, + insertion_electrode: MongoStore, + thermo: MongoStore, + query: dict = None, + **kwargs, ): self.grouped_materials = grouped_materials self.insertion_electrode = insertion_electrode diff --git a/emmet-builders/emmet/builders/materials/structure_groups.py b/emmet-builders/emmet/builders/materials/structure_groups.py deleted file mode 100644 index 2cd463495e..0000000000 --- a/emmet-builders/emmet/builders/materials/structure_groups.py +++ /dev/null @@ -1,269 +0,0 @@ -import operator -import math -from datetime import datetime -from itertools import chain -from typing import Iterable, Dict, List, Any - -from emmet.core.structure_group import StructureGroupDoc -from maggma.builders import Builder -from maggma.stores import MongoStore -from pymatgen.entries.computed_entries import ComputedStructureEntry - -__author__ = "Jimmy Shen" -__email__ = "jmmshn@lbl.gov" - -from pymatgen.entries.computed_entries import ComputedEntry - -def s_hash(el): - return el.data["comp_delith"] - - -# MatDoc = namedtuple("MatDoc", ["material_id", "structure", "formula_pretty", "framework"]) - -REDOX_ELEMENTS = [ - "Ti", - "V", - "Cr", - "Mn", - "Fe", - "Co", - "Ni", - "Cu", - "Nb", - "Mo", - "Sn", - "Sb", - "W", - "Re", - "Bi", - "C", - "Hf", -] - -# WORKING_IONS = ["Li", "Be", "Na", "Mg", "K", "Ca", "Rb", "Sr", "Cs", "Ba"] - -MAT_PROPS = [ - "structure", - "material_id", - "formula_pretty", -] - -sg_fields = ["number", "hall_number", "international", "hall", "choice"] - - -def generic_groupby(list_in, comp=operator.eq): - """ - Group a list of unsortable objects - Args: - list_in: A list of generic objects - comp: (Default value = operator.eq) The comparator - Returns: - [int] list of labels for the input list - """ - list_out = [None] * len(list_in) - label_num = 0 - for i1, ls1 in enumerate(list_out): - if ls1 is not None: - continue - list_out[i1] = label_num - for i2, ls2 in list(enumerate(list_out))[i1 + 1 :]: - if comp(list_in[i1], list_in[i2]): - if list_out[i2] is None: - list_out[i2] = list_out[i1] - else: - list_out[i1] = list_out[i2] - label_num -= 1 - label_num += 1 - return list_out - - - -class StructureGroupBuilder(Builder): - def __init__( - self, - materials: MongoStore, - sgroups: MongoStore, - working_ion: str, - query: dict = None, - ltol: float = 0.2, - stol: float = 0.3, - angle_tol: float = 5.0, - check_newer: bool = True, - **kwargs, - ): - """ - Aggregate materials entries into sgroups that are topotactically similar to each other. - This is an incremental builder that makes ensures that each materials id belongs to one StructureGroupDoc document - Args: - materials (Store): Store of materials documents that contains the structures - sgroups (Store): Store of grouped material ids - query (dict): dictionary to limit materials to be analyzed --- - only applied to the materials when we need to group structures - the phase diagram is still constructed with the entire set - """ - self.materials = materials - self.sgroups = sgroups - self.working_ion = working_ion - self.query = query if query else {} - self.ltol = ltol - self.stol = stol - self.angle_tol = angle_tol - self.check_newer = check_newer - super().__init__(sources=[materials], targets=[sgroups], **kwargs) - - def prechunk(self, number_splits: int) -> Iterable[Dict]: - """ - TODO can implement this for distributed runs by adding filters - """ - pass - - def get_items(self): - """ - Summary of the steps: - - query the materials database for different chemical systems that satisfies the base query - "contains redox element and working ion" - - Get the full chemsys list of interest - - The main loop is over all these chemsys. within the main loop: - - get newest timestamp for the material documents (max_mat_time) - - get the oldest timestamp for the target documents (min_target_time) - - if min_target_time is < max_mat_time then nuke all the target documents - """ - - # All potentially interesting chemsys must contain the working ion - base_query = { - "$and": [ - {"elements": {"$in": REDOX_ELEMENTS + [self.working_ion]}}, - self.query.copy(), - ] - } - self.logger.debug(f"Initial Chemsys QUERY: {base_query}") - - # get a chemsys that only contains the working ion since the working ion - # must be present for there to be voltage steps - all_chemsys = self.materials.distinct("chemsys", criteria=base_query) - # Contains the working ion but not ONLY the working ion - all_chemsys = [ - *filter( - lambda x: self.working_ion in x and len(x) > 1, - [chemsys_.split("-") for chemsys_ in all_chemsys], - ) - ] - - self.logger.debug( - f"Performing initial checks on {len(all_chemsys)} chemical systems containing redox elements with or without the Working Ion." - ) - self.total = len(all_chemsys) - - for chemsys_l in all_chemsys: - chemsys = "-".join(sorted(chemsys_l)) - chemsys_wo = "-".join(sorted(set(chemsys_l) - {self.working_ion})) - chemsys_query = { - "$and": [ - {"chemsys": {"$in": [chemsys_wo, chemsys]}}, - self.query.copy(), - ] - } - self.logger.debug(f"QUERY: {chemsys_query}") - all_mats_in_chemsys = list( - self.materials.query( - criteria=chemsys_query, - properties=MAT_PROPS + [self.materials.last_updated_field], - ) - ) - self.logger.debug( - f"Found {len(all_mats_in_chemsys)} materials in {chemsys_wo}" - ) - if self.check_newer: - all_target_docs = list( - self.sgroups.query( - criteria={"chemsys": chemsys}, - properties=[ - "material_id", - self.sgroups.last_updated_field, - "grouped_ids", - ], - ) - ) - self.logger.debug( - f"Found {len(all_target_docs)} Grouped documents in {chemsys_wo}" - ) - - mat_times = [ - mat_doc[self.materials.last_updated_field] - for mat_doc in all_mats_in_chemsys - ] - max_mat_time = max(mat_times, default=datetime.min) - self.logger.debug( - f"The newest material doc was generated at {max_mat_time}." - ) - - target_times = [ - g_doc[self.materials.last_updated_field] - for g_doc in all_target_docs - ] - min_target_time = min(target_times, default=datetime.max) - self.logger.debug( - f"The newest GROUP doc was generated at {min_target_time}." - ) - - mat_ids = set( - [mat_doc["material_id"] for mat_doc in all_mats_in_chemsys] - ) - - # If any material id is missing or if any material id has been updated - target_mat_ids = set() - for g_doc in all_target_docs: - target_mat_ids |= set(g_doc["grouped_ids"]) - - self.logger.debug( - f"There are {len(mat_ids)} material ids in the source database vs {len(target_mat_ids)} in the target database." - ) - if mat_ids == target_mat_ids and max_mat_time < min_target_time: - continue - else: - self.logger.info( - f"Nuking all {len(target_mat_ids)} documents in chemsys {chemsys} in the target database." - ) - self._remove_targets(list(target_mat_ids)) - - yield {"chemsys": chemsys, "materials": all_mats_in_chemsys} - - def update_targets(self, items: List): - items = list(filter(None, chain.from_iterable(items))) - if len(items) > 0: - self.logger.info("Updating {} sgroups documents".format(len(items))) - for struct_group_dict in items: - struct_group_dict[self.sgroups.last_updated_field] = datetime.utcnow() - self.sgroups.update(docs=items, key=["material_id"]) - else: - self.logger.info("No items to update") - - def _entry_from_mat_doc(self, mdoc): - # Note since we are just structure grouping we don't need to be careful with energy or correction - # All of the energy analysis is left to other builders - d_ = { - "entry_id": mdoc["material_id"], - "structure": mdoc["structure"], - "energy": -math.inf, - "correction": -math.inf, - } - return ComputedStructureEntry.from_dict(d_) - - def process_item(self, item: Any) -> Any: - entries = [*map(self._entry_from_mat_doc, item["materials"])] - s_groups = StructureGroupDoc.from_ungrouped_structure_entries( - entries=entries, - ignored_species=[self.working_ion], - ltol=self.ltol, - stol=self.stol, - angle_tol=self.angle_tol, - ) - # append the working_ion to the group ids - for sg in s_groups: - sg.material_id = f"{sg.material_id}_{self.working_ion}" - return [sg.dict() for sg in s_groups] - - def _remove_targets(self, rm_ids): - self.sgroups.remove_docs({"material_id": {"$in": rm_ids}}) - - From 7beca8c359e327bf9b39edc0079b0d8a67d74c0e Mon Sep 17 00:00:00 2001 From: jmmshn Date: Fri, 12 Mar 2021 12:27:18 -0800 Subject: [PATCH 3/8] wip wip --- emmet-core/emmet/core/migration_graph.py | 117 +++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 emmet-core/emmet/core/migration_graph.py diff --git a/emmet-core/emmet/core/migration_graph.py b/emmet-core/emmet/core/migration_graph.py new file mode 100644 index 0000000000..e96e783f81 --- /dev/null +++ b/emmet-core/emmet/core/migration_graph.py @@ -0,0 +1,117 @@ +from typing import Dict, List, Union, Tuple + +from pydantic import BaseModel, Field, validator +from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph +from pymatgen.analysis.diffusion.neb.pathfinder import MigrationHop +from pymatgen.analysis.graphs import StructureGraph +from pymatgen.core import Composition, Structure, PeriodicSite + + +class Hop(BaseModel): + """ + Data for a particular hop, this is distinct from the Migration Hop object since this document + only stores the data related a particualr hop but not the symmetrized structure itself. + """ + iindex: int = Field(None, description="") + eindex: int = Field(None, description="") + ipos: Tuple[float, float, float] = Field(None, description="") + epos: Tuple[float, float, float] = Field(None, description="") + ipos_cart: Tuple[float, float, float] = Field(None, description="") + epos_cart: Tuple[float, float, float] = Field(None, description="") + to_jimage: Tuple[int, int, int] = Field(None, description="") + distance: float = Field(None, description="") + hop_label: int = Field(None, description="") + + +class MigrationGraphDoc(BaseModel): + """ + Data for MigrationGraph objects from pymatgen-diffusion. + Note: + This will just be used to construct the object for each material. + The only data we will use are the "site energies" defined at each meta-stable migrating ion site. + In the future more advanced query capabilities should be introduced with fields in the document model. + """ + + structure: Structure = Field( + None, + description="The atomic structure with all migting ion sites represented as atoms of the same species." + ) + + m_graph: StructureGraph = Field( + None, + description="The structure graph that represents the migration network." + ) + + hops: Dict[int, Hop] = Field( + None, + description="All of the hops in the system given as a list." + ) + + unique_hops: Dict[int, Hop] = Field( + None, + description="The unique hops dictionary keyed by the hop label {0: {=Dictionary of properties=}}" + ) + + host_structure: Structure = Field( + None, + description="The empty host lattice without the migrating ion." + ) + + symprec: float = Field(None, description="Parameter used by pymatgen to determin equivalent hops.") + + vac_mode: bool = Field(None, description="Indicates whether vacancy mode should be used [currently under-supported].") + + @classmethod + def from_migration_graph(cls, migration_graph: MigrationGraph): + """ + Construct the document using a MigrationGraph object + """ + summary_dict = migration_graph.get_summary_dict() + + return cls( + structure=migration_graph.structure, + m_graph=migration_graph.m_graph, + hops = summary_dict["hops"], + unique_hops=summary_dict["unique_hops"], + host_structure=migration_graph.host_structure, + symprec=migration_graph.symprec, + vac_mode=migration_graph.vac_mode + ) + + def as_migration_graph(self): + """ + Get a migration graph object from this document + """ + mg = MigrationGraph( + structure=self.structure, + m_graph=self.m_graph, + symprec=self.symprec, + vac_mode=self.vac_mode + ) + + # make sure there is a one-to-one mapping between the unique hops dictionary + def get_mg_uhop_key(ipos, epos): + isite = PeriodicSite(coords=self.ipos, lattice=self.structure.lattice) + esite = PeriodicSite(coords=self.epos, lattice=self.structure.lattice) + hop = MigrationHop(isite, esite, symm_structure=mg.symm_structure) + + for k,v in mg.unique_hops.items(): + if hop == v['hop']: + return k + + for k, v in self.unique_hops.items(): + mg_k = get_mg_uhop_key(v["ipos"], v["epos"]) + if k != mg_k: + raise RuntimeError("The unique hops in the reconstructed migration graph is different than the one in the document" + f"MigrationGraphDoc ({k}) MigrationGraph ({mg_k})") + + # TODO add any datamapping from the DB to reconstructed object here. + return mg + + + + + + + + From 29bb3dbfda6e071162126178c79182cc4b7413d8 Mon Sep 17 00:00:00 2001 From: jmmshn Date: Sun, 14 Mar 2021 20:58:25 -0700 Subject: [PATCH 4/8] delete spaces --- emmet-core/emmet/core/migration_graph.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/emmet-core/emmet/core/migration_graph.py b/emmet-core/emmet/core/migration_graph.py index e96e783f81..30eb60a80d 100644 --- a/emmet-core/emmet/core/migration_graph.py +++ b/emmet-core/emmet/core/migration_graph.py @@ -106,12 +106,5 @@ def get_mg_uhop_key(ipos, epos): f"MigrationGraphDoc ({k}) MigrationGraph ({mg_k})") # TODO add any datamapping from the DB to reconstructed object here. - return mg - - - - - - - + return mg \ No newline at end of file From 6cee3ff9c4a0991e67b10ee49949cb6e6ed50a9a Mon Sep 17 00:00:00 2001 From: jmmshn Date: Mon, 15 Mar 2021 18:14:16 -0700 Subject: [PATCH 5/8] changed task_id -> material_id --- emmet-builders/emmet/builders/vasp/thermo.py | 2 +- emmet-builders/requirements.txt | 4 ++-- emmet-core/emmet/core/migration_graph.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py index 514e6fe957..3c65c5bc7a 100644 --- a/emmet-builders/emmet/builders/vasp/thermo.py +++ b/emmet-builders/emmet/builders/vasp/thermo.py @@ -249,7 +249,7 @@ def get_new_chemsys(self) -> Set: thermo_mat_ids = self.thermo.distinct(self.thermo.key) mat_ids = self.materials.distinct(self.materials.key, self.query) dif_task_ids = list(set(mat_ids) - set(thermo_mat_ids)) - q = {"task_id": {"$in": dif_task_ids}} + q = {"material_id": {"$in": dif_task_ids}} new_mat_chemsys = set(self.materials.distinct("chemsys", q)) self.logger.debug(f"Found {len(new_mat_chemsys)} new chemical systems") diff --git a/emmet-builders/requirements.txt b/emmet-builders/requirements.txt index fcc8b3e924..74cfa2f591 100644 --- a/emmet-builders/requirements.txt +++ b/emmet-builders/requirements.txt @@ -1,3 +1,3 @@ -pymatgen==2020.12.31 -maggma==0.25.0 +pymatgen>=2020.12.31 +maggma>=0.25.0 emmet-core diff --git a/emmet-core/emmet/core/migration_graph.py b/emmet-core/emmet/core/migration_graph.py index 30eb60a80d..80c1d72f30 100644 --- a/emmet-core/emmet/core/migration_graph.py +++ b/emmet-core/emmet/core/migration_graph.py @@ -102,8 +102,8 @@ def get_mg_uhop_key(ipos, epos): for k, v in self.unique_hops.items(): mg_k = get_mg_uhop_key(v["ipos"], v["epos"]) if k != mg_k: - raise RuntimeError("The unique hops in the reconstructed migration graph is different than the one in the document" - f"MigrationGraphDoc ({k}) MigrationGraph ({mg_k})") + raise RuntimeError("The unique hops in the reconstructed migration graph is different than the one " + f"in the document MigrationGraphDoc ({k}) MigrationGraph ({mg_k})") # TODO add any datamapping from the DB to reconstructed object here. From 13d21b1ed93e8620f74e37ca4178b4d290084fb7 Mon Sep 17 00:00:00 2001 From: jmmshn Date: Mon, 15 Mar 2021 22:04:56 -0700 Subject: [PATCH 6/8] updated error message updated error message update --- emmet-builders/emmet/builders/vasp/thermo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py index 3c65c5bc7a..850b3922c6 100644 --- a/emmet-builders/emmet/builders/vasp/thermo.py +++ b/emmet-builders/emmet/builders/vasp/thermo.py @@ -114,7 +114,7 @@ def process_item(self, item: Tuple[List[str], List[ComputedEntry]]): ) chemsys = "-".join(elements) - self.logger.debug(f"Procesing {len(entries)} entries for {chemsys}") + self.logger.debug(f"Processing {len(entries)} entries for {chemsys}") material_entries = defaultdict(dict) pd_entries = [] @@ -128,6 +128,7 @@ def process_item(self, item: Tuple[List[str], List[ComputedEntry]]): elif "GGA" in material_entries[material_id]: pd_entries.append(material_entries[material_id]["GGA"]) pd_entries = self.compatibility.process_entries(pd_entries) + self.logger.debug(f"{len(pd_entries)} remain in {chemsys} after filtering") try: docs = ThermoDoc.from_entries(pd_entries) @@ -145,7 +146,7 @@ def process_item(self, item: Tuple[List[str], List[ComputedEntry]]): ) return [] except Exception as e: - self.logger.error(f"Got unexpected error: {e}") + self.logger.error(f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}") return [] return [d.dict() for d in docs] From 4f5a0f6ffe670ee5c8233e96413fb6a85dcffc61 Mon Sep 17 00:00:00 2001 From: jmmshn Date: Mon, 15 Mar 2021 22:07:52 -0700 Subject: [PATCH 7/8] return nothing --- emmet-builders/emmet/builders/vasp/thermo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py index 850b3922c6..a04eed5b67 100644 --- a/emmet-builders/emmet/builders/vasp/thermo.py +++ b/emmet-builders/emmet/builders/vasp/thermo.py @@ -106,6 +106,8 @@ def get_items(self) -> Iterator[List[Dict]]: def process_item(self, item: Tuple[List[str], List[ComputedEntry]]): entries = item + if len(entries) == 0: + return [] entries = [ComputedStructureEntry.from_dict(entry) for entry in entries] # determine chemsys From b5e2c7978e5505e1aba57638d8be97b107548f62 Mon Sep 17 00:00:00 2001 From: jmmshn Date: Tue, 16 Mar 2021 14:27:13 -0700 Subject: [PATCH 8/8] changed pymatgen verion --- emmet-builders/emmet/builders/vasp/thermo.py | 7 +------ emmet-builders/requirements.txt | 2 +- emmet-builders/setup.py | 6 +++++- emmet-core/emmet/core/thermo.py | 7 +------ emmet-core/emmet/core/vasp/material.py | 14 ++++++-------- emmet-core/requirements.txt | 2 +- 6 files changed, 15 insertions(+), 23 deletions(-) diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py index a04eed5b67..124c83222d 100644 --- a/emmet-builders/emmet/builders/vasp/thermo.py +++ b/emmet-builders/emmet/builders/vasp/thermo.py @@ -6,7 +6,7 @@ from maggma.core import Builder, Store from monty.json import MontyDecoder from pymatgen.core import Structure -from pymatgen.analysis.phase_diagram import PhaseDiagram +from pymatgen.analysis.phase_diagram import PhaseDiagramError from pymatgen.analysis.structure_analyzer import oxide_type from pymatgen.entries.compatibility import MaterialsProjectCompatibility from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry @@ -18,11 +18,6 @@ ) from emmet.core.thermo import ThermoDoc from emmet.core.vasp.calc_types import run_type -class PhaseDiagramError(Exception): - """ - An exception class for Phase Diagram generation. - """ - pass class Thermo(Builder): def __init__( diff --git a/emmet-builders/requirements.txt b/emmet-builders/requirements.txt index 74cfa2f591..15986ed13e 100644 --- a/emmet-builders/requirements.txt +++ b/emmet-builders/requirements.txt @@ -1,3 +1,3 @@ -pymatgen>=2020.12.31 maggma>=0.25.0 +-e git://github.com/materialsproject/pymatgen.git@master#egg=pymatgen emmet-core diff --git a/emmet-builders/setup.py b/emmet-builders/setup.py index 0ab2ceb2f9..ae5d444266 100644 --- a/emmet-builders/setup.py +++ b/emmet-builders/setup.py @@ -1,9 +1,13 @@ import datetime from pathlib import Path from setuptools import setup, find_namespace_packages +required = [] with open(Path(__file__).parent / "requirements.txt") as f: - required = f.read().splitlines() + for line in f.readlines(): + if "#egg=" in line: + continue + required.append(line) setup( name="emmet-builders", diff --git a/emmet-core/emmet/core/thermo.py b/emmet-core/emmet/core/thermo.py index 239b2fdcaf..e607e795df 100644 --- a/emmet-core/emmet/core/thermo.py +++ b/emmet-core/emmet/core/thermo.py @@ -4,7 +4,7 @@ from typing import ClassVar, Dict, List, Union from pydantic import BaseModel, Field -from pymatgen.analysis.phase_diagram import PhaseDiagram +from pymatgen.analysis.phase_diagram import PhaseDiagram, PhaseDiagramError from pymatgen.core import Composition from pymatgen.core.periodic_table import Element from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry @@ -12,11 +12,6 @@ from emmet.core.material_property import PropertyDoc from emmet.core.mpid import MPID from emmet.core.structure import StructureMetadata -class PhaseDiagramError(Exception): - """ - An exception class for Phase Diagram generation. - """ - pass class DecompositionProduct(BaseModel): """ diff --git a/emmet-core/emmet/core/vasp/material.py b/emmet-core/emmet/core/vasp/material.py index e9fed2fc2d..08f35d1a50 100644 --- a/emmet-core/emmet/core/vasp/material.py +++ b/emmet-core/emmet/core/vasp/material.py @@ -54,6 +54,12 @@ def from_tasks( quality_scores: quality scores for various calculation types use_statics: Use statics to define a material """ + if task_group == 0: + raise Exception(f"Must have more than one task in the group.") + + # Material ID + possible_mat_ids = [task.task_id for task in task_group] + material_id = min(possible_mat_ids) # Metadata last_updated = max(task.last_updated for task in task_group) @@ -77,14 +83,6 @@ def from_tasks( else structure_optimizations ) - # Material ID - possible_mat_ids = [task.task_id for task in structure_calcs] - possible_mat_ids = sorted(possible_mat_ids) - - if len(possible_mat_ids) == 0: - raise Exception(f"Could not find a material ID for {task_ids}") - else: - material_id = possible_mat_ids[0] def _structure_eval(task: TaskDocument): """ diff --git a/emmet-core/requirements.txt b/emmet-core/requirements.txt index 4dc90ff43f..946c60786b 100644 --- a/emmet-core/requirements.txt +++ b/emmet-core/requirements.txt @@ -1,4 +1,4 @@ -pymatgen==2021.2.16 +git+git://github.com/materialsproject/pymatgen@master#egg=pymatgen https://github.com/materialsvirtuallab/monty/archive/8d67c335bd5d8bb71ecc8ac732c82a53e0def4a1.zip pydantic==1.8.1 pybtex==0.24.0