diff --git a/emmet-api/app.py b/emmet-api/app.py index 4b6fbc89d8..4d5847431b 100644 --- a/emmet-api/app.py +++ b/emmet-api/app.py @@ -42,24 +42,30 @@ ) task_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="task_id", - collection_name="tasks", + uri=db_uri, database="mp_core", key="task_id", collection_name="tasks", ) thermo_store = MongoURIStore( uri=db_uri, database=f"mp_core_{db_suffix}", - key="material_id", + key="thermo_id", collection_name="thermo", ) - phase_diagram_store = MongoURIStore( + s3_phase_diagram_index = MongoURIStore( uri=db_uri, - database=f"mp_core_{db_suffix}", - key="chemsys", - collection_name="phase_diagram", + database="mp_core", + key="phase_diagram_id", + collection_name="s3_phase_diagram_index", + ) + + phase_diagram_store = S3Store( + index=s3_phase_diagram_index, + bucket="mp-phase-diagrams", + s3_workers=24, + key="phase_diagram_id", + searchable_fields=["chemsys", "thermo_type", "phase_diagram_id"], + compress=True, ) dielectric_store = MongoURIStore( @@ -84,31 +90,19 @@ ) phonon_bs_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="material_id", - collection_name="pmg_ph_bs", + uri=db_uri, database="mp_core", key="material_id", collection_name="pmg_ph_bs", ) eos_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="task_id", - collection_name="eos", + uri=db_uri, database="mp_core", key="task_id", collection_name="eos", ) similarity_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="material_id", - collection_name="similarity", + uri=db_uri, database="mp_core", key="material_id", collection_name="similarity", ) xas_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="spectrum_id", - collection_name="xas", + uri=db_uri, database="mp_core", key="spectrum_id", collection_name="xas", ) gb_store = MongoURIStore( @@ -119,31 +113,19 @@ ) fermi_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="task_id", - collection_name="fermi_surface", + uri=db_uri, database="mp_core", key="task_id", collection_name="fermi_surface", ) elasticity_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="task_id", - collection_name="elasticity", + uri=db_uri, database="mp_core", key="task_id", collection_name="elasticity", ) doi_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="task_id", - collection_name="dois", + uri=db_uri, database="mp_core", key="task_id", collection_name="dois", ) substrates_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="film_id", - collection_name="substrates", + uri=db_uri, database="mp_core", key="film_id", collection_name="substrates", ) surface_props_store = MongoURIStore( @@ -161,10 +143,7 @@ ) synth_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="_id", - collection_name="synth_descriptions", + uri=db_uri, database="mp_core", key="_id", collection_name="synth_descriptions", ) insertion_electrodes_store = MongoURIStore( @@ -175,10 +154,7 @@ ) molecules_store = MongoURIStore( - uri=db_uri, - database="mp_core", - key="task_id", - collection_name="molecules", + uri=db_uri, database="mp_core", key="task_id", collection_name="molecules", ) oxi_states_store = MongoURIStore( @@ -224,10 +200,7 @@ ) s3_dos_index = MongoURIStore( - uri=db_uri, - database="mp_core", - key="fs_id", - collection_name="s3_dos_index", + uri=db_uri, database="mp_core", key="fs_id", collection_name="s3_dos_index", ) s3_bs = S3Store( @@ -266,10 +239,7 @@ ) chgcar_url = MongoURIStore( - uri=db_uri, - database="mp_core", - key="fs_id", - collection_name="chgcar_s3_urls", + uri=db_uri, database="mp_core", key="fs_id", collection_name="chgcar_s3_urls", ) mpcomplete_store = MongoURIStore( diff --git a/emmet-api/emmet/api/routes/thermo/query_operators.py b/emmet-api/emmet/api/routes/thermo/query_operators.py index c34505e5a2..0f1eb54a4a 100644 --- a/emmet-api/emmet/api/routes/thermo/query_operators.py +++ b/emmet-api/emmet/api/routes/thermo/query_operators.py @@ -1,5 +1,6 @@ from typing import Optional from fastapi import Query +from maggma.api.utils import STORE_PARAMS from maggma.api.query_operator import QueryOperator @@ -25,3 +26,57 @@ def query( def ensure_indexes(self): # pragma: no cover keys = self._keys_from_query() return [(key, False) for key in keys] + + +class MultiThermoIDQuery(QueryOperator): + """ + Method to generate a query for different root-level thermo_id values + """ + + def query( + self, + thermo_ids: Optional[str] = Query( + None, description="Comma-separated list of thermo_id values to query on" + ), + ) -> STORE_PARAMS: + + crit = {} # type: dict + + if thermo_ids: + + thermo_id_list = [thermo_id.strip() for thermo_id in thermo_ids.split(",")] + + if len(thermo_id_list) == 1: + crit.update({"thermo_id": thermo_id_list[0]}) + else: + crit.update({"thermo_id": {"$in": thermo_id_list}}) + + return {"criteria": crit} + + +class MultiThermoTypeQuery(QueryOperator): + """ + Method to generate a query for different root-level thermo_type values + """ + + def query( + self, + thermo_types: Optional[str] = Query( + None, description="Comma-separated list of thermo_type values to query on" + ), + ) -> STORE_PARAMS: + + crit = {} # type: dict + + if thermo_types: + + thermo_type_list = [ + thermo_type.strip() for thermo_type in thermo_types.split(",") + ] + + if len(thermo_type_list) == 1: + crit.update({"thermo_type": thermo_type_list[0]}) + else: + crit.update({"thermo_type": {"$in": thermo_type_list}}) + + return {"criteria": crit} diff --git a/emmet-api/emmet/api/routes/thermo/resources.py b/emmet-api/emmet/api/routes/thermo/resources.py index cf22ba13a3..3625ae6535 100644 --- a/emmet-api/emmet/api/routes/thermo/resources.py +++ b/emmet-api/emmet/api/routes/thermo/resources.py @@ -8,7 +8,11 @@ SortQuery, SparseFieldsQuery, ) -from emmet.api.routes.thermo.query_operators import IsStableQuery +from emmet.api.routes.thermo.query_operators import ( + IsStableQuery, + MultiThermoIDQuery, + MultiThermoTypeQuery, +) from emmet.api.core.global_header import GlobalHeaderProcessor from emmet.api.routes.materials.query_operators import ( MultiMaterialIDQuery, @@ -37,7 +41,9 @@ def thermo_resource(thermo_store): thermo_store, ThermoDoc, query_operators=[ + MultiThermoIDQuery(), MultiMaterialIDQuery(), + MultiThermoTypeQuery(), FormulaQuery(), ChemsysQuery(), IsStableQuery(), @@ -45,13 +51,13 @@ def thermo_resource(thermo_store): SortQuery(), PaginationQuery(), SparseFieldsQuery( - ThermoDoc, default_fields=["material_id", "last_updated"] + ThermoDoc, default_fields=["thermo_id", "material_id", "last_updated"] ), ], header_processor=GlobalHeaderProcessor(), tags=["Thermo"], disable_validation=True, - timeout=MAPISettings().TIMEOUT + timeout=MAPISettings().TIMEOUT, ) return resource diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py index fab900d59a..a038fc4a36 100644 --- a/emmet-builders/emmet/builders/vasp/thermo.py +++ b/emmet-builders/emmet/builders/vasp/thermo.py @@ -94,7 +94,9 @@ def __init__( targets.append(phase_diagram) # type: ignore - super().__init__(sources=sources, targets=targets, chunk_size=chunk_size, **kwargs) + super().__init__( + sources=sources, targets=targets, chunk_size=chunk_size, **kwargs + ) def ensure_indexes(self): """ @@ -189,6 +191,8 @@ def process_item(self, item: List[Dict]): self.logger.debug(f"Processing {len(entries)} entries for {chemsys}") + all_entry_types = {str(e.data["run_type"]) for e in entries} + docs_pd_pair_list = [] for compatability in self.compatibility: @@ -206,63 +210,88 @@ def process_item(self, item: List[Dict]): with warnings.catch_warnings(): warnings.simplefilter("ignore") with HiddenPrints(): - pd_entries = compatability.process_entries(entries) + if "R2SCAN" in all_entry_types: + combined_pd_entries = compatability.process_entries(entries) + only_scan_pd_entries = [ + e + for e in entries + if str(e.data["run_type"]) == "R2SCAN" + ] + + combined_pair = self._produce_pair( + combined_pd_entries, thermo_type, elements, chemsys + ) + scan_only_pair = self._produce_pair( + only_scan_pd_entries, + ThermoType.R2SCAN, + elements, + chemsys, + ) + + docs_pd_pair_list.append(combined_pair) + docs_pd_pair_list.append(scan_only_pair) + + else: + pd_entries = compatability.process_entries(entries) + pd_pair = self._produce_pair( + pd_entries, thermo_type, elements, chemsys + ) + + docs_pd_pair_list.append(pd_pair) + else: - all_entry_types = {e.data["run_type"] for e in entries} if len(all_entry_types) > 1: raise ValueError( "More than one functional type has been provided without a mixing scheme!" ) else: thermo_type = all_entry_types.pop() - pd_entries = entries - self.logger.debug(f"{len(pd_entries)} remain in {chemsys} after filtering") - try: - docs, pd = ThermoDoc.from_entries( - pd_entries, thermo_type, deprecated=False - ) + pd_pair = self._produce_pair(entries, thermo_type, elements, chemsys) - pd_data = None - - if self.phase_diagram: - if ( - self.num_phase_diagram_eles is None - or len(elements) <= self.num_phase_diagram_eles - ): - pd_id = "{}_{}".format(chemsys, str(thermo_type)) - pd_doc = PhaseDiagramDoc( - phase_diagram_id=pd_id, - chemsys=chemsys, - phase_diagram=pd, - thermo_type=thermo_type, - ) - - pd_data = jsanitize(pd_doc.dict(), allow_bson=True) - - docs_pd_pair = ( - jsanitize([d.dict() for d in docs], allow_bson=True), - [pd_data], - ) + docs_pd_pair_list.append(pd_pair) - docs_pd_pair_list.append(docs_pd_pair) + return docs_pd_pair_list - except PhaseDiagramError as p: - elsyms = [] - for e in entries: - elsyms.extend([el.symbol for el in e.composition.elements]) + def _produce_pair(self, pd_entries, thermo_type, elements, chemsys): + # Produce thermo and phase diagram pair + + try: + docs, pd = ThermoDoc.from_entries(pd_entries, thermo_type, deprecated=False) + + pd_data = None + + if self.phase_diagram: + if ( + self.num_phase_diagram_eles is None + or len(elements) <= self.num_phase_diagram_eles + ): + pd_id = "{}_{}".format(chemsys, str(thermo_type)) + pd_doc = PhaseDiagramDoc( + phase_diagram_id=pd_id, + chemsys=chemsys, + phase_diagram=pd, + thermo_type=thermo_type, + ) - self.logger.warning( - f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}" - ) - return [] - except Exception as e: - self.logger.error( - f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}" - ) - return [] + pd_data = jsanitize(pd_doc.dict(), allow_bson=True) - return docs_pd_pair_list + docs_pd_pair = ( + jsanitize([d.dict() for d in docs], allow_bson=True), + [pd_data], + ) + + return docs_pd_pair + + except PhaseDiagramError as p: + elsyms = [] + for e in pd_entries: + elsyms.extend([el.symbol for el in e.composition.elements]) + + self.logger.error( + f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}" + ) + return (None, None) def update_targets(self, items): """ diff --git a/emmet-core/emmet/core/thermo.py b/emmet-core/emmet/core/thermo.py index 0ab79f4a5c..9d455c7244 100644 --- a/emmet-core/emmet/core/thermo.py +++ b/emmet-core/emmet/core/thermo.py @@ -36,6 +36,7 @@ class DecompositionProduct(BaseModel): class ThermoType(ValueEnum): GGA_GGA_U = "GGA_GGA+U" GGA_GGA_U_R2SCAN = "GGA_GGA+U_R2SCAN" + R2SCAN = "R2SCAN" UNKNOWN = "UNKNOWN"