Skip to content

Commit

Permalink
Enable downloads from AWS Open Data for all collections (#884)
Browse files Browse the repository at this point in the history
* Ensure json schema fields are retained

* Add direct s3 querying for regular docs

* Fix timeout passing

* Fix s3 threadsafety

* Fix for latest data re-org

* Use smart open

* Fix deserialization performance

* Switch to s3 for all doc queries

* Fix remaining type issues

* More type fixes

* Remove print

* Fix deprecated field in materials and s3

* Fix type issues

* Fix structure import

* Fix open data json sanitization

* Add projection to multithreaded s3 func

* Remove default s3 query

* Add comments for clarity in s3 func

* Update pbar for s3 download

* Switch grain boundary prefix

* Fix s3 suffix delim

* Fix task retrieval support

* Linting

* Docstring noqa

* Add smart_open to deps

* Linting

* More linting

* Add missing docstring

* Docstring arg rename

* Fix tests

* Fix bs s3 query

* Fix chgcar query

* Fix dos query

* More linting

* Fix s3 keys

* More s3 fixes

* Last s3 query fix

* Linting

* Skip alloys generic test

* Fix alloys skip

* Remove task chemsys query

* Linting

* Fix similarity search name

* More similarity fixes

* Remove fermi rester

* Remove fermi ref

* More fermi fixes

* Linting

* Fix s3 decoding

* Add deprecated filter for s3

* Fix object key pagination

---------

Co-authored-by: Jason Munro <jasonmunro@Jasons-MBP.dhcp.lbl.gov>
  • Loading branch information
munrojm and Jason Munro committed Feb 28, 2024
1 parent 8724ebe commit daefc5f
Show file tree
Hide file tree
Showing 13 changed files with 433 additions and 216 deletions.
382 changes: 281 additions & 101 deletions mp_api/client/core/client.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions mp_api/client/core/utils.py
Expand Up @@ -73,6 +73,7 @@ def api_sanitize(
model_fields_to_leave = {f[1] for f in fields_tuples if model.__name__ == f[0]}
for name in model.model_fields:
field = model.model_fields[name]
field_json_extra = field.json_schema_extra
field_type = field.annotation

if field_type is not None and allow_dict_msonable:
Expand All @@ -87,6 +88,7 @@ def api_sanitize(
new_field = FieldInfo.from_annotated_attribute(
Optional[field_type], None
)
new_field.json_schema_extra = field_json_extra or {}
model.model_fields[name] = new_field

model.model_rebuild(force=True)
Expand Down
60 changes: 33 additions & 27 deletions mp_api/client/mprester.py
@@ -1,17 +1,19 @@
from __future__ import annotations

import itertools
import json
import os
import warnings
from functools import cache, lru_cache
from json import loads
from os import environ
from typing import Literal

from emmet.core.electronic_structure import BSPathType
from emmet.core.mpid import MPID
from emmet.core.settings import EmmetSettings
from emmet.core.tasks import TaskDoc
from emmet.core.vasp.calc_types import CalcType
from monty.json import MontyDecoder
from packaging import version
from pymatgen.analysis.phase_diagram import PhaseDiagram
from pymatgen.analysis.pourbaix_diagram import IonEntry
Expand Down Expand Up @@ -39,7 +41,6 @@
ElectrodeRester,
ElectronicStructureRester,
EOSRester,
FermiRester,
GrainBoundaryRester,
MagnetismRester,
OxidationStatesRester,
Expand All @@ -66,10 +67,12 @@
)

_EMMET_SETTINGS = EmmetSettings() # type: ignore
_MAPI_SETTINGS = MAPIClientSettings() # type: ignore
_MAPI_SETTINGS = MAPIClientSettings() # type: ignore

DEFAULT_API_KEY = environ.get("MP_API_KEY", None)
DEFAULT_ENDPOINT = environ.get("MP_API_ENDPOINT", "https://api.materialsproject.org/")
DEFAULT_API_KEY = os.environ.get("MP_API_KEY", None)
DEFAULT_ENDPOINT = os.environ.get(
"MP_API_ENDPOINT", "https://api.materialsproject.org/"
)


class MPRester:
Expand All @@ -86,8 +89,7 @@ class MPRester:
similarity: SimilarityRester
tasks: TaskRester
xas: XASRester
fermi: FermiRester
grain_boundary: GrainBoundaryRester
grain_boundaries: GrainBoundaryRester
substrates: SubstratesRester
surface_properties: SurfacePropertiesRester
phonon: PhononRester
Expand Down Expand Up @@ -195,7 +197,7 @@ def __init__(
"tasks",
"xas",
"fermi",
"grain_boundary",
"grain_boundaries",
"substrates",
"surface_properties",
"phonon",
Expand Down Expand Up @@ -340,7 +342,7 @@ def __molecules_getattr__(_self, attr):
return rester

MaterialsRester.__getattr__ = __materials_getattr__ # type: ignore
MoleculeRester.__getattr__ = __molecules_getattr__ # type: ignore
MoleculeRester.__getattr__ = __molecules_getattr__ # type: ignore

for attr, rester in core_resters.items():
setattr(
Expand Down Expand Up @@ -598,14 +600,15 @@ def get_structures(
input_params = {"formula": chemsys_formula}

if final:
return [
doc.structure if self.use_document_model else doc["structure"] # type: ignore
for doc in self.materials.search(
**input_params, # type: ignore
all_fields=False,
fields=["structure"],
)
]
docs = self.materials.search(
**input_params, # type: ignore
all_fields=False,
fields=["structure"],
)
if not self.use_document_model:
return [doc["structure"] for doc in docs] # type: ignore

return [doc.structure for doc in docs] # type: ignore
else:
structures = []

Expand All @@ -614,11 +617,12 @@ def get_structures(
all_fields=False,
fields=["initial_structures"],
):
structures.extend(
initial_structures = (
doc.initial_structures # type: ignore
if self.use_document_model
else doc["initial_structures"] # type: ignore
)
structures.extend(initial_structures)

return structures

Expand Down Expand Up @@ -1301,7 +1305,7 @@ def get_wulff_shape(self, material_id: str):
)
miller_energy_map = {}
for surf in surfaces:
miller = tuple(surf.miller_index)
miller = tuple(surf.miller_index) if surf.miller_index else ()
# Prefer reconstructed surfaces, which have lower surface energies.
if (miller not in miller_energy_map) or surf.is_reconstructed:
miller_energy_map[miller] = surf.surface_energy
Expand Down Expand Up @@ -1339,20 +1343,22 @@ def get_charge_density_from_material_id(
else x["last_updated"], # type: ignore
)

result = (
decoder = MontyDecoder().decode if self.monty_decode else json.loads
chgcar = (
self.tasks._query_open_data(
bucket="materialsproject-parsed",
prefix="chgcars",
key=str(latest_doc.task_id),
)
key=f"chgcars/{str(latest_doc.task_id)}.json.gz",
decoder=decoder,
fields=["data"],
)[0]
or {}
)

chgcar = result.get("data", None)

if chgcar is None:
if not chgcar:
raise MPRestError(f"No charge density fetched for {material_id}.")

chgcar = chgcar[0]["data"] # type: ignore

if inc_task_doc:
task_doc = self.tasks.search(
task_ids=latest_doc.task_id
Expand Down Expand Up @@ -1384,7 +1390,7 @@ def get_download_info(self, material_ids, calc_types=None, file_patterns=None):
)

meta = {}
for doc in self.materials.search(
for doc in self.materials.search( # type: ignore
task_ids=material_ids,
fields=["calc_types", "deprecated_tasks", "material_id"],
):
Expand Down
3 changes: 1 addition & 2 deletions mp_api/client/routes/materials/__init__.py
Expand Up @@ -13,8 +13,7 @@
ElectronicStructureRester,
)
from .eos import EOSRester
from .fermi import FermiRester
from .grain_boundary import GrainBoundaryRester
from .grain_boundaries import GrainBoundaryRester
from .magnetism import MagnetismRester
from .oxidation_states import OxidationStatesRester
from .phonon import PhononRester
Expand Down
30 changes: 20 additions & 10 deletions mp_api/client/routes/materials/electronic_structure.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
import warnings
from collections import defaultdict

Expand All @@ -8,6 +9,7 @@
DOSProjectionType,
ElectronicStructureDoc,
)
from monty.json import MontyDecoder
from pymatgen.analysis.magnetism.analyzer import Ordering
from pymatgen.core.periodic_table import Element
from pymatgen.electronic_structure.core import OrbitalType, Spin
Expand Down Expand Up @@ -232,12 +234,16 @@ def get_bandstructure_from_task_id(self, task_id: str):
Returns:
bandstructure (BandStructure): BandStructure or BandStructureSymmLine object
"""
decoder = MontyDecoder().decode if self.monty_decode else json.loads
result = self._query_open_data(
bucket="materialsproject-parsed", prefix="bandstructures", key=task_id
)

if result.get("data", None) is not None:
return result["data"]
bucket="materialsproject-parsed",
key=f"bandstructures/{task_id}.json.gz",
decoder=decoder,
fields=["data"],
)[0]

if result:
return result[0]["data"]
else:
raise MPRestError("No object found")

Expand Down Expand Up @@ -418,12 +424,16 @@ def get_dos_from_task_id(self, task_id: str):
Returns:
dos (CompleteDos): CompleteDos object
"""
decoder = MontyDecoder().decode if self.monty_decode else json.loads
result = self._query_open_data(
bucket="materialsproject-parsed", prefix="dos", key=task_id
)

if result.get("data", None) is not None:
return result["data"]
bucket="materialsproject-parsed",
key=f"dos/{task_id}.json.gz",
decoder=decoder,
fields=["data"],
)[0]

if result:
return result[0]["data"] # type: ignore
else:
raise MPRestError("No object found")

Expand Down
58 changes: 0 additions & 58 deletions mp_api/client/routes/materials/fermi.py

This file was deleted.

5 changes: 1 addition & 4 deletions mp_api/client/routes/materials/materials.py
Expand Up @@ -19,7 +19,6 @@
ElectrodeRester,
ElectronicStructureRester,
EOSRester,
FermiRester,
GrainBoundaryRester,
MagnetismRester,
OxidationStatesRester,
Expand Down Expand Up @@ -50,8 +49,7 @@ class MaterialsRester(BaseRester[MaterialsDoc]):
"similarity",
"tasks",
"xas",
"fermi",
"grain_boundary",
"grain_boundaries",
"substrates",
"surface_properties",
"phonon",
Expand Down Expand Up @@ -81,7 +79,6 @@ class MaterialsRester(BaseRester[MaterialsDoc]):
similarity: SimilarityRester
tasks: TaskRester
xas: XASRester
fermi: FermiRester
grain_boundary: GrainBoundaryRester
substrates: SubstratesRester
surface_properties: SurfacePropertiesRester
Expand Down
6 changes: 2 additions & 4 deletions mp_api/client/routes/materials/similarity.py
Expand Up @@ -11,10 +11,9 @@ class SimilarityRester(BaseRester[SimilarityDoc]):
document_model = SimilarityDoc # type: ignore
primary_key = "material_id"

def search_docs(
def search(
self,
material_ids: str | list[str] | None = None,
deprecated: bool | None = False,
num_chunks: int | None = None,
chunk_size: int = 1000,
all_fields: bool = True,
Expand All @@ -25,7 +24,6 @@ def search_docs(
Arguments:
material_ids (str, List[str]): A single Material ID string or list of strings
(e.g., mp-149, [mp-149, mp-13]).
deprecated (bool): Whether the material is tagged as deprecated.
num_chunks (int): Maximum number of chunks of data to yield. None will yield all possible.
chunk_size (int): Number of data entries per chunk.
all_fields (bool): Whether to return all fields in the document. Defaults to True.
Expand All @@ -35,7 +33,7 @@ def search_docs(
Returns:
([SimilarityDoc], [dict]) List of similarity documents or dictionaries.
"""
query_params = {"deprecated": deprecated} # type: dict
query_params = {} # type: dict

if material_ids:
if isinstance(material_ids, str):
Expand Down
9 changes: 0 additions & 9 deletions mp_api/client/routes/materials/tasks.py
Expand Up @@ -36,7 +36,6 @@ def get_trajectory(self, task_id):
def search(
self,
task_ids: str | list[str] | None = None,
chemsys: str | list[str] | None = None,
elements: list[str] | None = None,
exclude_elements: list[str] | None = None,
formula: str | list[str] | None = None,
Expand All @@ -50,8 +49,6 @@ def search(
Arguments:
task_ids (str, List[str]): List of Materials Project IDs to return data for.
chemsys (str, List[str]): A chemical system or list of chemical systems
(e.g., Li-Fe-O, Si-*, [Si-O, Li-Fe-P]).
elements (List[str]): A list of elements.
exclude_elements (List[str]): A list of elements to exclude.
formula (str, List[str]): A formula including anonymized formula
Expand Down Expand Up @@ -84,12 +81,6 @@ def search(
if exclude_elements:
query_params.update({"exclude_elements": ",".join(exclude_elements)})

if chemsys:
if isinstance(chemsys, str):
chemsys = [chemsys]

query_params.update({"chemsys": ",".join(chemsys)})

if last_updated:
query_params.update(
{
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Expand Up @@ -28,6 +28,7 @@ dependencies = [
"requests>=2.23.0",
"monty>=2023.9.25",
"emmet-core>=0.78.0rc3",
"smart_open",
]
dynamic = ["version"]

Expand Down

0 comments on commit daefc5f

Please sign in to comment.