Skip to content

Commit

Permalink
Add ability to retrieve SNLs
Browse files Browse the repository at this point in the history
  • Loading branch information
mkhorton committed Sep 5, 2021
1 parent 38b7e46 commit 7e7c554
Showing 1 changed file with 88 additions and 16 deletions.
104 changes: 88 additions & 16 deletions pymatgen/ext/optimade.py
Expand Up @@ -13,6 +13,7 @@

from pymatgen.core.periodic_table import DummySpecies
from pymatgen.core.structure import Structure
from pymatgen.util.provenance import StructureNL
from pymatgen.util.sequence import PBar

# TODO: importing optimade-python-tool's data structures will make more sense
Expand Down Expand Up @@ -78,9 +79,6 @@ def __init__(self, aliases_or_resource_urls: Optional[Union[str, List[str]]] = N
To refresh this list of aliases, generated from the current list of OPTIMADE providers
at optimade.org, call the refresh_aliases() method.
This interface is maintained by @mkhorton, please contact him directly with bug reports
or open an Issue in the pymatgen repository.
Args:
aliases_or_resource_urls: the alias or structure resource URL or a list of
aliases or resource URLs, if providing the resource URL directly it should not
Expand Down Expand Up @@ -178,9 +176,9 @@ def _build_filter(

def get_structures(
self, elements=None, nelements=None, nsites=None, chemical_formula_anonymous=None, chemical_formula_hill=None,
) -> Dict[str, Structure]:
) -> Dict[str, Dict[str, Structure]]:
"""
Retrieve structures from the OPTIMADE database.
Retrieve Structures from OPTIMADE providers.
Not all functionality of OPTIMADE is currently exposed in this convenience method. To
use a custom filter, call get_structures_with_filter().
Expand All @@ -192,7 +190,7 @@ def get_structures(
chemical_formula_anonymous: Anonymous chemical formula
chemical_formula_hill: Chemical formula following Hill convention
Returns: Dict of Structures keyed by that database's id system
Returns: Dict of (Dict of Structures keyed by that database's id system) keyed by provider
"""

optimade_filter = self._build_filter(
Expand All @@ -205,7 +203,40 @@ def get_structures(

return self.get_structures_with_filter(optimade_filter)

def get_structures_with_filter(self, optimade_filter: str) -> Dict[str, Structure]:
def get_snls(
    self,
    elements=None,
    nelements=None,
    nsites=None,
    chemical_formula_anonymous=None,
    chemical_formula_hill=None,
) -> Dict[str, Dict[str, StructureNL]]:
    """
    Retrieve StructureNLs from OPTIMADE providers.

    A StructureNL is an object provided by pymatgen which combines a Structure with
    associated metadata, such as the URL it was downloaded from and any additional
    namespaced data.

    Not all functionality of OPTIMADE is currently exposed in this convenience method. To
    use a custom filter, call get_snls_with_filter().

    Args:
        elements: List of elements
        nelements: Number of elements, e.g. 4 or [2, 5] for the range >=2 and <=5
        nsites: Number of sites, e.g. 4 or [2, 5] for the range >=2 and <=5
        chemical_formula_anonymous: Anonymous chemical formula
        chemical_formula_hill: Chemical formula following Hill convention

    Returns: Dict of (Dict of StructureNLs keyed by that database's id system) keyed by provider
    """

    # Translate the convenience keyword arguments into a single OPTIMADE filter string.
    optimade_filter = self._build_filter(
        elements=elements,
        nelements=nelements,
        nsites=nsites,
        chemical_formula_anonymous=chemical_formula_anonymous,
        chemical_formula_hill=chemical_formula_hill,
    )

    # The filter-based method performs the actual querying of each provider.
    return self.get_snls_with_filter(optimade_filter)

def get_structures_with_filter(self, optimade_filter: str) -> Dict[str, Dict[str, Structure]]:
"""
Get structures satisfying a given OPTIMADE filter.
Expand All @@ -215,8 +246,26 @@ def get_structures_with_filter(self, optimade_filter: str) -> Dict[str, Structur
Returns: Dict of (Dict of Structures keyed by that database's id system) keyed by provider
"""

all_snls = self.get_snls_with_filter(optimade_filter)
all_structures = {}

for identifier, snls_dict in all_snls.items():
all_structures[identifier] = {k: snl.structure for k, snl in snls_dict.items()}

return all_structures

def get_snls_with_filter(self, optimade_filter: str) -> Dict[str, Dict[str, StructureNL]]:
"""
Get StructureNLs satisfying a given OPTIMADE filter.
Args:
optimade_filter: An OPTIMADE-compliant filter
Returns: Dict of (Dict of StructureNLs keyed by that database's id system) keyed by provider
"""

all_snls = {}

for identifier, resource in self.resources.items():

fields = "response_fields=lattice_vectors,cartesian_site_positions,species,species_at_sites"
Expand All @@ -227,7 +276,7 @@ def get_structures_with_filter(self, optimade_filter: str) -> Dict[str, Structur

json = self.session.get(url, timeout=self._timeout).json()

structures = self._get_structures_from_resource(json, url)
structures = self._get_snls_from_resource(json, url, identifier)

pbar = PBar(total=json["meta"].get("data_returned", 0), desc=identifier, initial=len(structures))

Expand All @@ -238,13 +287,13 @@ def get_structures_with_filter(self, optimade_filter: str) -> Dict[str, Structur
if isinstance(next_link, dict) and "href" in next_link:
next_link = next_link["href"]
json = self.session.get(next_link, timeout=self._timeout).json()
additional_structures = self._get_structures_from_resource(json, url)
additional_structures = self._get_snls_from_resource(json, url, identifier)
structures.update(additional_structures)
pbar.update(len(additional_structures))

if structures:

all_structures[identifier] = structures
all_snls[identifier] = structures

except Exception as exc:

Expand All @@ -254,12 +303,12 @@ def get_structures_with_filter(self, optimade_filter: str) -> Dict[str, Structur
f"Could not retrieve required information from provider {identifier} and url {url}: {exc}"
)

return all_structures
return all_snls

@staticmethod
def _get_structures_from_resource(json, url):
def _get_snls_from_resource(json, url, identifier) -> Dict[str, StructureNL]:

structures = {}
snls = {}

exceptions = set()

Expand All @@ -279,6 +328,7 @@ def _get_comp(sp_dict):
for data in json["data"]:

# TODO: check the spec! and remove this try/except (are all providers following spec?)
# e.g. can check data["type"] == "structures"

try:
# e.g. COD
Expand All @@ -288,7 +338,19 @@ def _get_comp(sp_dict):
coords=data["attributes"]["cartesian_site_positions"],
coords_are_cartesian=True,
)
structures[data["id"]] = structure
namespaced_data = {k: v for k, v in data.items() if k.startswith("_")}

# TODO: follow `references` to add reference information here
snl = StructureNL(
structure,
authors={},
history=[{"name": identifier, "url": url, "description": {"id": data["id"]}}],
data={"_optimade": namespaced_data},
)

snls[data["id"]] = snl

# TODO: bare exception, remove...
except Exception:

try:
Expand All @@ -299,7 +361,17 @@ def _get_comp(sp_dict):
coords=data["attributes"]["cartesian_site_positions"],
coords_are_cartesian=True,
)
structures[data["id"]] = structure
namespaced_data = {k: v for k, v in data["attributes"].items() if k.startswith("_")}

# TODO: follow `references` to add reference information here
snl = StructureNL(
structure,
authors={},
history=[{"name": identifier, "url": url, "description": {"id": data["id"]}}],
data={"_optimade": namespaced_data},
)

snls[data["id"]] = snl

except Exception as exc:
if str(exc) not in exceptions:
Expand All @@ -308,7 +380,7 @@ def _get_comp(sp_dict):
if exceptions:
_logger.error(f'Failed to parse returned data for {url}: {", ".join(exceptions)}')

return structures
return snls

def _validate_provider(self, provider_url) -> Optional[Provider]:
"""
Expand Down

0 comments on commit 7e7c554

Please sign in to comment.