diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9fa25c6dd6..d236bdfe61 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,4 +7,4 @@ updates: - package-ecosystem: "github-actions" directory: "/" schedule: - interval: "weekly" \ No newline at end of file + interval: "weekly" diff --git a/emmet-core/emmet/core/electrode.py b/emmet-core/emmet/core/electrode.py index 52936b55a4..26f9ce58d9 100644 --- a/emmet-core/emmet/core/electrode.py +++ b/emmet-core/emmet/core/electrode.py @@ -10,6 +10,7 @@ from pymatgen.core.periodic_table import Element from pymatgen.entries.computed_entries import ComputedEntry +from emmet.core.mpid import MPID from emmet.core.utils import jsanitize @@ -126,7 +127,7 @@ def from_entries( cls, grouped_entries: List[ComputedEntry], working_ion_entry: ComputedEntry, - task_id: str, + task_id: Union[MPID, int], host_structure: Structure, ) -> Union["InsertionElectrodeDoc", None]: try: @@ -196,7 +197,7 @@ def from_composition_and_entries( composition: Composition, entries: List[ComputedEntry], working_ion_symbol: str, - task_id: str, + task_id: Union[MPID, int], ): ce = ConversionElectrode.from_composition_and_entries( comp=composition, diff --git a/emmet-core/emmet/core/material.py b/emmet-core/emmet/core/material.py index 9578c5a67f..a28ead3c39 100644 --- a/emmet-core/emmet/core/material.py +++ b/emmet-core/emmet/core/material.py @@ -9,6 +9,7 @@ from pymatgen.analysis.magnetism import CollinearMagneticStructureAnalyzer, Ordering from pymatgen.core import Structure +from emmet.core.mpid import MPID from emmet.core.structure import StructureMetadata @@ -18,7 +19,9 @@ class PropertyOrigin(BaseModel): """ name: str = Field(..., description="The property name") - task_id: str = Field(..., description="The calculation ID this property comes from") + task_id: Union[MPID, int] = Field( + ..., description="The calculation ID this property comes from" + ) last_updated: datetime = Field( description="The timestamp when 
this calculation was last updated", default_factory=datetime.utcnow, @@ -34,10 +37,10 @@ class MaterialsDoc(StructureMetadata): """ # Only material_id is required for all documents - material_id: str = Field( + material_id: Union[MPID, int] = Field( ..., description="The ID of this material, used as a universal reference across proeprty documents." - "This comes in the form: mp-******", + "This comes in the form and MPID or int", ) structure: Structure = Field( @@ -54,7 +57,7 @@ class MaterialsDoc(StructureMetadata): description="Initial structures used in the DFT optimizations corresponding to this material", ) - task_ids: Sequence[str] = Field( + task_ids: Sequence[Union[MPID, int]] = Field( [], title="Calculation IDs", description="List of Calculations IDs used to make this Materials Document", @@ -87,7 +90,7 @@ class MaterialsDoc(StructureMetadata): @classmethod def from_structure( # type: ignore[override] - cls: Type[T], structure: Structure, material_id: str, **kwargs + cls: Type[T], structure: Structure, material_id: Union[MPID, int], **kwargs ) -> T: """ Builds a materials document using the minimal amount of information diff --git a/emmet-core/emmet/core/material_property.py b/emmet-core/emmet/core/material_property.py index 8391d5b2f8..aed4441c8d 100644 --- a/emmet-core/emmet/core/material_property.py +++ b/emmet-core/emmet/core/material_property.py @@ -10,6 +10,7 @@ from pymatgen.core import Structure from emmet.core.material import PropertyOrigin +from emmet.core.mpid import MPID from emmet.core.structure import StructureMetadata S = TypeVar("S", bound="PropertyDoc") @@ -23,10 +24,10 @@ class PropertyDoc(StructureMetadata): """ property_name: ClassVar[str] - material_id: str = Field( + material_id: Union[MPID, int] = Field( ..., description="The ID of the material, used as a universal reference across proeprty documents." 
class MPID(str):
    """
    A Materials Project type ID with a prefix and an integer, e.g. ``mp-1234``

    This class enables seamlessly mixing MPIDs and regular integer IDs.
    Ordering rules:

    * Prefixed IDs are considered less than non-prefixed (pure integer) IDs
      to enable proper mixing with the Materials Project.
    * Two prefixed IDs are compared part by part after splitting on ``-``.
      Parts made only of decimal digits compare numerically, so ``mp-9``
      sorts before ``mp-10``; all other parts compare as text.
    * Comparison against anything that is neither an ``int`` nor a ``str``
      is unsupported and raises ``TypeError`` through the normal protocol.

    Equality and hashing are inherited unchanged from ``str``.
    """

    @staticmethod
    def _natural_key(value: str) -> tuple:
        """
        Build a sort key for an ID string: one tuple per ``-`` separated part

        Numeric parts become ``(0, number, "")`` and text parts become
        ``(1, 0, text)`` so that every element has the same shape and mixed
        numeric/text parts never trigger an int-vs-str comparison.
        """
        return tuple(
            # isdecimal() only accepts characters that int() can parse
            (0, int(part), "") if part.isdecimal() else (1, 0, part)
            for part in value.split("-")
        )

    def _compare(self, other: Union["MPID", int, str]):
        """
        Three-way comparison helper

        Returns a negative, zero or positive int when self sorts before,
        equal to, or after other; returns NotImplemented for unsupported types.
        """
        # Always sort MPIDs before pure integer IDs
        if isinstance(other, int):
            return -1
        if not isinstance(other, str):
            return NotImplemented

        mine = MPID._natural_key(self)
        theirs = MPID._natural_key(other)
        if mine == theirs:
            # Keys can tie for different text (e.g. "mp-03" vs "mp-3").
            # Fall back to the raw text so ordering stays consistent with
            # str equality; plain str copies avoid recursing into MPID.
            mine, theirs = str(self), str(other)
        return (mine > theirs) - (mine < theirs)

    # str already defines every rich comparison, so functools.total_ordering
    # would not replace them; each operator is overridden explicitly to keep
    # <, <=, > and >= mutually consistent (and to make reflected comparisons
    # such as ``3 < MPID("mp-3")`` work instead of raising TypeError).
    def __lt__(self, other: Union["MPID", int, str]):
        result = self._compare(other)
        return result if result is NotImplemented else result < 0

    def __le__(self, other: Union["MPID", int, str]):
        result = self._compare(other)
        return result if result is NotImplemented else result <= 0

    def __gt__(self, other: Union["MPID", int, str]):
        result = self._compare(other)
        return result if result is NotImplemented else result > 0

    def __ge__(self, other: Union["MPID", int, str]):
        result = self._compare(other)
        return result if result is NotImplemented else result >= 0
"This comes in the form: mp-******", diff --git a/emmet-core/emmet/core/structure_group.py b/emmet-core/emmet/core/structure_group.py index 45d5cfed25..f7899b39ad 100644 --- a/emmet-core/emmet/core/structure_group.py +++ b/emmet-core/emmet/core/structure_group.py @@ -10,6 +10,8 @@ from pymatgen.core import Composition, Structure from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry +from emmet.core.mpid import MPID + logger = logging.getLogger(__name__) @@ -48,7 +50,7 @@ class StructureGroupDoc(BaseModel): Group of structure """ - material_id: str = Field( + material_id: Union[MPID, int] = Field( None, description="The combined material_id of the grouped document is given by the numerically smallest task id ", ) diff --git a/emmet-core/emmet/core/stubs.py b/emmet-core/emmet/core/stubs.py index 5d41c3a8a1..fe58f0626a 100644 --- a/emmet-core/emmet/core/stubs.py +++ b/emmet-core/emmet/core/stubs.py @@ -4,9 +4,10 @@ outside the standard MSONable model """ from typing import Dict + import pymatgen.core.structure -from pymatgen.core.periodic_table import Element from pydantic import BaseModel +from pymatgen.core.periodic_table import Element """ The stub names are kept in sync with the actual classes so they diff --git a/emmet-core/emmet/core/thermo.py b/emmet-core/emmet/core/thermo.py index 61e58dff99..1ebb4d239d 100644 --- a/emmet-core/emmet/core/thermo.py +++ b/emmet-core/emmet/core/thermo.py @@ -10,6 +10,7 @@ from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry from emmet.core.material_property import PropertyDoc +from emmet.core.mpid import MPID from emmet.core.structure import StructureMetadata @@ -18,7 +19,7 @@ class DecompositionProduct(BaseModel): Entry metadata for a decomposition process """ - material_id: str = Field( + material_id: Union[MPID, int] = Field( None, description="The material this decomposition points to" ) formula: str = Field( diff --git a/emmet-core/emmet/core/vasp/material.py 
b/emmet-core/emmet/core/vasp/material.py index ca08dec5f5..aed3076d10 100644 --- a/emmet-core/emmet/core/vasp/material.py +++ b/emmet-core/emmet/core/vasp/material.py @@ -71,7 +71,7 @@ def from_tasks( # Material ID possible_mat_ids = [task.task_id for task in structure_optimizations] - possible_mat_ids = sorted(possible_mat_ids, key=ID_to_int) + possible_mat_ids = sorted(possible_mat_ids) if len(possible_mat_ids) == 0: raise Exception(f"Could not find a material ID for {task_ids}") @@ -157,17 +157,3 @@ def _structure_eval(task: TaskDocument): origins=origins, entries=entries, ) - - -def ID_to_int(s_id: str) -> Tuple[str, int]: - """ - Converts a string id to tuple - falls back to assuming ID is an Int if it can't process - Assumes string IDs are of form "[chars]-[int]" such as mp-234 - """ - if isinstance(s_id, str): - return (s_id.split("-")[0], int(str(s_id).split("-")[-1])) - elif isinstance(s_id, (int, float)): - return ("", s_id) - else: - return None diff --git a/emmet-core/emmet/core/vasp/task.py b/emmet-core/emmet/core/vasp/task.py index e2d170b63b..994199dc2d 100644 --- a/emmet-core/emmet/core/vasp/task.py +++ b/emmet-core/emmet/core/vasp/task.py @@ -11,6 +11,7 @@ from emmet.core import SETTINGS from emmet.core.math import Matrix3D, Vector3D +from emmet.core.mpid import MPID from emmet.core.structure import StructureMetadata from emmet.core.utils import ValueEnum from emmet.core.vasp.calc_types import ( @@ -119,7 +120,7 @@ class TaskDocument(StructureMetadata): orig_inputs: Dict[str, Dict] = Field( None, description="Summary of the original VASP inputs" ) - task_id: str = Field(None, description="the Task ID For this document") + task_id: Union[MPID, int] = Field(None, description="the Task ID For this document") tags: List[str] = Field([], description="Metadata tags for this task document") @property diff --git a/emmet-core/emmet/core/vasp/validation.py b/emmet-core/emmet/core/vasp/validation.py index 213cec586a..ed487ede00 100644 --- 
a/emmet-core/emmet/core/vasp/validation.py +++ b/emmet-core/emmet/core/vasp/validation.py @@ -6,6 +6,7 @@ from pymatgen.core import Structure from emmet.core import SETTINGS +from emmet.core.mpid import MPID from emmet.core.utils import DocEnum from emmet.core.vasp.task import TaskDocument @@ -24,7 +25,9 @@ class ValidationDoc(BaseModel): Validation document for a VASP calculation """ - task_id: str = Field(..., description="The task_id for this validation document") + task_id: Union[MPID, int] = Field( + ..., description="The task_id for this validation document" + ) valid: bool = Field(False, description="Whether this task is valid or not") last_updated: datetime = Field( description="Last updated date for this document", diff --git a/emmet-core/emmet/core/xas.py b/emmet-core/emmet/core/xas.py index ac3b9aac00..be177c898f 100644 --- a/emmet-core/emmet/core/xas.py +++ b/emmet-core/emmet/core/xas.py @@ -1,11 +1,12 @@ from datetime import datetime -from typing import List, Optional +from typing import List, Optional, Union from pydantic import BaseModel, Field, root_validator from pymatgen.analysis.xas.spectrum import XAS from pymatgen.core import Structure from pymatgen.core.periodic_table import Element +from emmet.core.mpid import MPID from emmet.core.spectrum import SpectrumDoc from emmet.core.utils import ValueEnum @@ -62,7 +63,7 @@ class XASDoc(SpectrumDoc): def from_spectrum( cls, xas_spectrum: XAS, - material_id: str, + material_id: Union[MPID, int], **kwargs, ): spectrum_type = xas_spectrum.spectrum_type diff --git a/emmet-core/emmet/core/xrd.py b/emmet-core/emmet/core/xrd.py index 928f7b5872..ba4bdd5d22 100644 --- a/emmet-core/emmet/core/xrd.py +++ b/emmet-core/emmet/core/xrd.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Union import numpy as np from pydantic import BaseModel, Field, root_validator @@ -11,6 +11,7 @@ from pymatgen.core import Structure from 
def test_mpid():
    """Check MPID equality and its ordering against other MPIDs and bare ints."""
    mp_three = MPID("mp-3")

    # Two MPIDs built from the same text compare equal
    assert mp_three == MPID("mp-3")

    # A prefixed ID always sorts before a pure integer ID
    assert mp_three < 3

    # Ordering between prefixed IDs: by prefix, then by the trailing number
    assert mp_three < MPID("np-3")
    assert mp_three > MPID("mp-2")

    # The reflected comparison (int on the left) agrees with the rule above
    assert 3 > mp_three