Skip to content

Commit

Permalink
Enable DatasetRef JSON serialization via pydantic
Browse files Browse the repository at this point in the history
Retain the from_json/to_json and from_simple/to_simple APIs
that use universe/registry/minimal parameters but instead make
the simple form return a class that is a pydantic model.
This allows the JSON to be validated through pydantic.

This has required a change to the DatasetType minimal
form and the DimensionGraph form, since pydantic seemingly requires
that all models be dicts.
  • Loading branch information
timj committed Feb 5, 2021
1 parent ac0e15b commit 83d0dfa
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 54 deletions.
78 changes: 61 additions & 17 deletions python/lsst/daf/butler/core/datasets/ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["AmbiguousDatasetError", "DatasetRef"]
__all__ = ["AmbiguousDatasetError", "DatasetRef", "SerializedDatasetRef"]

from typing import (
TYPE_CHECKING,
Expand All @@ -32,12 +32,14 @@
Tuple,
)

from pydantic import BaseModel, StrictStr, ConstrainedInt, validator

from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse
from ..configSupport import LookupKey
from ..utils import immutable
from ..named import NamedKeyDict
from .type import DatasetType
from ..json import from_json_generic, to_json_generic
from .type import DatasetType, SerializedDatasetType
from ..json import from_json_pydantic, to_json_pydantic

if TYPE_CHECKING:
from ...registry import Registry
Expand All @@ -49,6 +51,38 @@ class AmbiguousDatasetError(Exception):
"""


class PositiveInt(ConstrainedInt):
    # Constrained integer type used for dataset IDs in the serialized form.
    # NOTE(review): despite the name, ``ge = 0`` also accepts zero
    # (pydantic's own ``PositiveInt`` uses ``gt = 0``) — confirm that 0 is a
    # valid dataset ID before tightening this.
    ge = 0
    # Reject values that are not already ints (no str/float coercion).
    strict = True


class SerializedDatasetRef(BaseModel):
    """Simplified model of a `DatasetRef` suitable for JSON serialization.

    Two forms are supported: a full form with ``datasetType``/``dataId``
    (and optionally ``run``/``id``), and a minimal form carrying only
    ``id`` (plus optionally ``component``) that requires a registry to
    reconstruct the `DatasetRef`.
    """

    id: Optional[PositiveInt] = None
    datasetType: Optional[SerializedDatasetType] = None
    dataId: Optional[Dict[str, Any]] = None  # Do not use specialist pydantic model for this
    run: Optional[StrictStr] = None
    component: Optional[StrictStr] = None

    @validator("dataId")
    def check_dataId(cls, v: Any, values: Dict[str, Any]) -> Any:
        # A dataId is meaningless without the datasetType defining its
        # dimensions.  Only reject a real value: an explicit None is
        # equivalent to omitting the field.  If datasetType itself failed
        # validation it is absent from ``values`` and we stay silent so
        # pydantic reports only the underlying error.
        if v is not None and "datasetType" in values and values["datasetType"] is None:
            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
        return v

    @validator("run")
    def check_run(cls, v: Any, values: Dict[str, Any]) -> Any:
        # A run is only meaningful for a resolved ref, i.e. one with an ID.
        # An explicit ``run=None`` is treated the same as not supplying it.
        if v is not None and "id" in values and values["id"] is None:
            raise ValueError("'run' cannot be provided unless 'id' is.")
        return v

    @validator("component")
    def check_component(cls, v: Any, values: Dict[str, Any]) -> Any:
        # Component belongs to the minimal (id-only) form; in the full form
        # the component is encoded inside datasetType instead.  Only a real
        # component value conflicts — an explicit None never does.
        if v is not None and values.get("datasetType") is not None:
            raise ValueError("datasetType can not be set if component is given.")
        return v


@immutable
class DatasetRef:
"""Reference to a Dataset in a `Registry`.
Expand Down Expand Up @@ -82,6 +116,7 @@ class DatasetRef:
provided but ``run`` is not.
"""

_serializedType = SerializedDatasetRef
__slots__ = ("id", "datasetType", "dataId", "run",)

def __init__(
Expand Down Expand Up @@ -153,7 +188,7 @@ def __lt__(self, other: Any) -> bool:
# Compare tuples in the priority order
return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

def to_simple(self, minimal: bool = False) -> Dict:
def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
"""Convert this class to a simple python type suitable for
serialization.
Expand All @@ -180,7 +215,7 @@ def to_simple(self, minimal: bool = False) -> Dict:
# We can still be a little minimalist with a component
# but we will also need to record the datasetType component
simple["component"] = self.datasetType.component()
return simple
return SerializedDatasetRef(**simple)

# Convert to a dict form
as_dict: Dict[str, Any] = {"datasetType": self.datasetType.to_simple(minimal=minimal),
Expand All @@ -192,10 +227,10 @@ def to_simple(self, minimal: bool = False) -> Dict:
as_dict["run"] = self.run
as_dict["id"] = self.id

return as_dict
return SerializedDatasetRef(**as_dict)

@classmethod
def from_simple(cls, simple: Dict,
def from_simple(cls, simple: SerializedDatasetRef,
universe: Optional[DimensionUniverse] = None,
registry: Optional[Registry] = None) -> DatasetRef:
"""Construct a new object from the data returned from the `to_simple`
Expand All @@ -221,14 +256,16 @@ def from_simple(cls, simple: Dict,

# Minimalist component will just specify component and id and
# require registry to reconstruct
if set(simple).issubset({"id", "component"}):
if set(simple.dict(exclude_unset=True, exclude_defaults=True)).issubset({"id", "component"}):
if registry is None:
raise ValueError("Registry is required to construct component DatasetRef from integer id")
ref = registry.getDataset(simple["id"])
if simple.id is None:
raise ValueError("For minimal DatasetRef the ID must be defined.")
ref = registry.getDataset(simple.id)
if ref is None:
raise RuntimeError(f"No matching dataset found in registry for id {simple['id']}")
if "component" in simple:
ref = ref.makeComponentRef(simple["component"])
raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
if simple.component:
ref = ref.makeComponentRef(simple.component)
return ref

if universe is None and registry is None:
Expand All @@ -241,13 +278,20 @@ def from_simple(cls, simple: Dict,
# this is for mypy
raise ValueError("Unable to determine a usable universe")

datasetType = DatasetType.from_simple(simple["datasetType"], universe=universe, registry=registry)
dataId = DataCoordinate.from_simple(simple["dataId"], universe=universe)
if simple.datasetType is None:
# mypy
raise ValueError("The DatasetType must be specified to construct a DatasetRef")
datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

if simple.dataId is None:
# mypy
raise ValueError("The DataId must be specified to construct a DatasetRef")
dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)
return cls(datasetType, dataId,
id=simple["id"], run=simple["run"])
id=simple.id, run=simple.run)

to_json = to_json_generic
from_json = classmethod(from_json_generic)
to_json = to_json_pydantic
from_json = classmethod(from_json_pydantic)

@classmethod
def _unpickle(
Expand Down
73 changes: 45 additions & 28 deletions python/lsst/daf/butler/core/datasets/type.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from __future__ import annotations

__all__ = ["DatasetType"]
__all__ = ["DatasetType", "SerializedDatasetType"]

from copy import deepcopy
import re
Expand All @@ -42,11 +42,12 @@
Union,
)

from pydantic import BaseModel, StrictStr, StrictBool

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_generic, to_json_generic
from ..json import from_json_pydantic, to_json_pydantic

if TYPE_CHECKING:
from ..dimensions import Dimension, DimensionUniverse
Expand All @@ -59,6 +60,14 @@ def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for JSON serialization.

    In the minimal form only ``name`` is populated; ``from_simple`` then
    treats a missing ``storageClass`` as the cue to look the dataset type
    up in a registry.
    """

    # Dataset type name; the only field present in the minimal form.
    name: StrictStr
    storageClass: Optional[StrictStr] = None
    # Serialized dimension graph (names only); required for the full form.
    dimensions: Optional[SerializedDimensionGraph] = None
    # Only set for components whose parent has a distinct storage class.
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False


class DatasetType:
r"""A named category of Datasets that defines how they are organized,
related, and stored.
Expand Down Expand Up @@ -104,6 +113,8 @@ class DatasetType:
"_parentStorageClass", "_parentStorageClassName",
"_isCalibration")

_serializedType = SerializedDatasetType

VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

PlaceholderParentStorageClass = StorageClass("PlaceHolder")
Expand Down Expand Up @@ -474,7 +485,7 @@ def _lookupNames(self) -> Tuple[LookupKey, ...]:

return lookups + self.storageClass._lookupNames()

def to_simple(self, minimal: bool = False) -> Union[Dict, str]:
def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
"""Convert this class to a simple python type suitable for
serialization.
Expand All @@ -486,34 +497,35 @@ def to_simple(self, minimal: bool = False) -> Union[Dict, str]:
Returns
-------
simple : `dict` or `str`
The object converted to a dictionary or a simple string.
simple : `SerializedDatasetType`
The object converted to a class suitable for serialization.
"""
as_dict: Dict[str, Any]
if minimal:
# Only needs the name.
return self.name

# Convert to a dict form
as_dict = {"name": self.name,
"storageClass": self._storageClassName,
"isCalibration": self._isCalibration,
"dimensions": self.dimensions.to_simple(),
}
as_dict = {"name": self.name}
else:
# Convert to a dict form
as_dict = {"name": self.name,
"storageClass": self._storageClassName,
"isCalibration": self._isCalibration,
"dimensions": self.dimensions.to_simple(),
}

if self._parentStorageClassName is not None:
as_dict["parentStorageClass"] = self._parentStorageClassName
return as_dict
if self._parentStorageClassName is not None:
as_dict["parentStorageClass"] = self._parentStorageClassName
return SerializedDatasetType(**as_dict)

@classmethod
def from_simple(cls, simple: Union[Dict, str],
def from_simple(cls, simple: SerializedDatasetType,
universe: Optional[DimensionUniverse] = None,
registry: Optional[Registry] = None) -> DatasetType:
"""Construct a new object from the data returned from the `to_simple`
method.
Parameters
----------
simple : `dict` of [`str`, `Any`] or `str`
simple : `SerializedDatasetType`
The value returned by `to_simple()`.
universe : `DimensionUniverse`
The special graph of all known dimensions of which this graph will
Expand All @@ -528,11 +540,12 @@ def from_simple(cls, simple: Union[Dict, str],
datasetType : `DatasetType`
Newly-constructed object.
"""
if isinstance(simple, str):
if simple.storageClass is None:
# Treat this as minimalist representation
if registry is None:
raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
" without a Registry")
return registry.getDatasetType(simple)
return registry.getDatasetType(simple.name)

if universe is None and registry is None:
raise ValueError("One of universe or registry must be provided.")
Expand All @@ -545,15 +558,19 @@ def from_simple(cls, simple: Union[Dict, str],
# this is for mypy
raise ValueError("Unable to determine a usable universe")

return cls(name=simple["name"],
dimensions=DimensionGraph.from_simple(simple["dimensions"], universe=universe),
storageClass=simple["storageClass"],
isCalibration=simple.get("isCalibration", False),
parentStorageClass=simple.get("parentStorageClass"),
if simple.dimensions is None:
# mypy hint
raise ValueError(f"Dimensions must be specified in {simple}")

return cls(name=simple.name,
dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
storageClass=simple.storageClass,
isCalibration=simple.isCalibration,
parentStorageClass=simple.parentStorageClass,
universe=universe)

to_json = to_json_generic
from_json = classmethod(from_json_generic)
to_json = to_json_pydantic
from_json = classmethod(from_json_pydantic)

def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
Tuple[str, DimensionGraph, str, Optional[str]],
Expand Down
23 changes: 15 additions & 8 deletions python/lsst/daf/butler/core/dimensions/_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@

from __future__ import annotations

__all__ = ["DimensionGraph"]
__all__ = ["DimensionGraph", "SerializedDimensionGraph"]

from pydantic import BaseModel
import itertools
from types import MappingProxyType
from typing import (
Expand All @@ -43,7 +44,7 @@
from ..named import NamedValueAbstractSet, NamedValueSet
from ..utils import cached_getter, immutable
from .._topology import TopologicalSpace, TopologicalFamily
from ..json import from_json_generic, to_json_generic
from ..json import from_json_pydantic, to_json_pydantic

if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
from ._universe import DimensionUniverse
Expand All @@ -52,6 +53,10 @@
from ...registry import Registry


class SerializedDimensionGraph(BaseModel):
    """Simplified model of a `DimensionGraph` suitable for JSON
    serialization.

    Only the dimension names are serializable; the graph itself is
    reconstructed against a `DimensionUniverse` on read.
    """

    names: List[str]


@immutable
class DimensionGraph:
"""An immutable, dependency-complete collection of dimensions.
Expand Down Expand Up @@ -94,6 +99,8 @@ class DimensionGraph:
`DimensionUniverse`), or complete `~collection.abc.Set` semantics are
required.
"""
_serializedType = SerializedDimensionGraph

def __new__(
cls,
universe: DimensionUniverse,
Expand Down Expand Up @@ -189,7 +196,7 @@ def names(self) -> AbstractSet[str]:
"""
return self.dimensions.names

def to_simple(self, minimal: bool = False) -> List[str]:
def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
"""Convert this class to a simple python type suitable for
serialization.
Expand All @@ -204,10 +211,10 @@ def to_simple(self, minimal: bool = False) -> List[str]:
The names of the dimensions.
"""
# Names are all we can serialize.
return list(self.names)
return SerializedDimensionGraph(names=list(self.names))

@classmethod
def from_simple(cls, names: List[str],
def from_simple(cls, names: SerializedDimensionGraph,
universe: Optional[DimensionUniverse] = None,
registry: Optional[Registry] = None) -> DimensionGraph:
"""Construct a new object from the data returned from the `to_simple`
Expand Down Expand Up @@ -237,10 +244,10 @@ def from_simple(cls, names: List[str],
# this is for mypy
raise ValueError("Unable to determine a usable universe")

return cls(names=names, universe=universe)
return cls(names=names.names, universe=universe)

to_json = to_json_generic
from_json = classmethod(from_json_generic)
to_json = to_json_pydantic
from_json = classmethod(from_json_pydantic)

def __iter__(self) -> Iterator[Dimension]:
"""Iterate over all dimensions in the graph (and true `Dimension`
Expand Down

0 comments on commit 83d0dfa

Please sign in to comment.