Merge branch 'tickets/DM-30266'
natelust committed Dec 12, 2021
2 parents 1184064 + 08a9926 commit ad64451
Showing 9 changed files with 597 additions and 20 deletions.
38 changes: 35 additions & 3 deletions python/lsst/daf/butler/core/datasets/ref.py
@@ -89,6 +89,29 @@ def _check_component(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
return v

@classmethod
def direct(cls, *, id: Optional[Union[str, int]] = None, datasetType: Optional[Dict[str, Any]] = None,
dataId: Optional[Dict[str, Any]] = None, run: Optional[str] = None, component: Optional[str] = None
) -> SerializedDatasetRef:
"""Construct a `SerializedDatasetRef` directly without validators.
This differs from the pydantic "construct" method in that the arguments
are explicitly what the model requires, and it will recurse through
members, constructing them from their corresponding `direct` methods.
This method should only be called when the inputs are trusted.
"""
node = SerializedDatasetRef.__new__(cls)
setter = object.__setattr__
setter(node, 'id', uuid.UUID(id) if isinstance(id, str) else id)
setter(node, 'datasetType',
datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType))
setter(node, 'dataId', dataId if dataId is None else SerializedDataCoordinate.direct(**dataId))
setter(node, 'run', run)
setter(node, 'component', component)
setter(node, '__fields_set__', {'id', 'datasetType', 'dataId', 'run', 'component'})
return node


DatasetId = Union[int, uuid.UUID]
"""A type-annotation alias for dataset ID which could be either integer or
@@ -249,7 +272,8 @@ def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
@classmethod
def from_simple(cls, simple: SerializedDatasetRef,
universe: Optional[DimensionUniverse] = None,
registry: Optional[Registry] = None) -> DatasetRef:
registry: Optional[Registry] = None,
datasetType: Optional[DatasetType] = None) -> DatasetRef:
"""Construct a new object from simplified form.
Generally this is data returned from the `to_simple` method.
@@ -265,6 +289,11 @@ def from_simple(cls, simple: SerializedDatasetRef,
Registry to use to convert simple form of a DatasetRef to
a full `DatasetRef`. Can be `None` if a full description of
the type is provided along with a universe.
datasetType : DatasetType, optional
If supplied, this is used as the datasetType of the resulting
DatasetRef instead of being read from the `SerializedDatasetRef`.
This saves memory when many refs share the same type. Defaults
to None.
Returns
-------
@@ -295,10 +324,13 @@ def from_simple(cls, simple: SerializedDatasetRef,
# this is for mypy
raise ValueError("Unable to determine a usable universe")

if simple.datasetType is None:
if simple.datasetType is None and datasetType is None:
# mypy
raise ValueError("The DatasetType must be specified to construct a DatasetRef")
datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)
if datasetType is None:
if simple.datasetType is None:
raise ValueError("Cannot determine Dataset type of this serialized class")
datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

if simple.dataId is None:
# mypy
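Taken together, the new `direct` constructor and the `datasetType` argument to `from_simple` let trusted, already-serialized refs be rebuilt quickly while sharing a single `DatasetType` across many refs. A minimal sketch of that usage (the helper name and the JSON source are assumptions, not part of the commit):

import json

from lsst.daf.butler import DatasetRef, DatasetType
from lsst.daf.butler.core.datasets.ref import SerializedDatasetRef

def refs_from_trusted_json(blobs, universe, shared_type: DatasetType):
    # `blobs` is assumed to hold JSON strings produced by serializing
    # DatasetRef.to_simple(); `direct` skips validation, so the inputs
    # must be trusted.
    for blob in blobs:
        simple = SerializedDatasetRef.direct(**json.loads(blob))
        # Supplying the shared DatasetType avoids rebuilding it from
        # every serialized ref, saving memory when many refs share one
        # type.
        yield DatasetRef.from_simple(simple, universe=universe,
                                     datasetType=shared_type)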
25 changes: 25 additions & 0 deletions python/lsst/daf/butler/core/datasets/type.py
@@ -69,6 +69,31 @@ class SerializedDatasetType(BaseModel):
parentStorageClass: Optional[StrictStr] = None
isCalibration: StrictBool = False

@classmethod
def direct(cls, *, name: str, storageClass: Optional[str] = None,
dimensions: Optional[Dict] = None,
parentStorageClass: Optional[str] = None, isCalibration: bool = False
) -> SerializedDatasetType:
"""Construct a `SerializedDatasetType` directly without validators.
This differs from the pydantic "construct" method in that the arguments are
explicitly what the model requires, and it will recurse through
members, constructing them from their corresponding `direct` methods.
This method should only be called when the inputs are trusted.
"""
node = SerializedDatasetType.__new__(cls)
setter = object.__setattr__
setter(node, 'name', name)
setter(node, 'storageClass', storageClass)
setter(node, 'dimensions',
dimensions if dimensions is None else SerializedDimensionGraph.direct(**dimensions))
setter(node, 'parentStorageClass', parentStorageClass)
setter(node, 'isCalibration', isCalibration)
setter(node, '__fields_set__', {'name', 'storageClass', 'dimensions', 'parentStorageClass',
'isCalibration'})
return node


class DatasetType:
r"""A named category of Datasets.
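As with the ref version, `SerializedDatasetType.direct` recurses into `SerializedDimensionGraph.direct` for the nested `dimensions` member. A sketch with a hypothetical trusted payload (the dataset type name and storage class are illustrative):

from lsst.daf.butler.core.datasets.type import SerializedDatasetType

# Shaped like the output of pydantic's .dict() on a SerializedDatasetType.
payload = {
    "name": "calexp",
    "storageClass": "ExposureF",
    "dimensions": {"names": ["instrument", "visit", "detector"]},
    "parentStorageClass": None,
    "isCalibration": False,
}
sdt = SerializedDatasetType.direct(**payload)  # no validators run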
23 changes: 22 additions & 1 deletion python/lsst/daf/butler/core/ddl.py
@@ -32,6 +32,8 @@
"""
from __future__ import annotations

from lsst import sphgeom

__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
"AstropyTimeNsecTai", "GUID")

@@ -127,6 +129,15 @@ def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.
# native `bytes`.
return b64decode(value.encode("ascii")) if value is not None else None

@property
def python_type(self) -> Type[bytes]:
return bytes


# Create an alias, for use below, to disambiguate from the built-in
# sqlalchemy types.
LocalBase64Bytes = Base64Bytes


class Base64Region(Base64Bytes):
"""A SQLAlchemy custom type for Python `sphgeom.Region`.
@@ -146,6 +157,10 @@ def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.
return None
return Region.decode(super().process_result_value(value, dialect))

@property
def python_type(self) -> Type[sphgeom.Region]:
return sphgeom.Region


class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
"""A SQLAlchemy custom type for Python `astropy.time.Time`.
@@ -370,7 +385,13 @@ def getPythonType(self) -> type:
type : `type`
Python type associated with this field's (SQL) `dtype`.
"""
return self.dtype().python_type
# To construct these objects, the nbytes keyword is needed.
if issubclass(self.dtype, LocalBase64Bytes):
# satisfy mypy for something that must be true
assert self.nbytes is not None
return self.dtype(nbytes=self.nbytes).python_type
else:
return self.dtype().python_type # type: ignore


@dataclass
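The new `python_type` properties exist so that `FieldSpec.getPythonType` works for the base64-backed types, which can only be instantiated with an `nbytes` argument. A sketch, assuming a `FieldSpec` built with the `name`, `dtype`, and `nbytes` attributes used above:

from lsst import sphgeom
from lsst.daf.butler.core.ddl import Base64Region, FieldSpec

spec = FieldSpec(name="region", dtype=Base64Region, nbytes=2048)
# getPythonType sees a Base64Bytes subclass, constructs it with nbytes,
# and reads python_type, yielding sphgeom.Region for this field.
assert spec.getPythonType() is sphgeom.Region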
19 changes: 19 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_coordinate.py
@@ -73,6 +73,25 @@ class SerializedDataCoordinate(BaseModel):
dataId: Dict[str, DataIdValue]
records: Optional[Dict[str, SerializedDimensionRecord]] = None

@classmethod
def direct(cls, *, dataId: Dict[str, DataIdValue], records: Optional[Dict[str, Dict]]) -> SerializedDataCoordinate:
"""Construct a `SerializedDataCoordinate` directly without validators.
This differs from the pydantic "construct" method in that the arguments
are explicitly what the model requires, and it will recurse through
members, constructing them from their corresponding `direct` methods.
This method should only be called when the inputs are trusted.
"""
node = SerializedDataCoordinate.__new__(cls)
setter = object.__setattr__
setter(node, 'dataId', dataId)
setter(node, 'records',
records if records is None else
{k: SerializedDimensionRecord.direct(**v) for k, v in records.items()})
setter(node, '__fields_set__', {'dataId', 'records'})
return node


def _intersectRegions(*args: Region) -> Optional[Region]:
"""Return the intersection of several regions.
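A round trip under trusted conditions, sketched with a hypothetical data ID: serialize with pydantic, then rebuild without validation.

from lsst.daf.butler.core.dimensions._coordinate import SerializedDataCoordinate

original = SerializedDataCoordinate(dataId={"instrument": "HSC", "visit": 903334})
# .dict() yields {"dataId": {...}, "records": None}; direct() accepts
# that mapping verbatim and skips validation entirely.
rebuilt = SerializedDataCoordinate.direct(**original.dict())
assert rebuilt.dict() == original.dict()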
15 changes: 15 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_graph.py
@@ -58,6 +58,21 @@ class SerializedDimensionGraph(BaseModel):

names: List[str]

@classmethod
def direct(cls, *, names: List[str]) -> SerializedDimensionGraph:
"""Construct a `SerializedDimensionGraph` directly without validators.
This differs from the pydantic "construct" method in that the arguments
are explicitly what the model requires, and it will recurse through
members, constructing them from their corresponding `direct` methods.
This method should only be called when the inputs are trusted.
"""
node = SerializedDimensionGraph.__new__(cls)
object.__setattr__(node, 'names', names)
object.__setattr__(node, '__fields_set__', {'names'})
return node


@immutable
class DimensionGraph:
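The graph model is the leaf of the `direct` recursion: it stores only a list of names, so construction is a one-liner (the dimension names are illustrative):

from lsst.daf.butler.core.dimensions._graph import SerializedDimensionGraph

sdg = SerializedDimensionGraph.direct(names=["instrument", "visit", "detector"])
assert sdg.names == ["instrument", "visit", "detector"]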
31 changes: 30 additions & 1 deletion python/lsst/daf/butler/core/dimensions/_records.py
@@ -159,6 +159,30 @@ class Config:
}
}

@classmethod
def direct(cls, *, definition: str, record: Dict[str, Union[None, StrictFloat, StrictStr, StrictBool,
StrictInt, Tuple[int, int]]]
) -> SerializedDimensionRecord:
"""Construct a `SerializedDimensionRecord` directly without validators.
This differs from the pydantic "construct" method in that the arguments
are explicitly what the model requires, and it will recurse through
members, constructing them from their corresponding `direct` methods.
This method should only be called when the inputs are trusted.
"""
node = SerializedDimensionRecord.__new__(cls)
setter = object.__setattr__
setter(node, 'definition', definition)
# This method requires tuples as values of the mapping, but JSON
# readers will read things in as lists. Be kind and transparently
# transform them to tuples.
setter(node, 'record', {k: tuple(v) if isinstance(v, list) else v  # type: ignore
for k, v in record.items()})
setter(node, '__fields_set__', {'definition', 'record'})
return node


@immutable
class DimensionRecord:
@@ -298,7 +322,10 @@ def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
# and also history. Here use a different approach.
# This code needs to be migrated to sphgeom
mapping[k] = v.encode().hex()

if isinstance(v, bytes):
# We can't serialize raw bytes for hash objects, so encode
# them here as a hex string.
mapping[k] = v.hex()
definition = self.definition.to_simple(minimal=minimal)
return SerializedDimensionRecord(definition=definition, record=mapping)

@@ -353,6 +380,8 @@ def from_simple(cls, simple: SerializedDimensionRecord,
if (reg := "region") in rec:
encoded = bytes.fromhex(rec[reg])
rec[reg] = lsst.sphgeom.Region.decode(encoded)
if (hsh := "hash") in rec:
rec[hsh] = bytes.fromhex(rec[hsh])

return _reconstructDimensionRecord(definition, rec)

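Two details above are worth illustrating: `direct` coerces the lists that JSON readers produce back into the tuples the model requires, and `hash` values round-trip through hex strings. A sketch with hypothetical record fields:

from lsst.daf.butler.core.dimensions._records import SerializedDimensionRecord

rec = SerializedDimensionRecord.direct(
    definition="detector",
    # A JSON reader would hand back [0, 100] as a list; direct()
    # transparently turns it into the required tuple.
    record={"instrument": "HSC", "id": 42, "some_range": [0, 100]},
)
assert rec.record["some_range"] == (0, 100)

# to_simple() emits hash bytes as a hex string ...
digest = b"\x00\x01\x02"
encoded = digest.hex()  # "000102"
# ... and from_simple() recovers them with bytes.fromhex.
assert bytes.fromhex(encoded) == digest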
