Use immutable decorator on DatasetRef and add __hash__.
This makes the comparison-key attributes of DatasetRef into regular
attributes instead of properties, while blocking the "ref._id = x"
loophole we previously exploited to modify DatasetRefs in place.

That makes it safe to define __hash__, and so now we do.
TallJimbo committed Dec 5, 2019
1 parent d4da29f commit 4619ccd
Showing 2 changed files with 74 additions and 70 deletions.
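The `immutable` decorator imported from `..utils` is not itself part of this diff. The way the new code behaves (attributes assigned normally inside `__new__`, and a `hash` property that checks `hasattr(self, "_hash")` before computing lazily) suggests set-once semantics: each attribute may be assigned exactly once, and any later assignment, such as the old "ref._id = x" loophole, raises. A minimal sketch of such a decorator, assuming that set-once behavior; the real implementation may differ:

```python
def immutable(cls):
    """Class decorator sketch: allow each attribute to be set at most once.

    Hypothetical stand-in for the ``immutable`` decorator in ``..utils``,
    whose real implementation is not shown in this diff.
    """
    def __setattr__(self, name, value):
        if hasattr(self, name):
            raise AttributeError(f"{cls.__name__} instances are immutable.")
        object.__setattr__(self, name, value)
    cls.__setattr__ = __setattr__
    return cls


@immutable
class Demo:
    __slots__ = ("x",)

    def __new__(cls, x):
        self = super().__new__(cls)
        self.x = x  # first assignment: allowed
        return self


d = Demo(1)
try:
    d.x = 2  # second assignment: blocked
except AttributeError as err:
    print(err)  # Demo instances are immutable.
```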
116 changes: 60 additions & 56 deletions python/lsst/daf/butler/core/datasets/ref.py
@@ -23,21 +23,17 @@
 __all__ = ["DatasetRef"]
 
 import hashlib
-from typing import Mapping, Optional, Tuple
+from typing import Any, Dict, Mapping, Optional, Tuple
 
 from types import MappingProxyType
 from ..dimensions import DataCoordinate, DimensionGraph
 from ..run import Run
 from ..configSupport import LookupKey
+from ..utils import immutable
 from .type import DatasetType
 
 
-def _safeMakeMappingProxyType(data):
-    if data is None:
-        data = {}
-    return MappingProxyType(data)
-
-
+@immutable
 class DatasetRef:
     """Reference to a Dataset in a `Registry`.
@@ -70,85 +66,59 @@ class DatasetRef:
     that the dimensions are already consistent.
     """
 
-    __slots__ = ("_id", "_datasetType", "_dataId", "_run", "_hash", "_components")
+    __slots__ = ("id", "datasetType", "dataId", "run", "_hash", "_components")
 
-    def __init__(self, datasetType: DatasetType, dataId: DataCoordinate, *,
-                 id: Optional[int] = None,
-                 run: Optional[Run] = None, hash: Optional[bytes] = None,
-                 components: Optional[Mapping[str, DatasetRef]] = None, conform: bool = True):
+    def __new__(cls, datasetType: DatasetType, dataId: DataCoordinate, *,
+                id: Optional[int] = None,
+                run: Optional[Run] = None, hash: Optional[bytes] = None,
+                components: Optional[Mapping[str, DatasetRef]] = None, conform: bool = True) -> DatasetRef:
+        self = super().__new__(cls)
         assert isinstance(datasetType, DatasetType)
         # TODO: it would be nice to guarantee that id and run should be either
         # both None or not None together.  We can't easily do that yet because
         # the Query infrastructure has a hard time obtaining Run objects, but
         # that will change.
-        self._id = id
-        self._datasetType = datasetType
+        self.id = id
+        self.datasetType = datasetType
         if conform:
-            self._dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
+            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
         else:
-            self._dataId = dataId
+            self.dataId = dataId
         self._components = dict()
         if components is not None:
             self._components.update(components)
-        self._run = run
-        self._hash = hash
+        self.run = run
+        if hash is not None:
+            self._hash = hash
+        return self
 
     def __eq__(self, other: DatasetRef):
-        return (self._datasetType, self.dataId, self._id) == (other._datasetType, other._dataId, other._id)
+        return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
 
+    def __hash__(self) -> int:
+        return hash((self.datasetType, self.dataId, self.id))
 
     def __repr__(self):
         return f"DatasetRef({self.datasetType}, {self.dataId}, id={self.id}, run={self.run})"
 
-    @property
-    def id(self) -> Optional[int]:
-        """Primary key of the dataset (`int` or `None`)
-
-        Typically assigned by `Registry`.
-        """
-        return self._id
-
     @property
     def hash(self) -> bytes:
         """Secure hash of the `DatasetType` name and data ID (`bytes`).
         """
-        if self._hash is None:
+        if not hasattr(self, "_hash"):
             message = hashlib.blake2b(digest_size=32)
             message.update(self.datasetType.name.encode("utf8"))
             self.dataId.fingerprint(message.update)
             self._hash = message.digest()
         return self._hash
 
-    @property
-    def datasetType(self) -> DatasetType:
-        """The `DatasetType` associated with the Dataset the `DatasetRef`
-        points to.
-        """
-        return self._datasetType
-
-    @property
-    def dataId(self) -> DataCoordinate:
-        """A mapping of `Dimension` primary key values that labels the Dataset
-        within a Collection (`DataCoordinate`).
-        """
-        return self._dataId
-
-    @property
-    def run(self) -> Optional[Run]:
-        """The `~lsst.daf.butler.Run` instance that produced (or will produce)
-        the Dataset.
-
-        Read-only; update via `~lsst.daf.butler.Registry.addDataset()` or
-        `~lsst.daf.butler.Butler.put()`.
-        """
-        return self._run
-
     @property
     def components(self) -> Mapping[str, DatasetRef]:
         """Named `DatasetRef` components.
-        Read-only; update via `Registry.attachComponent()`.
+        Read-only (but not immutable); update via `Registry.attachComponent()`.
         """
-        return _safeMakeMappingProxyType(self._components)
+        return MappingProxyType(self._components)
 
     @property
     def dimensions(self) -> DimensionGraph:
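Two details of the hunk above deserve a closer look. First, `__eq__` and the new `__hash__` key on the same `(datasetType, dataId, id)` triple, which is what makes `DatasetRef` safe to use in sets and as dict keys. Second, `components` now returns `MappingProxyType(self._components)` directly: a live, read-only view of the private dict, hence the "Read-only (but not immutable)" wording. A self-contained toy illustrating both behaviors; the class and its fields are invented for illustration and are not daf_butler API:

```python
import hashlib
from types import MappingProxyType


class ToyRef:
    """Toy stand-in for DatasetRef: eq/hash share a key; components is a view."""

    def __init__(self, name, dataId, id=None):
        self.name = name
        self.dataId = dataId
        self.id = id
        self._components = {}

    def __eq__(self, other):
        return (self.name, self.dataId, self.id) == (other.name, other.dataId, other.id)

    def __hash__(self):
        # Hash exactly the fields __eq__ compares, or sets/dicts misbehave.
        return hash((self.name, self.dataId, self.id))

    @property
    def hash(self):
        # Lazily computed and cached, analogous to the blake2b digest above.
        if not hasattr(self, "_hash"):
            message = hashlib.blake2b(digest_size=32)
            message.update(self.name.encode("utf8"))
            self._hash = message.digest()
        return self._hash

    @property
    def components(self):
        # Read-only *view*: callers cannot write through it, but later
        # changes to the underlying dict remain visible.
        return MappingProxyType(self._components)


a = ToyRef("flat", ("DummyCam", 42), id=1)
b = ToyRef("flat", ("DummyCam", 42), id=1)
assert len({a, b}) == 1  # equal refs collapse in a set
a._components["wcs"] = ToyRef("flat.wcs", ("DummyCam", 42))
assert "wcs" in a.components  # the view tracks the dict
try:
    a.components["psf"] = None  # writing through the view fails
except TypeError as err:
    print(err)
```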
@@ -163,6 +133,10 @@ def __str__(self) -> str:
         return "DatasetRef({}, id={}, dataId={} {})".format(self.datasetType.name,
                                                             self.id, self.dataId, components)
 
+    def __getnewargs_ex__(self) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
+        return ((self.datasetType, self.dataId),
+                {"id": self.id, "run": self.run, "components": self._components})

     def resolved(self, id: int, run: Run, components: Optional[Mapping[str, DatasetRef]] = None
                  ) -> DatasetRef:
         """Return a new `DatasetRef` with the same data ID and dataset type
@@ -187,7 +161,7 @@ def resolved(self, id: int, run: Run, components: Optional[Mapping[str, DatasetRef]] = None
         if components:
             newComponents.update(components)
         return DatasetRef(datasetType=self.datasetType, dataId=self.dataId,
-                          id=id, run=run, hash=self._hash, components=newComponents, conform=False)
+                          id=id, run=run, hash=self.hash, components=newComponents, conform=False)

     def unresolved(self) -> DatasetRef:
         """Return a new `DatasetRef` with the same data ID and dataset type,
@@ -207,7 +181,7 @@ def unresolved(self) -> DatasetRef:
             if ref1.unresolved() == ref2.unresolved():
                 ...
         """
-        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, hash=self._hash, conform=False)
+        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, hash=self.hash, conform=False)
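`resolved` and `unresolved` are the flip side of immutability: rather than mutating a ref to attach or strip registry information, each call builds a new instance, and the docstring idiom `ref1.unresolved() == ref2.unresolved()` compares two refs while ignoring their ids and runs. A toy sketch of the pattern; the names are hypothetical, not the daf_butler API:

```python
from typing import Optional


class Ref:
    """Toy sketch of the resolved()/unresolved() pattern."""

    def __init__(self, name: str, id: Optional[int] = None, run: Optional[str] = None):
        self.name = name
        self.id = id
        self.run = run

    def __eq__(self, other):
        return (self.name, self.id) == (other.name, other.id)

    def resolved(self, id: int, run: str) -> "Ref":
        # Attach registry information by constructing a new instance.
        return Ref(self.name, id=id, run=run)

    def unresolved(self) -> "Ref":
        # Strip registry information the same way.
        return Ref(self.name)


a = Ref("flat").resolved(id=1, run="run1")
b = Ref("flat").resolved(id=2, run="run2")
assert a != b  # different registry ids
assert a.unresolved() == b.unresolved()  # same underlying dataset
```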

     def isComponent(self) -> bool:
         """Boolean indicating whether this `DatasetRef` refers to a
@@ -253,3 +227,33 @@ def _lookupNames(self) -> Tuple[LookupKey]:
                            for n in names) + names
 
         return names
+
+    datasetType: DatasetType
+    """The definition of this dataset (`DatasetType`).
+
+    Cannot be changed after a `DatasetRef` is constructed.
+    """
+
+    dataId: DataCoordinate
+    """A mapping of `Dimension` primary key values that labels the dataset
+    within a Collection (`DataCoordinate`).
+
+    Cannot be changed after a `DatasetRef` is constructed.
+    """
+
+    run: Optional[Run]
+    """The `~lsst.daf.butler.Run` instance that produced (or will produce)
+    the dataset.
+
+    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
+    `unresolved` to add or remove this information when creating a new
+    `DatasetRef`.
+    """
+
+    id: Optional[int]
+    """Primary key of the dataset (`int` or `None`).
+
+    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
+    `unresolved` to add or remove this information when creating a new
+    `DatasetRef`.
+    """
28 changes: 14 additions & 14 deletions tests/test_datasets.py
@@ -203,27 +203,22 @@ class DatasetRefTestCase(unittest.TestCase):
 
     def setUp(self):
         self.universe = DimensionUniverse()
+        datasetTypeName = "test"
+        storageClass = StorageClass("testref_StructuredData")
+        dimensions = self.universe.extract(("instrument", "visit"))
+        self.dataId = dict(instrument="DummyCam", visit=42)
+        self.datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
 
     def testConstructor(self):
         """Test construction preserves values.
         """
-        datasetTypeName = "test"
-        storageClass = StorageClass("testref_StructuredData")
-        dimensions = self.universe.extract(("instrument", "visit"))
-        dataId = dict(instrument="DummyCam", visit=42)
-        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
-        ref = DatasetRef(datasetType, dataId)
-        self.assertEqual(ref.datasetType, datasetType)
-        self.assertEqual(ref.dataId, dataId, msg=ref.dataId)
+        ref = DatasetRef(self.datasetType, self.dataId)
+        self.assertEqual(ref.datasetType, self.datasetType)
+        self.assertEqual(ref.dataId, self.dataId, msg=ref.dataId)
         self.assertEqual(ref.components, dict())
 
     def testResolving(self):
-        datasetTypeName = "test"
-        storageClass = StorageClass("testref_StructuredData")
-        dimensions = self.universe.extract(("instrument", "visit"))
-        dataId = dict(instrument="DummyCam", visit=42)
-        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
-        ref = DatasetRef(datasetType, dataId, id=1, run=Run("somerun"))
+        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=Run("somerun"))
         unresolvedRef = ref.unresolved()
         self.assertIsNotNone(ref.id)
         self.assertIsNone(unresolvedRef.id)
@@ -235,6 +230,11 @@ def testResolving(self):
         self.assertEqual(ref, reresolvedRef)
         self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
 
+    def testPickle(self):
+        ref = DatasetRef(self.datasetType, self.dataId, id=1, run=Run("somerun"))
+        s = pickle.dumps(ref)
+        self.assertEqual(pickle.loads(s), ref)
+
 
 if __name__ == "__main__":
     unittest.main()
