Skip to content

Commit

Permalink
Merge pull request #318 from lsst/tickets/DM-38091
Browse files Browse the repository at this point in the history
DM-38091: Allow InMemoryDatasetHandle to take kwargs for dataId
  • Loading branch information
timj committed Apr 15, 2023
2 parents baa836c + e433505 commit 110985a
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-38091.feature.rst
@@ -0,0 +1 @@
Modified ``InMemoryDatasetHandle`` to allow it to be constructed with keyword arguments that will be converted to the relevant DataId.
3 changes: 3 additions & 0 deletions pyproject.toml
Expand Up @@ -28,6 +28,7 @@ dependencies = [
"networkx",
"pyyaml >= 5.1",
"numpy >= 1.17",
"frozendict",
]

dynamic = ["version"]
Expand Down Expand Up @@ -110,3 +111,5 @@ write_to = "python/lsst/pipe/base/version.py"
[tool.pytest.ini_options]
addopts = "--flake8"
flake8-ignore = ["E203", "W503", "N802", "N803", "N806", "N812", "N815", "N816"]
# Some unit tests open registry database and don't close it.
open_files_ignore = ["gen3.sqlite3"]
48 changes: 41 additions & 7 deletions python/lsst/pipe/base/_dataset_handle.py
Expand Up @@ -25,6 +25,7 @@
import dataclasses
from typing import Any, Optional

from frozendict import frozendict
from lsst.daf.butler import DataCoordinate, DimensionUniverse, StorageClass, StorageClassFactory


Expand All @@ -33,9 +34,42 @@ def _default_dataId() -> DataCoordinate:
return DataCoordinate.makeEmpty(DimensionUniverse())


@dataclasses.dataclass(frozen=True)
@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
"""An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`."""
"""An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.
If ``dataId`` is not specified, a default empty dataId will be constructed.
If ``kwargs`` are provided without specifying a ``dataId``, those
parameters will be converted into a dataId-like entity.
"""

def __init__(
    self,
    inMemoryDataset: Any,
    *,
    storageClass: str | None = None,
    parameters: dict[str, Any] | None = None,
    dataId: dict[str, Any] | DataCoordinate | None = None,
    **kwargs: Any,
):
    """Construct an in-memory dataset handle.

    Parameters
    ----------
    inMemoryDataset : `~typing.Any`
        The object to be served by this handle on ``get``.
    storageClass : `str`, optional
        Name of the `~lsst.daf.butler.StorageClass` associated with this
        dataset (the stored attribute is a string name, matching the
        ``storageClass`` field declaration on this dataclass).
    parameters : `dict`, optional
        Parameters that may be used to specify a subset of the dataset to
        be loaded.
    dataId : `dict` or `~lsst.daf.butler.DataCoordinate`, optional
        The data identifier for this dataset. If not given, an empty
        default is constructed (or one is built from ``kwargs``).
    **kwargs : `~typing.Any`
        Additional dataId key/value pairs. If ``dataId`` is also given,
        these are merged into it, with ``kwargs`` taking precedence on
        conflicting keys.
    """
    # This dataclass is frozen, so attributes can only be assigned via
    # object.__setattr__ from within __init__.
    object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
    object.__setattr__(self, "storageClass", storageClass)
    object.__setattr__(self, "parameters", parameters)
    # Need to be able to construct a dataId from kwargs for convenience.
    # This will not be a full DataCoordinate.
    if dataId is None:
        if kwargs:
            # Immutable mapping keeps the frozen-dataclass contract; this
            # is a dataId-like entity, not a full DataCoordinate.
            dataId = frozendict(kwargs)  # type: ignore
        else:
            dataId = DataCoordinate.makeEmpty(DimensionUniverse())
    elif kwargs:
        if isinstance(dataId, DataCoordinate):
            # Expand the existing coordinate, with kwargs overriding any
            # values already present in dataId.
            dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
        else:
            # Plain-dict dataId: merge, kwargs win on conflicting keys.
            merged = dict(dataId)
            merged.update(kwargs)
            dataId = frozendict(merged)  # type: ignore
    object.__setattr__(self, "dataId", dataId)

def get(
self,
Expand Down Expand Up @@ -170,6 +204,11 @@ def _getStorageClass(self) -> StorageClass:
"""The object to store in this dataset handle for later retrieval.
"""

dataId: DataCoordinate | frozendict # type:ignore
"""The `~lsst.daf.butler.DataCoordinate` associated with this dataset
handle.
"""

storageClass: Optional[str] = None
"""The name of the `~lsst.daf.butler.StorageClass` associated with this
dataset.
Expand All @@ -181,8 +220,3 @@ def _getStorageClass(self) -> StorageClass:
"""Optional parameters that may be used to specify a subset of the dataset
to be loaded (`dict` or `None`).
"""

dataId: DataCoordinate = dataclasses.field(default_factory=_default_dataId)
"""The `~lsst.daf.butler.DataCoordinate` associated with this dataset
handle.
"""
2 changes: 1 addition & 1 deletion requirements.txt
Expand Up @@ -2,9 +2,9 @@ pyyaml >= 5.1
pydantic
numpy >= 1.17
networkx
frozendict
git+https://github.com/lsst/daf_butler@main#egg=lsst-daf-butler
git+https://github.com/lsst/utils@main#egg=lsst-utils
git+https://github.com/lsst/resources@main#egg=lsst-resources
git+https://github.com/lsst/pex_config@main#egg=lsst-pex-config
git+https://github.com/lsst/daf_relation@main#egg=lsst-daf-relation
sqlalchemy >= 1.4, <2.0.0
12 changes: 12 additions & 0 deletions tests/test_dataset_handle.py
Expand Up @@ -105,6 +105,18 @@ def test_dataset_handle_dataid(self):
hdl = InMemoryDatasetHandle(42, dataId=dataId)
self.assertIs(hdl.dataId, dataId)

dataId = {"tract": 5, "patch": 2, "instrument": "TestCam"}
hdl = InMemoryDatasetHandle(42, **dataId)
self.assertEqual(hdl.dataId, dataId)

hdl = InMemoryDatasetHandle(42, dataId=dataId, tract=6)
self.assertEqual(hdl.dataId["tract"], 6)

dataId = DataCoordinate.standardize({}, universe=DimensionUniverse(), instrument="DummyCam")
hdl = InMemoryDatasetHandle(42, dataId=dataId, physical_filter="g")
self.assertIsInstance(hdl.dataId, DataCoordinate)
self.assertEqual(hdl.dataId["physical_filter"], "g")

def test_dataset_handle_metric(self):
metric = MetricsExample(summary={"a": 1, "b": 2}, output={"c": {"d": 5}}, data=[1, 2, 3, 4])

Expand Down

0 comments on commit 110985a

Please sign in to comment.