Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-42928: Add methods to get and set nested dictionaries from TaskMetadata #403

Merged
merged 7 commits into from
Feb 22, 2024
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ repos:
- id: trailing-whitespace
- id: check-toml
- repo: https://github.com/psf/black
rev: 23.12.1
rev: 24.1.1
hooks:
- id: black
# It is recommended to specify the latest version of Python
Expand Down
3 changes: 3 additions & 0 deletions doc/changes/DM-42928.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add `TaskMetadata.get_dict` and `set_dict` methods.

These provide a consistent way to assign nested dictionaries to, and extract them from, `TaskMetadata`, `lsst.daf.base.PropertySet`, and `lsst.daf.base.PropertyList`.
12 changes: 12 additions & 0 deletions python/lsst/pipe/base/_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@
"InvalidQuantumError",
)

from typing import Protocol

from ._task_metadata import GetSetDictMetadata


class GetSetDictMetadataHolder(Protocol):
    """Protocol for objects that have a ``metadata`` attribute that satisfies
    `GetSetDictMetadata`.

    The attribute is annotated as optional, so code that consumes an object
    through this protocol has to handle ``metadata`` being `None`.
    """

    # Either an object satisfying `GetSetDictMetadata`, or `None`.
    metadata: GetSetDictMetadata | None


class NoWorkFound(BaseException):
"""An exception raised when a Quantum should not exist because there is no
Expand Down
101 changes: 99 additions & 2 deletions python/lsst/pipe/base/_task_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,30 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# NOTE(review): this one-element list is rebound by the assignment directly
# below, so it has no lasting effect; it looks like the pre-change line of
# the diff this text was taken from -- confirm before relying on it.
__all__ = ["TaskMetadata"]
# Public names exported by this module: the metadata class, the protocols
# describing the nested-dict get/set interface, and the recursive type alias
# used in their signatures.
__all__ = [
    "TaskMetadata",
    "SetDictMetadata",
    "GetDictMetadata",
    "GetSetDictMetadata",
    "NestedMetadataDict",
]

import itertools
import numbers
import sys
from collections.abc import Collection, Iterator, Mapping, Sequence
from typing import Any, Protocol
from typing import Any, Protocol, TypeAlias, Union

from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr

# The types allowed in a Task metadata field are restricted
# to allow predictable serialization.
_ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)

# Recursive alias for a mapping with `str` keys whose values are either one
# of the primitive types (`str`, `float`, `int`, `bool`) or another mapping
# of the same form.
#
# Note that '|' syntax for unions doesn't work when we have to use a string
# literal (and we do since it's recursive and not an annotation), which is
# why `typing.Union` is spelled out here.
NestedMetadataDict: TypeAlias = Mapping[str, Union[str, float, int, bool, "NestedMetadataDict"]]


class PropertySetLike(Protocol):
"""Protocol that looks like a ``lsst.daf.base.PropertySet``.
Expand All @@ -56,6 +66,50 @@ def _isListLike(v: Any) -> bool:
return isinstance(v, Sequence) and not isinstance(v, str)


class SetDictMetadata(Protocol):
    """Protocol for objects that can be assigned a possibly-nested `dict` of
    primitives.

    This protocol is satisfied by `TaskMetadata`, `lsst.daf.base.PropertySet`,
    and `lsst.daf.base.PropertyList`, providing a consistent way to insert a
    dictionary into these objects that avoids their historical idiosyncrasies.

    The form in which these entries appear in the object's native keys and
    values is implementation-defined.  *Empty nested dictionaries may be
    dropped, and if the top-level dictionary is empty this method may do
    nothing.*

    Neither the top-level key nor nested keys may contain ``.`` (period)
    characters.
    """

    # Review question kept from the pull-request discussion: "Could there be
    # a single DictMetadata protocol that has both in?"  `GetSetDictMetadata`
    # in this module is the protocol that combines the setter and the getter.
    #
    # NOTE(review): `TaskMetadata.set_dict` names its second parameter
    # ``value`` rather than ``nested``, so pass the mapping positionally when
    # the code has to work with any implementation of this protocol.

    def set_dict(self, key: str, nested: NestedMetadataDict) -> None:
        """Assign the mapping ``nested`` under ``key``.

        Parameters
        ----------
        key : `str`
            Top-level key for the mapping.  May not contain a ``.``
            character.
        nested : `~collections.abc.Mapping`
            Possibly-nested mapping with `str` keys and values that are
            `str`, `float`, `int`, `bool`, or another mapping of the same
            form.  Nested keys may not contain a ``.`` character.
        """
        ...


class GetDictMetadata(Protocol):
    """Protocol for objects that can extract a possibly-nested mapping of
    primitives.

    This protocol is satisfied by `TaskMetadata`, `lsst.daf.base.PropertySet`,
    and `lsst.daf.base.PropertyList`, providing a consistent way to extract a
    dictionary from these objects that avoids their historical idiosyncrasies.

    This is guaranteed to work for mappings inserted by
    `~SetDictMetadata.set_dict`.  It should not be expected to work for values
    inserted in other ways.  If a value was never inserted with the given key
    at all, *an empty `dict` will be returned* (this is a concession to
    implementation constraints in `~lsst.daf.base.PropertyList`).
    """

    def get_dict(self, key: str) -> NestedMetadataDict: ...


class GetSetDictMetadata(SetDictMetadata, GetDictMetadata, Protocol):
    """Protocol for objects that can assign and extract a possibly-nested
    mapping of primitives.

    This combines `SetDictMetadata` and `GetDictMetadata`; it declares no
    members of its own.
    """


class TaskMetadata(BaseModel):
"""Dict-like object for storing task metadata.

Expand Down Expand Up @@ -477,6 +531,49 @@ def __delitem__(self, key: str) -> None:
# Report the correct key.
raise KeyError(f"'{key}' not found'") from None

def get_dict(self, key: str) -> NestedMetadataDict:
    """Extract the nested mapping stored under ``key`` as a plain `dict`.

    This implements the `GetDictMetadata` protocol for consistency with
    `lsst.daf.base.PropertySet` and `lsst.daf.base.PropertyList`.  The
    returned `dict` is guaranteed to be a deep copy, not a view.

    Parameters
    ----------
    key : `str`
        String key associated with the mapping.  May not have a ``.``
        character.

    Returns
    -------
    value : `~collections.abc.Mapping`
        Possibly-nested mapping, with `str` keys and values that are `int`,
        `float`, `str`, `bool`, or another `dict` with the same key and
        value types.  Will be empty if ``key`` does not exist.
    """
    stored = self.get(key)
    if not stored:
        # Nothing truthy is held under this key: hand back a fresh empty
        # dict instead of raising.
        return {}
    return stored.to_dict()

def set_dict(self, key: str, value: NestedMetadataDict) -> None:
    """Assign a possibly-hierarchical nested `dict`.

    This implements the `SetDictMetadata` protocol for consistency with
    `lsst.daf.base.PropertySet` and `lsst.daf.base.PropertyList`.

    Parameters
    ----------
    key : `str`
        String key associated with the mapping.  May not have a ``.``
        character.
    value : `~collections.abc.Mapping`
        Possibly-nested mapping, with `str` keys and values that are `int`,
        `float`, `str`, `bool`, or another `dict` with the same key and
        value types.  Nested keys may not have a ``.`` character.

    Notes
    -----
    This method performs no checks of its own; it is a plain item
    assignment, so any validation is whatever ``__setitem__`` does.

    The `SetDictMetadata` protocol declares the second parameter as
    ``nested`` while this method calls it ``value``; pass it positionally
    when the call has to work with any implementation of the protocol.
    """
    # Delegate entirely to item assignment on this object.
    self[key] = value

def _validate_value(self, value: Any) -> tuple[str, Any]:
"""Validate the given value.

Expand Down
20 changes: 20 additions & 0 deletions tests/test_taskmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,26 @@ def testNumpy(self):
with self.assertRaises(ValueError):
meta["numpy"] = numpy.zeros(5)

def test_get_set_dict(self):
    """Round-trip nested dictionaries through ``set_dict``/``get_dict``."""
    metadata = TaskMetadata()
    original = {"one": 1, "two": 2.0, "three": True, "four": {"a": 4, "b": "B"}, "five": {}}
    metadata.set_dict("d", original)
    metadata.set_dict("e", {})
    round_tripped = metadata.get_dict("d")

    # Keys with empty-dict values may or may not be round-tripped, so the
    # result must sit between the required keys and the full key set.
    required = {"one", "two", "three", "four"}
    self.assertGreaterEqual(round_tripped.keys(), required)
    self.assertLessEqual(round_tripped.keys(), required | {"five"})

    # Every non-empty entry must come back unchanged.
    for name in ("one", "two", "three", "four"):
        self.assertEqual(round_tripped[name], original[name])
    self.assertEqual(round_tripped.get("five", {}), original["five"])

    # Empty dict may or may not have been added, and retrieving it or
    # a key that was never added yields an empty dict.
    for name in ("e", "f"):
        self.assertEqual(metadata.get_dict(name), {})


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()