Skip to content

Commit

Permalink
Merge pull request #223 from lsst/tickets/DM-33155
Browse files Browse the repository at this point in the history
DM-33155: Change how task metadata storage class is chosen
  • Loading branch information
timj committed Jan 11, 2022
2 parents 205747a + 854214c commit bf0e57e
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 24 deletions.
4 changes: 4 additions & 0 deletions doc/changes/DM-33155.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
* Add ``TaskMetadata.to_dict()`` method (this is now used by the ``PropertySet.from_mapping()`` method and triggered by the Butler if type conversion is needed).
* Use the existing metadata storage class definition if one already exists in a repository.
* Switch `~lsst.pipe.base.Task` to use `~lsst.pipe.base.TaskMetadata` for storing task metadata, rather than ``lsst.daf.base.PropertySet``.
This removes a C++ dependency from the middleware.
25 changes: 24 additions & 1 deletion python/lsst/pipe/base/_task_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata":
Parameters
----------
ps : `lsst.daf.base.PropertySet` or `TaskMetadata`
ps : `PropertySetLike` or `TaskMetadata`
A ``PropertySet``-like object to be transformed to a
`TaskMetadata`. A `TaskMetadata` can be copied using this
class method.
Expand Down Expand Up @@ -133,6 +133,29 @@ class method.
metadata[key] = value
return metadata

def to_dict(self) -> Dict[str, Any]:
    """Convert the class to a simple dictionary.

    Returns
    -------
    d : `dict`
        Simple dictionary that can contain scalar values, array values
        or other dictionary values.

    Notes
    -----
    Unlike `dict()`, this method hides the model layout and combines
    scalars, arrays, and other metadata in the same dictionary. Can be
    used when a simple dictionary is needed.  Use
    `TaskMetadata.from_dict()` to convert it back.

    Keys are merged in the order scalars, arrays, nested metadata; if the
    same key appears in more than one of them the later one wins.

    Each array is copied into a new list and nested metadata is converted
    recursively, so changing a list or nested dictionary in the result
    does not alter the arrays or nested metadata held by this object.
    """
    d: Dict[str, Any] = dict(self.scalars)
    # Copy every array: handing out the internal lists would let a caller
    # mutate this object's state through the returned dictionary.
    d.update((name, list(values)) for name, values in self.arrays.items())
    # Nested metadata objects are flattened recursively into plain dicts.
    for name, child in self.metadata.items():
        d[name] = child.to_dict()
    return d

def add(self, name, value):
"""Store a new value, adding to a list if one already exists.
Expand Down
17 changes: 12 additions & 5 deletions python/lsst/pipe/base/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,13 +902,20 @@ def makeDatasetTypesSet(connectionType: str, freeze: bool = True) -> NamedValueS
# optionally add output dataset for metadata
outputs = makeDatasetTypesSet("outputs", freeze=False)
if taskDef.metadataDatasetName is not None:
# Metadata is supposed to be of the PropertySet type, its
# dimensions correspond to a task quantum
# Metadata is supposed to be of the TaskMetadata type, its
# dimensions correspond to a task quantum.
dimensions = registry.dimensions.extract(taskDef.connections.dimensions)
if _TASK_METADATA_TYPE is TaskMetadata:
storageClass = "TaskMetadata"

# Allow the storage class definition to be read from the existing
# dataset type definition if present.
try:
current = registry.getDatasetType(taskDef.metadataDatasetName)
except KeyError:
# No previous definition so use the default.
storageClass = "TaskMetadata" if _TASK_METADATA_TYPE is TaskMetadata else "PropertySet"
else:
storageClass = "PropertySet"
storageClass = current.storageClass.name

outputs |= {DatasetType(taskDef.metadataDatasetName, dimensions, storageClass)}
if taskDef.logOutputDatasetName is not None:
# Log output dimensions correspond to a task quantum.
Expand Down
23 changes: 7 additions & 16 deletions python/lsst/pipe/base/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,11 @@

from ._task_metadata import TaskMetadata

# The Task metadata can be represented as different Python types.
# Initially Task metadata was stored as a PropertyList but we want
# to migrate to TaskMetadata to have explicit control over how it works
# and how it is serialized.
METADATA_COMPATIBILITY = True

if METADATA_COMPATIBILITY:
import lsst.daf.base as dafBase

_TASK_METADATA_TYPE = dafBase.PropertyList
_TASK_FULL_METADATA_TYPE = dafBase.PropertySet
else:
_TASK_METADATA_TYPE = TaskMetadata
_TASK_FULL_METADATA_TYPE = TaskMetadata
# This defines the Python type to use for task metadata. It is a private
# module-level variable that can be accessed by other closely-related
# middleware code and test code.
_TASK_METADATA_TYPE = TaskMetadata
_TASK_FULL_METADATA_TYPE = TaskMetadata


class TaskError(Exception):
Expand Down Expand Up @@ -119,7 +110,7 @@ class Task:
- ``log``: a `logging.Logger` or subclass.
- ``config``: task-specific configuration; an instance of ``ConfigClass``
(see below).
- ``metadata``: an `lsst.daf.base.PropertyList` or `TaskMetadata` for
- ``metadata``: a `TaskMetadata` for
collecting task-specific metadata, e.g. data quality and performance
metrics. This is data that is only meant to be persisted, never to be
used by the task.
Expand Down Expand Up @@ -279,7 +270,7 @@ def getFullMetadata(self):
Returns
-------
metadata : `lsst.daf.base.PropertySet` or `TaskMetadata`
metadata : `TaskMetadata`
The keys are the full task name.
Values are metadata for the top-level task and all subtasks,
sub-subtasks, etc.
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/pipe/base/timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def logInfo(obj, prefix, logLevel=logging.DEBUG, metadata=None, logger=None):
obj : `lsst.pipe.base.Task`-type or `None`
A `~lsst.pipe.base.Task` or any other object with these two attributes:
- ``metadata`` an instance of `lsst.daf.base.PropertyList`` (or other
- ``metadata`` an instance of `~lsst.pipe.base.TaskMetadata` (or other
object with ``add(name, value)`` method).
- ``log`` an instance of `logging.Logger` or subclass.
Expand All @@ -55,7 +55,7 @@ def logInfo(obj, prefix, logLevel=logging.DEBUG, metadata=None, logger=None):
``prefix = End`` when the method ends.
logLevel : `int`, optional
Log level (a `logging` level constant, such as `logging.DEBUG`).
metadata : `lsst.daf.base.PropertyList`, optional
metadata : `lsst.pipe.base.TaskMetadata`, optional
Metadata object to write entries to, overriding ``obj.metadata``.
logger : `logging.Logger`, optional
Log object to write entries to, overriding ``obj.log``.
Expand Down
3 changes: 3 additions & 0 deletions tests/test_taskmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ def testDict(self):
self.assertEqual(meta.getArray("d"), [1, 2])
self.assertEqual(meta["e.h.i"], 4)

d2 = meta.to_dict()
self.assertEqual(d2, d)

j = meta.json()
meta2 = TaskMetadata.parse_obj(json.loads(j))
self.assertEqual(meta2, meta)
Expand Down

0 comments on commit bf0e57e

Please sign in to comment.