Add system to Instrument for getting configurable dimension packers.

lsst · Apr 26, 2023 · b7b10cc · b7b10cc
1 parent 67dedb6
commit b7b10cc
Show file tree

Hide file tree

Showing 5 changed files with 452 additions and 3 deletions.
diff --git a/doc/changes/DM-31924.feature.md b/doc/changes/DM-31924.feature.md
@@ -0,0 +1 @@
+Add system for obtaining data ID packer objects from the combination of an `Instrument` class and configuration.
diff --git a/python/lsst/pipe/base/__init__.py b/python/lsst/pipe/base/__init__.py
@@ -1,6 +1,7 @@
 from . import connectionTypes, pipelineIR
 from ._dataset_handle import *
 from ._instrument import *
+from ._observation_dimension_packer import *
 from ._status import *
 from ._task_metadata import *
 from .butlerQuantumContext import *

diff --git a/python/lsst/pipe/base/_instrument.py b/python/lsst/pipe/base/_instrument.py
@@ -26,12 +26,15 @@
 import datetime
 import os.path
 from abc import ABCMeta, abstractmethod
-from typing import TYPE_CHECKING, Optional, Sequence, Type, Union, cast
+from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, Union, cast, final
 
-from lsst.daf.butler import DataCoordinate, DataId, DimensionRecord, Formatter
+from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
 from lsst.daf.butler.registry import DataIdError
+from lsst.pex.config import RegistryField
 from lsst.utils import doImportType
 
+from ._observation_dimension_packer import observation_packer_registry
+
 if TYPE_CHECKING:
     from lsst.daf.butler import Registry
     from lsst.pex.config import Config
@@ -535,3 +538,149 @@ def makeCollectionName(self, *labels: str) -> str:
             prefix.
         """
         return "/".join((self.collection_prefix,) + labels)
+
+    @staticmethod
+    def make_dimension_packer_config_field(
+        doc: str = (
+            "How to pack visit+detector or exposure+detector data IDs into integers. "
+            "The default (None) is to delegate to the Instrument class for which "
+            "registered implementation to use (but still use the nested configuration "
+            "for that implementation)."
+        ),
+    ) -> RegistryField:
+        """Make an `lsst.pex.config.Field` that can be used to configure how
+        data IDs for this instrument are packed.
+
+        Parameters
+        ----------
+        doc : `str`, optional
+            Documentation for the config field.
+
+        Returns
+        -------
+        field : `lsst.pex.config.RegistryField`
+            A config field for which calling ``apply`` on the instance
+            attribute constructs an `lsst.daf.butler.DimensionPacker` that
+            defaults to the appropriate one for this instrument.
+
+        Notes
+        -----
+        This method is expected to be used whenever code requires a single
+        integer that represents the combination of a detector and either a
+        visit or exposure, but in most cases the `lsst.meas.base.IdGenerator`
+        class and its helper configs provide a simpler high-level interface
+        that should be used instead of calling this method directly.
+
+        This system is designed to work best when the configuration for the ID
+        packer is not overridden at all, allowing the appropriate instrument
+        class to determine the behavior for each data ID encountered.  When the
+        configuration does need to be modified (most often when the scheme for
+        packing an instrument's data IDs is undergoing an upgrade), it is
+        important to ensure the overrides are only applied to data IDs with the
+        desired instrument value.
+
+        Unit tests of code that use a field produced by this method will often
+        want to explicitly set the packer to "observation" and manually set
+        its ``n_detectors`` and ``n_observations`` fields; this will make it
+        unnecessary for tests to provide expanded data IDs.
+        """
+        # The control flow here bounces around a bit when this RegistryField's
+        # apply() method is called, so it merits a thorough walkthrough
+        # somewhere, and that might as well be here:
+        #
+        # - If the config field's name is not `None`, that kind of packer is
+        #   constructed and returned with the arguments to `apply`, in just the
+        #   way it works with most RegistryFields or ConfigurableFields. But
+        #   this is expected to be rare.
+        #
+        # - If the config field's name is `None`, the `apply` method (which
+        #   actually lives on the `pex.config.RegistryInstanceDict` class,
+        #   since `RegistryField` is a descriptor), calls
+        #   `_make_default_dimension_packer_dispatch` (which is final, and
+        #   hence the base class implementation just below is the only one).
+        #
+        # - `_make_default_dimension_packer_dispatch` instantiates an
+        #   `Instrument` instance of the type pointed at by the data ID (i.e.
+        #   calling `Instrument.from_data_id`), then calls
+        #   `_make_default_dimension_packer` on that.
+        #
+        # - The default implementation of `_make_default_dimension_packer` here
+        #   in the base class picks the "observation" dimension packer, so if
+        #   it's not overridden by a derived class everything proceeds as if
+        #   the config field's name was set to that.  Note that this sets which
+        #   item in the registry is used, but it still pays attention to the
+        #   configuration for that entry in the registry field.
+        #
+        # - A subclass implementation of `_make_default_dimension_packer` will
+        #   take precedence over the base class, but it's expected that these
+        #   will usually just delegate back to ``super()`` while changing the
+        #   ``default`` argument to something other than "observation". Once
+        #   again, this will control which packer entry in the registry is used
+        #   but the result will still reflect the configuration for that packer
+        #   in the registry field.
+        #
+        return observation_packer_registry.makeField(
+            doc, default=None, optional=True, on_none=Instrument._make_default_dimension_packer_dispatch
+        )
+
+    @staticmethod
+    @final
+    def _make_default_dimension_packer_dispatch(
+        config_dict: Any, data_id: DataCoordinate, is_exposure: bool | None = None
+    ) -> DimensionPacker:
+        """Dispatch method used to invoke `_make_default_dimension_packer`.
+
+        This method constructs the appropriate `Instrument` subclass instance
+        from the data ID and then calls its `_make_default_dimension_packer`.
+        It is called when (as usual) the field returned by
+        `make_dimension_packer_config_field` is left to its default selection
+        of `None`.
+
+        All arguments and return values are the same as
+        `_make_default_dimension_packer`.
+        """
+        instrument = Instrument.from_data_id(data_id)
+        return instrument._make_default_dimension_packer(config_dict, data_id, is_exposure=is_exposure)
+
+    def _make_default_dimension_packer(
+        self,
+        config_dict: Any,
+        data_id: DataCoordinate,
+        is_exposure: bool | None = None,
+        default: str = "observation",
+    ) -> DimensionPacker:
+        """Construct and return the default dimension packer for this instrument.
+
+        This method is a protected hook for subclasses to override the behavior
+        of `make_dimension_packer_config_field` when the packer is not selected
+        explicitly via configuration.
+
+        Parameters
+        ----------
+        config_dict
+            Mapping attribute of a `lsst.pex.config.Config` instance that
+            corresponds to a field created by
+            `make_dimension_packer_config_field` (the actual type of this
+            object is a `lsst.pex.config` implementation detail).
+        data_id : `lsst.daf.butler.DataCoordinate`
+            Data ID that identifies at least the ``instrument`` dimension.  For
+            most configurations this must have dimension records attached.
+        is_exposure : `bool`, optional
+            If `False`, construct a packer for visit+detector data IDs.  If
+            `True`, construct a packer for exposure+detector data IDs.  If
+            `None`, this is determined based on whether ``visit`` or
+            ``exposure`` is present in ``data_id``, with ``visit`` checked
+            first and hence used if both are present.
+        default : `str`, optional
+            Registered name of the dimension packer to select when the
+            configured packer is `None` (as is usually the case).  This is
+            intended primarily for derived classes delegating to `super` in
+            reimplementations of this method.
+
+        Returns
+        -------
+        packer : `lsst.daf.butler.DimensionPacker`
+            Object that packs {visit, detector} or {exposure, detector} data
+            IDs into integers.
+        """
+        return config_dict.apply_with(default, data_id, is_exposure=is_exposure)
diff --git a/python/lsst/pipe/base/_observation_dimension_packer.py b/python/lsst/pipe/base/_observation_dimension_packer.py
@@ -0,0 +1,168 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("ObservationDimensionPacker", "ObservationDimensionPackerConfig", "observation_packer_registry")
+
+from typing import Any, cast
+
+from lsst.daf.butler import DataCoordinate, DimensionPacker
+from lsst.pex.config import Config, Field, makeRegistry
+
+observation_packer_registry = makeRegistry(
+    "Configurables that can pack visit+detector or exposure+detector data IDs into integers."
+)
+
+
+class ObservationDimensionPackerConfig(Config):
+    # Config fields are annotated as Any because support for better
+    # annotations is broken on Fields with optional=True.
+    n_detectors: Any = Field(
+        "Number of detectors, or, more precisely, one greater than the "
+        "maximum detector ID, for this instrument. "
+        "Default (None) obtains this value from the instrument dimension record. "
+        "This should rarely need to be overridden outside of tests.",
+        dtype=int,
+        default=None,
+        optional=True,
+    )
+    n_observations: Any = Field(
+        "Number of observations (visits or exposures, as per 'is_exposure`) "
+        "expected, or, more precisely, one greater than the maximum "
+        "visit/exposure ID. "
+        "Default (None) obtains this value from the instrument dimension record. "
+        "This should rarely need to be overridden outside of tests.",
+        dtype=int,
+        default=None,
+        optional=True,
+    )
+
+
+class ObservationDimensionPacker(DimensionPacker):
+    """A `DimensionPacker` for visit+detector or exposure+detector.
+
+    Parameters
+    ----------
+    data_id : `lsst.daf.butler.DataCoordinate`
+        Data ID that identifies at least the ``instrument`` dimension.  Must
+        have dimension records attached unless ``config.n_detectors`` and
+        ``config.n_observations`` are both not `None`.
+    config : `ObservationDimensionPackerConfig`
+        Configuration for this dimension packer.
+    is_exposure : `bool`, optional
+        If `False`, construct a packer for visit+detector data IDs.  If `True`,
+        construct a packer for exposure+detector data IDs.  If `None`,
+        this is determined based on whether ``visit`` or ``exposure`` is
+        present in ``data_id``, with ``visit`` checked first and hence used if
+        both are present.
+
+    Notes
+    -----
+    The standard pattern for constructing instances of this class is to use
+    `Instrument.make_dimension_packer_config_field`; see that method.
+
+    This packer assumes all visit/exposure and detector IDs are sequential or
+    otherwise densely packed between zero and their upper bound, such that
+    ``n_detectors`` * ``n_observations`` leaves plenty of bits remaining for
+    any other IDs that need to be included in the same integer (such as a
+    counter for Sources detected on an image with this data ID).  Instruments
+    whose data ID values are not densely packed should provide their own
+    `~lsst.daf.butler.DimensionPacker` that takes advantage of the structure
+    of their IDs to compress them into fewer bits.
+    """
+
+    ConfigClass = ObservationDimensionPackerConfig
+
+    def __init__(
+        self,
+        data_id: DataCoordinate,
+        config: ObservationDimensionPackerConfig,
+        is_exposure: bool | None = None,
+    ):
+        fixed = data_id.subset(data_id.universe.extract(["instrument"]))
+        if is_exposure is None:
+            if "visit" in data_id.graph.names:
+                is_exposure = False
+            elif "exposure" in data_id.graph.names:
+                is_exposure = True
+            else:
+                raise ValueError(
+                    "'is_exposure' was not provided and 'data_id' has no visit or exposure value."
+                )
+        if is_exposure:
+            dimensions = fixed.universe.extract(["instrument", "exposure", "detector"])
+        else:
+            dimensions = fixed.universe.extract(["instrument", "visit", "detector"])
+        super().__init__(fixed, dimensions)
+        self.is_exposure = is_exposure
+        if config.n_detectors is not None:
+            self._n_detectors = config.n_detectors
+        else:
+            # Records accessed here should never be None; that possibility is
+            # only for non-dimension elements like join tables that are
+            # sometimes not present in an expanded data ID.
+            self._n_detectors = fixed.records["instrument"].detector_max  # type: ignore[union-attr]
+        if config.n_observations is not None:
+            self._n_observations = config.n_observations
+        elif self.is_exposure:
+            self._n_observations = fixed.records["instrument"].exposure_max  # type: ignore[union-attr]
+        else:
+            self._n_observations = fixed.records["instrument"].visit_max  # type: ignore[union-attr]
+        self._max_bits = (self._n_observations * self._n_detectors - 1).bit_length()
+
+    @property
+    def maxBits(self) -> int:
+        # Docstring inherited from DimensionPacker.maxBits
+        return self._max_bits
+
+    def _pack(self, dataId: DataCoordinate) -> int:
+        # Docstring inherited from DimensionPacker._pack
+        detector_id = cast(int, dataId["detector"])
+        if detector_id >= self._n_detectors:
+            raise ValueError(f"Detector ID {detector_id} is out of bounds; expected <{self._n_detectors}.")
+        observation_id = cast(int, dataId["exposure" if self.is_exposure else "visit"])
+        if observation_id >= self._n_observations:
+            raise ValueError(
+                f"{'Exposure' if self.is_exposure else 'Visit'} ID {observation_id} is out of bounds; "
+                f"expected <{self._n_observations}."
+            )
+        return detector_id + self._n_detectors * observation_id
+
+    def unpack(self, packedId: int) -> DataCoordinate:
+        # Docstring inherited from DimensionPacker.unpack
+        observation, detector = divmod(packedId, self._n_detectors)
+        return DataCoordinate.standardize(
+            {
+                "instrument": self.fixed["instrument"],
+                "detector": detector,
+                ("exposure" if self.is_exposure else "visit"): observation,
+            },
+            graph=self.dimensions,
+        )
+
+
+observation_packer_registry = makeRegistry(
+    "Configurables that can pack visit+detector or exposure+detector data IDs into integers. "
+    "Members of this registry should be callable with the same signature as "
+    "`lsst.pipe.base.ObservationDimensionPacker` construction."
+)
+observation_packer_registry.register("observation", ObservationDimensionPacker)