Skip to content

Commit

Permalink
Add system to Instrument for getting configurable dimension packers.
Browse files Browse the repository at this point in the history
  • Loading branch information
TallJimbo committed Apr 26, 2023
1 parent 67dedb6 commit b7b10cc
Show file tree
Hide file tree
Showing 5 changed files with 452 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-31924.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add system for obtaining data ID packer objects from the combination of an `Instrument` class and configuration.
1 change: 1 addition & 0 deletions python/lsst/pipe/base/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from . import connectionTypes, pipelineIR
from ._dataset_handle import *
from ._instrument import *
from ._observation_dimension_packer import *
from ._status import *
from ._task_metadata import *
from .butlerQuantumContext import *
Expand Down
153 changes: 151 additions & 2 deletions python/lsst/pipe/base/_instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,15 @@
import datetime
import os.path
from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING, Optional, Sequence, Type, Union, cast
from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, Union, cast, final

from lsst.daf.butler import DataCoordinate, DataId, DimensionRecord, Formatter
from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
from lsst.daf.butler.registry import DataIdError
from lsst.pex.config import RegistryField
from lsst.utils import doImportType

from ._observation_dimension_packer import observation_packer_registry

if TYPE_CHECKING:
from lsst.daf.butler import Registry
from lsst.pex.config import Config
Expand Down Expand Up @@ -535,3 +538,149 @@ def makeCollectionName(self, *labels: str) -> str:
prefix.
"""
return "/".join((self.collection_prefix,) + labels)

@staticmethod
def make_dimension_packer_config_field(
    doc: str = (
        "How to pack visit+detector or exposure+detector data IDs into integers. "
        "The default (None) is to delegate to the Instrument class for which "
        "registered implementation to use (but still use the nested configuration "
        "for that implementation)."
    ),
) -> RegistryField:
    """Build the config field that selects and configures a data ID packer.

    Parameters
    ----------
    doc : `str`, optional
        Documentation for the config field.

    Returns
    -------
    field : `lsst.pex.config.RegistryField`
        A config field for which calling ``apply`` on the instance
        attribute constructs an `lsst.daf.butler.DimensionPacker` that
        defaults to the appropriate one for this instrument.

    Notes
    -----
    Use this whenever code needs a single integer standing for the
    combination of a detector and either a visit or an exposure.  In most
    cases, though, the `lsst.meas.base.IdGenerator` class and its helper
    configs offer a simpler high-level interface and should be preferred
    over calling this method directly.

    The system works best when the packer configuration is not overridden
    at all, which lets the appropriate instrument class decide the behavior
    for each data ID encountered.  When the configuration does have to be
    changed (most often while the scheme for packing an instrument's data
    IDs is being upgraded), take care that the overrides are applied only
    to data IDs with the intended instrument value.

    Unit tests of code that uses a field produced by this method will often
    want to set the packer explicitly to "observation" and set its
    ``n_detectors`` and ``n_observations`` fields by hand; tests then do
    not need to supply expanded data IDs.
    """
    # Calling ``apply`` on the field built below can take several routes,
    # so the whole sequence is spelled out here:
    #
    # - When the config field's name is not `None`, the packer registered
    #   under that name is constructed and returned with the arguments
    #   given to ``apply``, exactly as with most RegistryFields or
    #   ConfigurableFields.  This is expected to be rare.
    #
    # - When the config field's name is `None`, ``apply`` (which actually
    #   lives on `pex.config.RegistryInstanceDict`, because
    #   `RegistryField` is a descriptor) calls
    #   `_make_default_dimension_packer_dispatch`.  That method is final,
    #   so the base-class implementation is the only one.
    #
    # - `_make_default_dimension_packer_dispatch` obtains an `Instrument`
    #   instance of the type the data ID points at (by calling
    #   `Instrument.from_data_id`) and then calls
    #   `_make_default_dimension_packer` on it.
    #
    # - The base-class `_make_default_dimension_packer` picks the
    #   "observation" packer, so unless a derived class overrides it
    #   everything proceeds as though the field's name had been set to
    #   that.  This only chooses which registry item is used; the
    #   configuration held for that entry in the field is still honored.
    #
    # - A subclass override of `_make_default_dimension_packer` takes
    #   precedence over the base class, but such overrides are expected to
    #   usually delegate back to ``super()`` while passing a ``default``
    #   other than "observation".  Again this selects the registry entry,
    #   while the result still reflects the configuration stored for that
    #   packer in the registry field.
    #
    packer_field = observation_packer_registry.makeField(
        doc,
        default=None,
        optional=True,
        on_none=Instrument._make_default_dimension_packer_dispatch,
    )
    return packer_field

@staticmethod
@final
def _make_default_dimension_packer_dispatch(
    config_dict: Any, data_id: DataCoordinate, is_exposure: bool | None = None
) -> DimensionPacker:
    """Dispatch to `_make_default_dimension_packer` on the right instrument.

    The `Instrument` instance is obtained from ``data_id`` via
    `Instrument.from_data_id`, and its `_make_default_dimension_packer`
    method is then called with the arguments received here.

    This is the ``on_none`` callback of the field returned by
    `make_dimension_packer_config_field`, so it runs when (as usual) that
    field is left at its default selection of `None`.

    All arguments and the return value are the same as for
    `_make_default_dimension_packer`, except that no ``default`` is
    accepted or forwarded here.
    """
    # Single expression: look the instrument up, then delegate to its hook.
    return Instrument.from_data_id(data_id)._make_default_dimension_packer(
        config_dict, data_id, is_exposure=is_exposure
    )

def _make_default_dimension_packer(
    self,
    config_dict: Any,
    data_id: DataCoordinate,
    is_exposure: bool | None = None,
    default: str = "observation",
) -> DimensionPacker:
    """Construct and return the default dimension packer for this
    instrument.

    This is a protected hook that lets subclasses change what
    `make_dimension_packer_config_field` produces when no packer has been
    selected explicitly via configuration.

    Parameters
    ----------
    config_dict
        Mapping attribute of a `lsst.pex.config.Config` instance that
        corresponds to a field created by
        `make_dimension_packer_config_field` (the actual type of this
        object is a `lsst.pex.config` implementation detail).
    data_id : `lsst.daf.butler.DataCoordinate`
        Data ID that identifies at least the ``instrument`` dimension.
        For most configurations this must have dimension records attached.
    is_exposure : `bool`, optional
        If `False`, construct a packer for visit+detector data IDs.  If
        `True`, construct a packer for exposure+detector data IDs.  If
        `None`, this is determined based on whether ``visit`` or
        ``exposure`` is present in ``data_id``, with ``visit`` checked
        first and hence used if both are present.
    default : `str`, optional
        Registered name of the dimension packer to select when the
        configured packer is `None` (as is usually the case).  Intended
        primarily for derived classes that delegate to `super` in their
        reimplementations of this method.

    Returns
    -------
    packer : `lsst.daf.butler.DimensionPacker`
        Object that packs {visit, detector} or {exposure, detector} data
        IDs into integers.
    """
    # ``apply_with`` selects the registry entry named ``default`` and
    # forwards the remaining arguments to it.
    packer = config_dict.apply_with(default, data_id, is_exposure=is_exposure)
    return packer
168 changes: 168 additions & 0 deletions python/lsst/pipe/base/_observation_dimension_packer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("ObservationDimensionPacker", "ObservationDimensionPackerConfig", "observation_packer_registry")

from typing import Any, cast

from lsst.daf.butler import DataCoordinate, DimensionPacker
from lsst.pex.config import Config, Field, makeRegistry

# Registry of configurables that pack visit+detector or exposure+detector
# data IDs into integers.
# NOTE(review): the same name is assigned again by a second ``makeRegistry``
# call at the end of this module, just before "observation" is registered.
# In the code visible here nothing is registered on, or reads, the object
# created by this first call, so it looks redundant - confirm and
# consolidate the two assignments.
observation_packer_registry = makeRegistry(
"Configurables that can pack visit+detector or exposure+detector data IDs into integers."
)


class ObservationDimensionPackerConfig(Config):
    """Configuration for `ObservationDimensionPacker`.

    Both fields are optional and default to `None`, in which case the
    packer reads the corresponding upper bound from the instrument
    dimension record of the data ID it is constructed with.
    """

    # Config fields are annotated as Any because support for better
    # annotations is broken on Fields with optional=True.

    # Exclusive upper bound on detector IDs.
    n_detectors: Any = Field(
        doc=(
            "Number of detectors, or, more precisely, one greater than the "
            "maximum detector ID, for this instrument. "
            "Default (None) obtains this value from the instrument dimension record. "
            "This should rarely need to be overridden outside of tests."
        ),
        dtype=int,
        optional=True,
        default=None,
    )

    # Exclusive upper bound on visit or exposure IDs.
    n_observations: Any = Field(
        doc=(
            "Number of observations (visits or exposures, as per 'is_exposure`) "
            "expected, or, more precisely, one greater than the maximum "
            "visit/exposure ID. "
            "Default (None) obtains this value from the instrument dimension record. "
            "This should rarely need to be overridden outside of tests."
        ),
        dtype=int,
        optional=True,
        default=None,
    )


class ObservationDimensionPacker(DimensionPacker):
    """A `DimensionPacker` for visit+detector or exposure+detector.

    Parameters
    ----------
    data_id : `lsst.daf.butler.DataCoordinate`
        Data ID that identifies at least the ``instrument`` dimension.  Must
        have dimension records attached unless ``config.n_detectors`` and
        ``config.n_observations`` are both not `None`.
    config : `ObservationDimensionPackerConfig`
        Configuration for this dimension packer.
    is_exposure : `bool`, optional
        If `False`, construct a packer for visit+detector data IDs.  If `True`,
        construct a packer for exposure+detector data IDs.  If `None`,
        this is determined based on whether ``visit`` or ``exposure`` is
        present in ``data_id``, with ``visit`` checked first and hence used if
        both are present.

    Raises
    ------
    ValueError
        Raised if ``is_exposure`` is `None` and ``data_id`` has neither a
        ``visit`` nor an ``exposure`` dimension.

    Notes
    -----
    The standard pattern for constructing instances of the class is to use
    `Instrument.make_dimension_packer_config_field`; see that method for
    details.

    This packer assumes all visit/exposure and detector IDs are sequential or
    otherwise densely packed between zero and their upper bound, such that
    ``n_detectors`` * ``n_observations`` leaves plenty of bits remaining for
    any other IDs that need to be included in the same integer (such as a
    counter for Sources detected on an image with this data ID).  Instruments
    whose data ID values are not densely packed should provide their own
    `~lsst.daf.butler.DimensionPacker` that takes advantage of the structure
    of its IDs to compress them into fewer bits.
    """

    ConfigClass = ObservationDimensionPackerConfig

    def __init__(
        self,
        data_id: DataCoordinate,
        config: ObservationDimensionPackerConfig,
        is_exposure: bool | None = None,
    ):
        # Only the instrument is held fixed; visit/exposure and detector are
        # the dimensions that get packed.
        fixed = data_id.subset(data_id.universe.extract(["instrument"]))
        if is_exposure is None:
            # Infer the observation dimension from the data ID, preferring
            # visit when both are present.
            if "visit" in data_id.graph.names:
                is_exposure = False
            elif "exposure" in data_id.graph.names:
                is_exposure = True
            else:
                raise ValueError(
                    "'is_exposure' was not provided and 'data_id' has no visit or exposure value."
                )
        if is_exposure:
            dimensions = fixed.universe.extract(["instrument", "exposure", "detector"])
        else:
            dimensions = fixed.universe.extract(["instrument", "visit", "detector"])
        super().__init__(fixed, dimensions)
        self.is_exposure = is_exposure
        # Explicit config values win; the instrument record is only consulted
        # (and hence only needs to be attached) for values left as None.
        if config.n_detectors is not None:
            self._n_detectors = config.n_detectors
        else:
            # Records accessed here should never be None; that possibility is
            # only for non-dimension elements like join tables that are
            # sometimes not present in an expanded data ID.
            self._n_detectors = fixed.records["instrument"].detector_max  # type: ignore[union-attr]
        if config.n_observations is not None:
            self._n_observations = config.n_observations
        elif self.is_exposure:
            self._n_observations = fixed.records["instrument"].exposure_max  # type: ignore[union-attr]
        else:
            self._n_observations = fixed.records["instrument"].visit_max  # type: ignore[union-attr]
        # The largest packed value is n_observations * n_detectors - 1.
        self._max_bits = (self._n_observations * self._n_detectors - 1).bit_length()

    @property
    def maxBits(self) -> int:
        # Docstring inherited from DimensionPacker.maxBits
        return self._max_bits

    def _pack(self, dataId: DataCoordinate) -> int:
        # Docstring inherited from DimensionPacker._pack
        #
        # Both bounds are checked: a negative ID would otherwise pack to a
        # negative integer or to the same integer as a different, valid data
        # ID, and `unpack` could not recover it.
        detector_id = cast(int, dataId["detector"])
        if not 0 <= detector_id < self._n_detectors:
            raise ValueError(
                f"Detector ID {detector_id} is out of bounds; expected >=0 and <{self._n_detectors}."
            )
        observation_id = cast(int, dataId["exposure" if self.is_exposure else "visit"])
        if not 0 <= observation_id < self._n_observations:
            raise ValueError(
                f"{'Exposure' if self.is_exposure else 'Visit'} ID {observation_id} is out of bounds; "
                f"expected >=0 and <{self._n_observations}."
            )
        # Detector varies fastest; `unpack` inverts this with divmod.
        return detector_id + self._n_detectors * observation_id

    def unpack(self, packedId: int) -> DataCoordinate:
        # Docstring inherited from DimensionPacker.unpack
        observation, detector = divmod(packedId, self._n_detectors)
        return DataCoordinate.standardize(
            {
                "instrument": self.fixed["instrument"],
                "detector": detector,
                ("exposure" if self.is_exposure else "visit"): observation,
            },
            graph=self.dimensions,
        )


# Create the registry of dimension-packer configurables and register
# `ObservationDimensionPacker` in it under the name "observation".
# NOTE(review): this rebinds ``observation_packer_registry``, which was
# already assigned by an earlier ``makeRegistry`` call near the top of this
# module; this later object is the one that carries the "observation" entry.
# Confirm whether the earlier assignment can be dropped.
observation_packer_registry = makeRegistry(
"Configurables that can pack visit+detector or exposure+detector data IDs into integers. "
"Members of this registry should be callable with the same signature as "
"`lsst.pipe.base.ObservationDimensionPacker` construction."
)
observation_packer_registry.register("observation", ObservationDimensionPacker)

0 comments on commit b7b10cc

Please sign in to comment.