diff --git a/doc/lsst.verify/index.rst b/doc/lsst.verify/index.rst index c5c23484..db9395fb 100644 --- a/doc/lsst.verify/index.rst +++ b/doc/lsst.verify/index.rst @@ -39,4 +39,7 @@ Python API reference .. automodapi:: lsst.verify.report :no-inheritance-diagram: +.. automodapi:: lsst.verify.compatibility + :no-inheritance-diagram: + .. _SQUASH: https://squash.lsst.codes diff --git a/python/lsst/verify/__init__.py b/python/lsst/verify/__init__.py index fcfe1e5e..6f624e6f 100644 --- a/python/lsst/verify/__init__.py +++ b/python/lsst/verify/__init__.py @@ -43,3 +43,4 @@ from .jobmetadata import * from .job import * from .output import * +from .metricTask import * diff --git a/python/lsst/verify/compatibility/__init__.py b/python/lsst/verify/compatibility/__init__.py new file mode 100644 index 00000000..f90efac2 --- /dev/null +++ b/python/lsst/verify/compatibility/__init__.py @@ -0,0 +1,22 @@ +# This file is part of verify. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from .metricTask import * diff --git a/python/lsst/verify/compatibility/metricTask.py b/python/lsst/verify/compatibility/metricTask.py new file mode 100644 index 00000000..195c662f --- /dev/null +++ b/python/lsst/verify/compatibility/metricTask.py @@ -0,0 +1,213 @@ +# This file is part of verify. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +__all__ = ["MetricTask"] + +import abc + +import lsst.pex.config +import lsst.pipe.base as pipeBase + + +class MetricTask(pipeBase.Task, metaclass=abc.ABCMeta): + """A base class for tasks that compute exactly one metric from arbitrary + input datasets. + + Parameters + ---------- + args + kwargs + Constructor parameters are the same as for + `lsst.pipe.base.PipelineTask`. + + Notes + ----- + In general, both the ``MetricTask``'s metric and its input data are + configurable. Metrics may be associated with a data ID at any level of + granularity, including repository-wide. + + Like `lsst.pipe.base.PipelineTask`, this class should be customized by + overriding one of `run` or `adaptArgsAndRun`. For requirements on these + methods that are specific to ``MetricTask``, see `adaptArgsAndRun`. + + .. 
note:: + The API is designed to make it easy to convert all ``MetricTasks`` to + `~lsst.pipe.base.PipelineTask` later, but this class is *not* a + `~lsst.pipe.base.PipelineTask` and does not work with activators, + quanta, or `lsst.daf.butler`. + """ + + # There may be a specialized MetricTaskConfig later, details TBD + ConfigClass = lsst.pex.config.Config + + def adaptArgsAndRun(self, inputData, inputDataIds, outputDataId): + """Compute a metric from in-memory data. + + Parameters + ---------- + inputData : `dict` from `str` to any + Dictionary whose keys are the names of input parameters and values + are Python-domain data objects (or lists of objects) retrieved + from data butler. Accepting lists of objects is strongly + recommended; this allows metrics to vary their granularity up to + the granularity of the input data without the need for extensive + code changes. Input objects may be `None` to represent + missing data. + inputDataIds : `dict` from `str` to `list` of dataId + Dictionary whose keys are the names of input parameters and values + are data IDs (or lists of data IDs) that the task consumes for + corresponding dataset type. Data IDs are guaranteed to match data + objects in ``inputData``. + outputDataId : `dict` from `str` to dataId + Dictionary containing a single key, ``"measurement"``, which maps + to a single data ID for the measurement. The data ID must have the + same granularity as the metric. + + Returns + ------- + struct : `lsst.pipe.base.Struct` + A `~lsst.pipe.base.Struct` containing at least the + following component: + + - ``measurement``: the value of the metric identified by + `getOutputMetricName`, computed from ``inputData`` + (`lsst.verify.Measurement` or `None`). The measurement is + guaranteed to contain not only the value of the metric, but also + any mandatory supplementary information. + + Raises + ------ + lsst.verify.MetricComputationError + Raised if an algorithmic or system error prevents calculation + of the metric. 
Examples include corrupted input data or + unavoidable exceptions raised by analysis code. The + `~lsst.verify.MetricComputationError` should be chained to a more + specific exception describing the root cause. + + Not having enough data for a metric to be applicable is not an + error, and should not trigger this exception. + + Notes + ----- + This implementation calls `run` on the contents of ``inputData``, + followed by calling `addStandardMetadata` on the result before + returning it. Any subclass that overrides this method must also call + `addStandardMetadata` on its measurement before returning it. + + `adaptArgsAndRun` and `run` should assume they take multiple input + datasets, regardless of the expected metric granularity. This rule may + be broken if it is impossible for more than one copy of a dataset + to exist. + + All input data must be treated as optional. This maximizes the + ``MetricTask``'s usefulness for incomplete pipeline runs or runs with + optional processing steps. If a metric cannot be calculated because + the necessary inputs are missing, the ``MetricTask`` must return `None` + in place of the measurement. + + Examples + -------- + Consider a metric that characterizes PSF variations across the entire + field of view, given processed images. Then, if `run` has the + signature ``run(images)``: + + .. code-block:: py + + inputData = {'images': [image1, image2, ...]} + inputDataIds = {'images': [{'visit': 42, 'ccd': 1}, + {'visit': 42, 'ccd': 2}, + ...]} + outputDataId = {'measurement': {'visit': 42}} + result = task.adaptArgsAndRun( + inputData, inputDataIds, outputDataId) + """ + result = self.run(**inputData) + if result.measurement is not None: + self.addStandardMetadata(result.measurement, + outputDataId["measurement"]) + return result + + @classmethod + @abc.abstractmethod + def getInputDatasetTypes(cls, config): + """Return input dataset types for this task. 
+ + Parameters + ---------- + config : ``cls.ConfigClass`` + Configuration for this task. + + Returns + ------- + datasets : `dict` from `str` to `str` + Dictionary where the key is the name of the input dataset (must + match a parameter to `run`) and the value is the name of its + Butler dataset type. + """ + + @classmethod + @abc.abstractmethod + def getOutputMetricName(cls, config): + """Identify the metric calculated by this ``MetricTask``. + + Parameters + ---------- + config : ``cls.ConfigClass`` + Configuration for this ``MetricTask``. + + Returns + ------- + metric : `lsst.verify.Name` + The name of the metric computed by objects of this class when + configured with ``config``. + """ + + def addStandardMetadata(self, measurement, outputDataId): + """Add data ID-specific metadata required for all metrics. + + This method currently does not add any metadata, but may do so + in the future. + + Parameters + ---------- + measurement : `lsst.verify.Measurement` + The `~lsst.verify.Measurement` that the metadata are added to. + outputDataId : ``dataId`` + The data ID to which the measurement applies, at the appropriate + level of granularity. + + Notes + ----- + This method must be called by any subclass that overrides + `adaptArgsAndRun`, but should be ignored otherwise. It should not be + overridden by subclasses. + + This method is not responsible for shared metadata like the execution + environment (which should be added by this ``MetricTask``'s caller), + nor for metadata specific to a particular metric (which should be + added when the metric is calculated). + + .. warning:: + This method's signature will change whenever additional data needs + to be provided. This is a deliberate restriction to ensure that all + subclasses pass in the new data as well. 
+ """ + pass diff --git a/python/lsst/verify/compatibility/testUtils.py b/python/lsst/verify/compatibility/testUtils.py new file mode 100644 index 00000000..c2cf5f4a --- /dev/null +++ b/python/lsst/verify/compatibility/testUtils.py @@ -0,0 +1,116 @@ +# +# This file is part of verify. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +__all__ = ["MetricTaskTestCase"] + +import unittest.mock +import inspect + +import lsst.utils.tests + +from lsst.pipe.base import Struct +from lsst.verify import Measurement + + +class MetricTaskTestCase(lsst.utils.tests.TestCase): + """Unit test base class for tests of `compatibility.MetricTask`. + + This class provides tests of the generic ``MetricTask`` API. Subclasses + must override `taskFactory`, and may add extra tests for class-specific + functionality. If subclasses override `setUp`, they must call + `MetricTaskTestCase.setUp`. + """ + + # For some reason, setUp for the test cases defined in MetricTaskTestCase + # calls the wrong factory if you make it a classmethod + taskFactory = None + """A nullary callable that constructs the `compatibility.MetricTask` + to be tested. 
+ + If a concrete task's constructor satisfies the requirements, its type + object may be used as the factory. + """ + + task = None + """The ``MetricTask`` being tested by this object (`compatibility.MetricTask`). + + This attribute is initialized automatically. + """ + + taskClass = None + """The type of `task` (`compatibility.MetricTask`-type). + + This attribute is initialized automatically. + """ + + def setUp(self): + """Setup common to all MetricTask tests. + + Notes + ----- + This implementation calls `taskFactory`, then initializes `task` + and `taskClass`. + """ + self.task = self.taskFactory() + self.taskClass = type(self.task) + + # Implementation classes will override run or adaptArgsAndRun. Can't + # implement most tests if they're mocked, risk excessive runtime if + # they aren't. + + def testInputDatasetTypesKeys(self): + defaultInputs = self.taskClass.getInputDatasetTypes(self.task.config) + runParams = inspect.signature(self.taskClass.run).parameters + + # Only way to check if run has been overridden? 
+ if runParams.keys() != ['kwargs']: + self.assertSetEqual( + set(defaultInputs.keys()).union({'self'}), + set(runParams.keys()).union({'self'}), + "getInputDatasetTypes keys do not match run parameters") + + def testAddStandardMetadata(self): + measurement = Measurement('foo.bar', 0.0) + dataId = {'tract': 42, 'patch': 3, 'filter': 'Ic'} + self.task.addStandardMetadata(measurement, dataId) + # Nothing to test until addStandardMetadata adds something + + def testCallAddStandardMetadata(self): + dummy = Measurement('foo.bar', 0.0) + with unittest.mock.patch.multiple( + self.taskClass, autospec=True, + run=unittest.mock.DEFAULT, + addStandardMetadata=unittest.mock.DEFAULT) as mockDict: + mockDict['run'].return_value = Struct(measurement=dummy) + + inputTypes = self.taskClass.getInputDatasetTypes(self.task.config) + inputParams = inputTypes.keys() + # Probably won't satisfy all adaptArgsAndRun specs, + # but hopefully works with most of them + dataId = {} + result = self.task.adaptArgsAndRun( + {key: [None] for key in inputParams}, + {key: [dataId] for key in inputParams}, + {'measurement': {}}) + mockDict['addStandardMetadata'].assert_called_once_with( + self.task, result.measurement, dataId) diff --git a/python/lsst/verify/metricTask.py b/python/lsst/verify/metricTask.py new file mode 100644 index 00000000..a1f553f4 --- /dev/null +++ b/python/lsst/verify/metricTask.py @@ -0,0 +1,37 @@ +# This file is part of verify. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + + +__all__ = ["MetricComputationError"] + + +class MetricComputationError(RuntimeError): + """This class represents unresolvable errors in computing a metric. + + `compatibility.MetricTask` raises ``MetricComputationError`` instead of + other data- or processing-related exceptions to let code that calls a mix + of data processing and metric tasks distinguish between the two. + Therefore, most ``MetricComputationError`` instances should be chained to + another exception representing the underlying problem. + """ + pass + +# TODO: implement MetricTask once PipelineTask is ready for general use diff --git a/ups/verify.table b/ups/verify.table index dc17b28d..4df5fbe6 100644 --- a/ups/verify.table +++ b/ups/verify.table @@ -8,6 +8,8 @@ setupRequired(utils) setupRequired(pex_exceptions) setupRequired(log) setupRequired(pytest_flake8) +setupRequired(pex_config) +setupRequired(pipe_base) # verify_metrics is the default metrics repo and is used in tests setupRequired(verify_metrics)