Implement MetricTask.
This commit includes MetricTask and unit test code, but not
documentation. A guide to the MetricTask framework is not being added
at this stage; it will be written once prototyping is complete.
kfindeisen committed Dec 13, 2018
1 parent 37d7794 commit 706df7b
Showing 7 changed files with 394 additions and 0 deletions.
3 changes: 3 additions & 0 deletions doc/lsst.verify/index.rst
@@ -39,4 +39,7 @@ Python API reference
.. automodapi:: lsst.verify.report
   :no-inheritance-diagram:

.. automodapi:: lsst.verify.compatibility
   :no-inheritance-diagram:

.. _SQUASH: https://squash.lsst.codes
1 change: 1 addition & 0 deletions python/lsst/verify/__init__.py
@@ -43,3 +43,4 @@
from .jobmetadata import *
from .job import *
from .output import *
from .metricTask import *
22 changes: 22 additions & 0 deletions python/lsst/verify/compatibility/__init__.py
@@ -0,0 +1,22 @@
# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from .metricTask import *
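
With this change, the public name can be imported directly from the subpackage; for example:

from lsst.verify.compatibility import MetricTask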
213 changes: 213 additions & 0 deletions python/lsst/verify/compatibility/metricTask.py
@@ -0,0 +1,213 @@
# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ["MetricTask"]

import abc

import lsst.pex.config
import lsst.pipe.base as pipeBase


class MetricTask(pipeBase.Task, metaclass=abc.ABCMeta):
    """A base class for tasks that compute exactly one metric from arbitrary
    input datasets.

    Parameters
    ----------
    args
    kwargs
        Constructor parameters are the same as for
        `lsst.pipe.base.PipelineTask`.

    Notes
    -----
    In general, both the ``MetricTask``'s metric and its input data are
    configurable. Metrics may be associated with a data ID at any level of
    granularity, including repository-wide.

    Like `lsst.pipe.base.PipelineTask`, this class should be customized by
    overriding one of `run` or `adaptArgsAndRun`. For requirements on these
    methods that are specific to ``MetricTask``, see `adaptArgsAndRun`.

    .. note::
        The API is designed to make it easy to convert all ``MetricTasks`` to
        `~lsst.pipe.base.PipelineTask` later, but this class is *not* a
        `~lsst.pipe.base.PipelineTask` and does not work with activators,
        quanta, or `lsst.daf.butler`.
    """

    # There may be a specialized MetricTaskConfig later, details TBD
    ConfigClass = lsst.pex.config.Config

    def adaptArgsAndRun(self, inputData, inputDataIds, outputDataId):
        """Compute a metric from in-memory data.

        Parameters
        ----------
        inputData : `dict` from `str` to any
            Dictionary whose keys are the names of input parameters and
            whose values are Python-domain data objects (or lists of
            objects) retrieved from the data butler. Accepting lists of
            objects is strongly recommended; this allows metrics to vary
            their granularity up to the granularity of the input data
            without the need for extensive code changes. Input objects may
            be `None` to represent missing data.
        inputDataIds : `dict` from `str` to `list` of dataId
            Dictionary whose keys are the names of input parameters and
            whose values are data IDs (or lists of data IDs) that the task
            consumes for the corresponding dataset type. Data IDs are
            guaranteed to match data objects in ``inputData``.
        outputDataId : `dict` from `str` to dataId
            Dictionary containing a single key, ``"measurement"``, which
            maps to a single data ID for the measurement. The data ID must
            have the same granularity as the metric.

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            A `~lsst.pipe.base.Struct` containing at least the
            following component:

            - ``measurement``: the value of the metric identified by
              `getOutputMetricName`, computed from ``inputData``
              (`lsst.verify.Measurement` or `None`). The measurement is
              guaranteed to contain not only the value of the metric, but
              also any mandatory supplementary information.

        Raises
        ------
        lsst.verify.MetricComputationError
            Raised if an algorithmic or system error prevents calculation
            of the metric. Examples include corrupted input data or
            unavoidable exceptions raised by analysis code. The
            `~lsst.verify.MetricComputationError` should be chained to a
            more specific exception describing the root cause.

            Not having enough data for a metric to be applicable is not an
            error, and should not trigger this exception.

        Notes
        -----
        This implementation calls `run` on the contents of ``inputData``,
        then calls `addStandardMetadata` on the result before returning it.
        Any subclass that overrides this method must also call
        `addStandardMetadata` on its measurement before returning it.

        `adaptArgsAndRun` and `run` should assume they take multiple input
        datasets, regardless of the expected metric granularity. This rule
        may be broken if it is impossible for more than one copy of a
        dataset to exist.

        All input data must be treated as optional. This maximizes the
        ``MetricTask``'s usefulness for incomplete pipeline runs or runs
        with optional processing steps. If a metric cannot be calculated
        because the necessary inputs are missing, the ``MetricTask`` must
        return `None` in place of the measurement.

        Examples
        --------
        Consider a metric that characterizes PSF variations across the
        entire field of view, given processed images. Then, if `run` has
        the signature ``run(images)``:

        .. code-block:: py

            inputData = {'images': [image1, image2, ...]}
            inputDataIds = {'images': [{'visit': 42, 'ccd': 1},
                                       {'visit': 42, 'ccd': 2},
                                       ...]}
            outputDataId = {'measurement': {'visit': 42}}
            result = task.adaptArgsAndRun(
                inputData, inputDataIds, outputDataId)
        """
        result = self.run(**inputData)
        if result.measurement is not None:
            self.addStandardMetadata(result.measurement,
                                     outputDataId["measurement"])
        return result

    @classmethod
    @abc.abstractmethod
    def getInputDatasetTypes(cls, config):
        """Return the input dataset types for this task.

        Parameters
        ----------
        config : ``cls.ConfigClass``
            Configuration for this task.

        Returns
        -------
        datasets : `dict` from `str` to `str`
            Dictionary where each key is the name of an input dataset (it
            must match a parameter of `run`) and each value is the name of
            its Butler dataset type.
        """

    @classmethod
    @abc.abstractmethod
    def getOutputMetricName(cls, config):
        """Identify the metric calculated by this ``MetricTask``.

        Parameters
        ----------
        config : ``cls.ConfigClass``
            Configuration for this ``MetricTask``.

        Returns
        -------
        metric : `lsst.verify.Name`
            The name of the metric computed by objects of this class when
            configured with ``config``.
        """

    def addStandardMetadata(self, measurement, outputDataId):
        """Add data ID-specific metadata required for all metrics.

        This method currently does not add any metadata, but may do so
        in the future.

        Parameters
        ----------
        measurement : `lsst.verify.Measurement`
            The `~lsst.verify.Measurement` that the metadata are added to.
        outputDataId : ``dataId``
            The data ID to which the measurement applies, at the
            appropriate level of granularity.

        Notes
        -----
        This method must be called by any subclass that overrides
        `adaptArgsAndRun`, but should be ignored otherwise. It should not
        be overridden by subclasses.

        This method is not responsible for shared metadata like the
        execution environment (which should be added by this
        ``MetricTask``'s caller), nor for metadata specific to a particular
        metric (which should be added when the metric is calculated).

        .. warning::
            This method's signature will change whenever additional data
            needs to be provided. This is a deliberate restriction to
            ensure that all subclasses pass in the new data as well.
        """
        pass
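
To make the interface contract above concrete, a minimal subclass might look like the sketch below. The task, metric, and dataset names (DemoImageCountMetricTask, demo.ImageCount, calexp) are hypothetical examples, not part of this commit; only the three overridden members are required by the MetricTask API.

import astropy.units as u

import lsst.pipe.base as pipeBase
from lsst.verify import Measurement, Name
from lsst.verify.compatibility import MetricTask


class DemoImageCountMetricTask(MetricTask):
    """Hypothetical task that counts the processed images in a run."""
    _DefaultName = "demoImageCount"

    @classmethod
    def getInputDatasetTypes(cls, config):
        # The key must match the parameter name of `run`.
        return {"images": "calexp"}

    @classmethod
    def getOutputMetricName(cls, config):
        return Name("demo.ImageCount")

    def run(self, images):
        # All inputs are optional; return None in place of the
        # measurement if nothing usable was passed in.
        images = [image for image in images if image is not None]
        if not images:
            return pipeBase.Struct(measurement=None)
        count = len(images) * u.dimensionless_unscaled
        return pipeBase.Struct(measurement=Measurement(
            self.getOutputMetricName(self.config), count))

The inherited adaptArgsAndRun then handles calling run and addStandardMetadata, as documented above.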
116 changes: 116 additions & 0 deletions python/lsst/verify/compatibility/testUtils.py
@@ -0,0 +1,116 @@
#
# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

__all__ = ["MetricTaskTestCase"]

import unittest.mock
import inspect

import lsst.utils.tests

from lsst.pipe.base import Struct
from lsst.verify import Measurement


class MetricTaskTestCase(lsst.utils.tests.TestCase):
    """Unit test base class for tests of `compatibility.MetricTask`.

    This class provides tests of the generic ``MetricTask`` API. Subclasses
    must override `taskFactory`, and may add extra tests for class-specific
    functionality. If subclasses override `setUp`, they must call
    `MetricTaskTestCase.setUp`.
    """

    # For some reason, setUp for the test cases defined in
    # MetricTaskTestCase calls the wrong factory if you make it
    # a classmethod
    taskFactory = None
    """A nullary callable that constructs the `compatibility.MetricTask`
    to be tested.

    If a concrete task's constructor satisfies the requirements, its type
    object may be used as the factory.
    """

    task = None
    """The ``MetricTask`` being tested by this object
    (`compatibility.MetricTask`).

    This attribute is initialized automatically.
    """

    taskClass = None
    """The type of `task` (`compatibility.MetricTask`-type).

    This attribute is initialized automatically.
    """

    def setUp(self):
        """Setup common to all MetricTask tests.

        Notes
        -----
        This implementation calls `taskFactory`, then initializes `task`
        and `taskClass`.
        """
        self.task = self.taskFactory()
        self.taskClass = type(self.task)

    # Implementation classes will override run or adaptArgsAndRun. We can't
    # implement most tests if they're mocked, and we risk excessive runtime
    # if they aren't.

    def testInputDatasetTypesKeys(self):
        defaultInputs = self.taskClass.getInputDatasetTypes(self.task.config)
        runParams = inspect.signature(self.taskClass.run).parameters

        # Only way to check if run has been overridden? Compare the keys
        # against a set; dict keys never compare equal to a list.
        if runParams.keys() != {'kwargs'}:
            self.assertSetEqual(
                set(defaultInputs.keys()).union({'self'}),
                set(runParams.keys()).union({'self'}),
                "getInputDatasetTypes keys do not match run parameters")

    def testAddStandardMetadata(self):
        measurement = Measurement('foo.bar', 0.0)
        dataId = {'tract': 42, 'patch': 3, 'filter': 'Ic'}
        self.task.addStandardMetadata(measurement, dataId)
        # Nothing to test until addStandardMetadata adds something

    def testCallAddStandardMetadata(self):
        dummy = Measurement('foo.bar', 0.0)
        with unittest.mock.patch.multiple(
                self.taskClass, autospec=True,
                run=unittest.mock.DEFAULT,
                addStandardMetadata=unittest.mock.DEFAULT) as mockDict:
            mockDict['run'].return_value = Struct(measurement=dummy)

            inputTypes = self.taskClass.getInputDatasetTypes(self.task.config)
            inputParams = inputTypes.keys()
            # Probably won't satisfy all adaptArgsAndRun specs,
            # but hopefully works with most of them
            dataId = {}
            result = self.task.adaptArgsAndRun(
                {key: [None] for key in inputParams},
                {key: [dataId] for key in inputParams},
                {'measurement': {}})
            mockDict['addStandardMetadata'].assert_called_once_with(
                self.task, result.measurement, dataId)
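
A test suite for the hypothetical DemoImageCountMetricTask sketched earlier would then only need to supply the factory; class-specific tests sit alongside the inherited generic ones:

from lsst.verify.compatibility.testUtils import MetricTaskTestCase


class DemoImageCountTestSuite(MetricTaskTestCase):
    # The type object is a valid nullary factory because the
    # constructor takes no required arguments.
    taskFactory = DemoImageCountMetricTask

    def testNoData(self):
        # Missing inputs must yield None, not an error.
        result = self.task.run(images=[None])
        self.assertIsNone(result.measurement)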
