Implement MetricTask.
This commit includes MetricTask and unit test code, but not
documentation. A guide to the MetricTask framework is not being added
at this stage; it will be written once prototyping is complete.
kfindeisen committed Dec 13, 2018
1 parent 37d7794 commit 706df7b
Showing 7 changed files with 394 additions and 0 deletions.
3 changes: 3 additions & 0 deletions doc/lsst.verify/index.rst
@@ -39,4 +39,7 @@ Python API reference
.. automodapi:: lsst.verify.report
   :no-inheritance-diagram:

.. automodapi:: lsst.verify.compatibility
   :no-inheritance-diagram:

.. _SQUASH: https://squash.lsst.codes
1 change: 1 addition & 0 deletions python/lsst/verify/__init__.py
@@ -43,3 +43,4 @@
from .jobmetadata import *
from .job import *
from .output import *
from .metricTask import *
22 changes: 22 additions & 0 deletions python/lsst/verify/compatibility/__init__.py
@@ -0,0 +1,22 @@
# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from .metricTask import *
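
With this change, the public name can be imported directly from the subpackage; for example:

from lsst.verify.compatibility import MetricTask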
213 changes: 213 additions & 0 deletions python/lsst/verify/compatibility/metricTask.py
@@ -0,0 +1,213 @@
# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ["MetricTask"]

import abc

import lsst.pex.config
import lsst.pipe.base as pipeBase


class MetricTask(pipeBase.Task, metaclass=abc.ABCMeta):
    """A base class for tasks that compute exactly one metric from arbitrary
    input datasets.

    Parameters
    ----------
    args
    kwargs
        Constructor parameters are the same as for
        `lsst.pipe.base.PipelineTask`.

    Notes
    -----
    In general, both the ``MetricTask``'s metric and its input data are
    configurable. Metrics may be associated with a data ID at any level of
    granularity, including repository-wide.

    Like `lsst.pipe.base.PipelineTask`, this class should be customized by
    overriding one of `run` or `adaptArgsAndRun`. For requirements on these
    methods that are specific to ``MetricTask``, see `adaptArgsAndRun`.

    .. note::
        The API is designed to make it easy to convert all ``MetricTasks`` to
        `~lsst.pipe.base.PipelineTask` later, but this class is *not* a
        `~lsst.pipe.base.PipelineTask` and does not work with activators,
        quanta, or `lsst.daf.butler`.
    """

    # There may be a specialized MetricTaskConfig later, details TBD
    ConfigClass = lsst.pex.config.Config

    def adaptArgsAndRun(self, inputData, inputDataIds, outputDataId):
        """Compute a metric from in-memory data.

        Parameters
        ----------
        inputData : `dict` from `str` to any
            Dictionary whose keys are the names of input parameters and
            whose values are Python-domain data objects (or lists of
            objects) retrieved from the data butler. Accepting lists of
            objects is strongly recommended; this allows metrics to vary
            their granularity up to the granularity of the input data
            without the need for extensive code changes. Input objects may
            be `None` to represent missing data.
        inputDataIds : `dict` from `str` to `list` of dataId
            Dictionary whose keys are the names of input parameters and
            whose values are data IDs (or lists of data IDs) that the task
            consumes for the corresponding dataset type. Data IDs are
            guaranteed to match data objects in ``inputData``.
        outputDataId : `dict` from `str` to dataId
            Dictionary containing a single key, ``"measurement"``, which
            maps to a single data ID for the measurement. The data ID must
            have the same granularity as the metric.

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            A `~lsst.pipe.base.Struct` containing at least the
            following component:

            - ``measurement``: the value of the metric identified by
              `getOutputMetricName`, computed from ``inputData``
              (`lsst.verify.Measurement` or `None`). The measurement is
              guaranteed to contain not only the value of the metric, but
              also any mandatory supplementary information.

        Raises
        ------
        lsst.verify.MetricComputationError
            Raised if an algorithmic or system error prevents calculation
            of the metric. Examples include corrupted input data or
            unavoidable exceptions raised by analysis code. The
            `~lsst.verify.MetricComputationError` should be chained to a
            more specific exception describing the root cause.

            Not having enough data for a metric to be applicable is not an
            error, and should not trigger this exception.

        Notes
        -----
        This implementation calls `run` on the contents of ``inputData``,
        then calls `addStandardMetadata` on the result before returning it.
        Any subclass that overrides this method must also call
        `addStandardMetadata` on its measurement before returning it.

        `adaptArgsAndRun` and `run` should assume they take multiple input
        datasets, regardless of the expected metric granularity. This rule
        may be broken if it is impossible for more than one copy of a
        dataset to exist.

        All input data must be treated as optional. This maximizes the
        ``MetricTask``'s usefulness for incomplete pipeline runs or runs
        with optional processing steps. If a metric cannot be calculated
        because the necessary inputs are missing, the ``MetricTask`` must
        return `None` in place of the measurement.

        Examples
        --------
        Consider a metric that characterizes PSF variations across the
        entire field of view, given processed images. Then, if `run` has
        the signature ``run(images)``:

        .. code-block:: py

            inputData = {'images': [image1, image2, ...]}
            inputDataIds = {'images': [{'visit': 42, 'ccd': 1},
                                       {'visit': 42, 'ccd': 2},
                                       ...]}
            outputDataId = {'measurement': {'visit': 42}}
            result = task.adaptArgsAndRun(
                inputData, inputDataIds, outputDataId)
        """
        result = self.run(**inputData)
        if result.measurement is not None:
            self.addStandardMetadata(result.measurement,
                                     outputDataId["measurement"])
        return result

    @classmethod
    @abc.abstractmethod
    def getInputDatasetTypes(cls, config):
        """Return the input dataset types for this task.

        Parameters
        ----------
        config : ``cls.ConfigClass``
            Configuration for this task.

        Returns
        -------
        datasets : `dict` from `str` to `str`
            Dictionary where each key is the name of an input dataset (it
            must match a parameter of `run`) and each value is the name of
            its Butler dataset type.
        """

    @classmethod
    @abc.abstractmethod
    def getOutputMetricName(cls, config):
        """Identify the metric calculated by this ``MetricTask``.

        Parameters
        ----------
        config : ``cls.ConfigClass``
            Configuration for this ``MetricTask``.

        Returns
        -------
        metric : `lsst.verify.Name`
            The name of the metric computed by objects of this class when
            configured with ``config``.
        """

    def addStandardMetadata(self, measurement, outputDataId):
        """Add data ID-specific metadata required for all metrics.

        This method currently does not add any metadata, but may do so
        in the future.

        Parameters
        ----------
        measurement : `lsst.verify.Measurement`
            The `~lsst.verify.Measurement` that the metadata are added to.
        outputDataId : ``dataId``
            The data ID to which the measurement applies, at the
            appropriate level of granularity.

        Notes
        -----
        This method must be called by any subclass that overrides
        `adaptArgsAndRun`, but should be ignored otherwise. It should not
        be overridden by subclasses.

        This method is not responsible for shared metadata like the
        execution environment (which should be added by this
        ``MetricTask``'s caller), nor for metadata specific to a particular
        metric (which should be added when the metric is calculated).

        .. warning::
            This method's signature will change whenever additional data
            needs to be provided. This is a deliberate restriction to
            ensure that all subclasses pass in the new data as well.
        """
        pass
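
To make the interface contract above concrete, a minimal subclass might look like the sketch below. The task, metric, and dataset names (DemoImageCountMetricTask, demo.ImageCount, calexp) are hypothetical examples, not part of this commit; only the three overridden members are required by the MetricTask API.

import astropy.units as u

import lsst.pipe.base as pipeBase
from lsst.verify import Measurement, Name
from lsst.verify.compatibility import MetricTask


class DemoImageCountMetricTask(MetricTask):
    """Hypothetical task that counts the processed images in a run."""
    _DefaultName = "demoImageCount"

    @classmethod
    def getInputDatasetTypes(cls, config):
        # The key must match the parameter name of `run`.
        return {"images": "calexp"}

    @classmethod
    def getOutputMetricName(cls, config):
        return Name("demo.ImageCount")

    def run(self, images):
        # All inputs are optional; return None in place of the
        # measurement if nothing usable was passed in.
        images = [image for image in images if image is not None]
        if not images:
            return pipeBase.Struct(measurement=None)
        count = len(images) * u.dimensionless_unscaled
        return pipeBase.Struct(measurement=Measurement(
            self.getOutputMetricName(self.config), count))

The inherited adaptArgsAndRun then handles calling run and addStandardMetadata, as documented above.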
116 changes: 116 additions & 0 deletions python/lsst/verify/compatibility/testUtils.py
@@ -0,0 +1,116 @@
#
# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

__all__ = ["MetricTaskTestCase"]

import unittest.mock
import inspect

import lsst.utils.tests

from lsst.pipe.base import Struct
from lsst.verify import Measurement


class MetricTaskTestCase(lsst.utils.tests.TestCase):
    """Unit test base class for tests of `compatibility.MetricTask`.

    This class provides tests of the generic ``MetricTask`` API. Subclasses
    must override `taskFactory`, and may add extra tests for class-specific
    functionality. If subclasses override `setUp`, they must call
    `MetricTaskTestCase.setUp`.
    """

    # For some reason, setUp for the test cases defined in
    # MetricTaskTestCase calls the wrong factory if you make it
    # a classmethod
    taskFactory = None
    """A nullary callable that constructs the `compatibility.MetricTask`
    to be tested.

    If a concrete task's constructor satisfies the requirements, its type
    object may be used as the factory.
    """

    task = None
    """The ``MetricTask`` being tested by this object
    (`compatibility.MetricTask`).

    This attribute is initialized automatically.
    """

    taskClass = None
    """The type of `task` (`compatibility.MetricTask`-type).

    This attribute is initialized automatically.
    """

    def setUp(self):
        """Setup common to all MetricTask tests.

        Notes
        -----
        This implementation calls `taskFactory`, then initializes `task`
        and `taskClass`.
        """
        self.task = self.taskFactory()
        self.taskClass = type(self.task)

    # Implementation classes will override run or adaptArgsAndRun. We can't
    # implement most tests if they're mocked, and we risk excessive runtime
    # if they aren't.

    def testInputDatasetTypesKeys(self):
        defaultInputs = self.taskClass.getInputDatasetTypes(self.task.config)
        runParams = inspect.signature(self.taskClass.run).parameters

        # Only way to check if run has been overridden? Compare the keys
        # against a set; dict keys never compare equal to a list.
        if runParams.keys() != {'kwargs'}:
            self.assertSetEqual(
                set(defaultInputs.keys()).union({'self'}),
                set(runParams.keys()).union({'self'}),
                "getInputDatasetTypes keys do not match run parameters")

    def testAddStandardMetadata(self):
        measurement = Measurement('foo.bar', 0.0)
        dataId = {'tract': 42, 'patch': 3, 'filter': 'Ic'}
        self.task.addStandardMetadata(measurement, dataId)
        # Nothing to test until addStandardMetadata adds something

    def testCallAddStandardMetadata(self):
        dummy = Measurement('foo.bar', 0.0)
        with unittest.mock.patch.multiple(
                self.taskClass, autospec=True,
                run=unittest.mock.DEFAULT,
                addStandardMetadata=unittest.mock.DEFAULT) as mockDict:
            mockDict['run'].return_value = Struct(measurement=dummy)

            inputTypes = self.taskClass.getInputDatasetTypes(self.task.config)
            inputParams = inputTypes.keys()
            # Probably won't satisfy all adaptArgsAndRun specs,
            # but hopefully works with most of them
            dataId = {}
            result = self.task.adaptArgsAndRun(
                {key: [None] for key in inputParams},
                {key: [dataId] for key in inputParams},
                {'measurement': {}})
            mockDict['addStandardMetadata'].assert_called_once_with(
                self.task, result.measurement, dataId)
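
A test suite for the hypothetical DemoImageCountMetricTask sketched earlier would then only need to supply the factory; class-specific tests sit alongside the inherited generic ones:

from lsst.verify.compatibility.testUtils import MetricTaskTestCase


class DemoImageCountTestSuite(MetricTaskTestCase):
    # The type object is a valid nullary factory because the
    # constructor takes no required arguments.
    taskFactory = DemoImageCountMetricTask

    def testNoData(self):
        # Missing inputs must yield None, not an error.
        result = self.task.run(images=[None])
        self.assertIsNone(result.measurement)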
