DM-21098: Isolate outdated Gen 3 methods in gen2tasks #55

Merged: 8 commits, Oct 7, 2019
2 changes: 1 addition & 1 deletion doc/lsst.verify/tasks/lsst.verify.gen2tasks.MetricTask.rst
@@ -42,7 +42,7 @@ In Depth
Subclassing
-----------

``MetricTask`` is primarily customized using the `~MetricTask.run` or `~MetricTask.adaptArgsAndRun` methods.
``MetricTask`` is primarily customized using the `~MetricTask.run` method.
Each subclass must also implement the `~MetricTask.getOutputMetricName` method.

The task config should use `lsst.pipe.base.PipelineTaskConnections` to identify input datasets as if it were a `~lsst.pipe.base.PipelineTask`.
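
For example, a minimal subclass might look like the sketch below. This is not code from this PR: the task, connection, dataset, and metric names are all hypothetical, and the config plumbing follows general `~lsst.pipe.base.PipelineTask` conventions rather than a verbatim API.

    import astropy.units as u
    import lsst.pipe.base as pipeBase
    from lsst.pipe.base import connectionTypes
    from lsst.verify import Measurement
    from lsst.verify.gen2tasks import MetricTask

    class DemoMetricConnections(pipeBase.PipelineTaskConnections,
                                dimensions={"instrument", "visit"}):
        # One connection per parameter of run(); all names hypothetical.
        sources = connectionTypes.Input(
            doc="Hypothetical input catalog.",
            name="srcCatalog",
            storageClass="SourceCatalog",
            dimensions={"instrument", "visit"},
        )

    class DemoMetricConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=DemoMetricConnections):
        pass

    class DemoMetricTask(MetricTask):
        ConfigClass = DemoMetricConfig
        _DefaultName = "demoMetric"

        def run(self, sources):
            # All inputs are optional; report None if the catalog is missing.
            if sources is None:
                return pipeBase.Struct(measurement=None)
            meas = Measurement(self.getOutputMetricName(self.config),
                               len(sources) * u.count)
            return pipeBase.Struct(measurement=meas)

        @classmethod
        def getOutputMetricName(cls, config):
            return "demo.SourceCount"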
67 changes: 51 additions & 16 deletions python/lsst/verify/gen2tasks/metricTask.py
@@ -43,10 +43,9 @@ class MetricTask(pipeBase.Task, metaclass=abc.ABCMeta):
granularity, including repository-wide.

Like `lsst.pipe.base.PipelineTask`, this class should be customized by
overriding one of `run` or `adaptArgsAndRun` and by providing an
`~lsst.pipe.base.InputDatasetField` for each parameter of `run`. For
requirements on these methods that are specific to ``MetricTask``, see
`adaptArgsAndRun`.
overriding `run` and by providing a `lsst.pipe.base.connectionTypes.Input`
for each parameter of `run`. For requirements that are specific to
``MetricTask``, see `run`.

.. note::
The API is designed to make it easy to convert all ``MetricTasks`` to
@@ -62,8 +61,53 @@ class MetricTask(pipeBase.Task, metaclass=abc.ABCMeta):
def __init__(self, **kwargs):
super().__init__(**kwargs)

@abc.abstractmethod
def run(self, **kwargs):
"""Run the MetricTask on in-memory data.

Parameters
----------
**kwargs
Keyword arguments matching the inputs given in the class config;
see `lsst.pipe.base.PipelineTask.run` for more details.

Returns
-------
struct : `lsst.pipe.base.Struct`
A `~lsst.pipe.base.Struct` containing at least the
following component:

- ``measurement``: the value of the metric identified by
`getOutputMetricName` (`lsst.verify.Measurement` or `None`).
This method is not responsible for adding mandatory metadata
(e.g., the data ID); this is handled by the caller.

Raises
------
lsst.verify.tasks.MetricComputationError
Raised if an algorithmic or system error prevents calculation
of the metric. Examples include corrupted input data or
unavoidable exceptions raised by analysis code. The
`~lsst.verify.tasks.MetricComputationError` should be chained to a
more specific exception describing the root cause.

Not having enough data for a metric to be applicable is not an
error, and should not trigger this exception.

Notes
-----
All input data must be treated as optional. This maximizes the
``MetricTask``'s usefulness for incomplete pipeline runs or runs with
optional processing steps. If a metric cannot be calculated because
the necessary inputs are missing, the ``MetricTask`` must return `None`
in place of the measurement.
"""

def adaptArgsAndRun(self, inputData, inputDataIds, outputDataId):
"""Compute a metric from in-memory data.
"""A wrapper around `run` used by
`~lsst.verify.gen2tasks.MetricsControllerTask`.

Task developers should not need to call or override this method.

Parameters
----------
@@ -110,14 +154,7 @@ def adaptArgsAndRun(self, inputData, inputDataIds, outputDataId):
-----
This implementation calls `run` on the contents of ``inputData``,
followed by calling `addStandardMetadata` on the result before
returning it. Any subclass that overrides this method must also call
`addStandardMetadata` on its measurement before returning it.

All input data must be treated as optional. This maximizes the
``MetricTask``'s usefulness for incomplete pipeline runs or runs with
optional processing steps. If a metric cannot be calculated because
the necessary inputs are missing, the ``MetricTask`` must return `None`
in place of the measurement.
returning it.

Examples
--------
@@ -225,9 +262,7 @@ def addStandardMetadata(self, measurement, outputDataId):

Notes
-----
This method must be called by any subclass that overrides
`adaptArgsAndRun`, but should be ignored otherwise. It should not be
overridden by subclasses.
This method should not be overridden by subclasses.

This method is not responsible for shared metadata like the execution
environment (which should be added by this ``MetricTask``'s caller),
8 changes: 5 additions & 3 deletions python/lsst/verify/gen2tasks/testUtils.py
@@ -78,13 +78,15 @@ def setUp(self):
self.task = self.makeTask()
self.taskClass = type(self.task)

# Implementation classes will override run or adaptArgsAndRun. Can't
# implement most tests if they're mocked, risk excessive runtime if
# they aren't.
# Implementation classes will override run. Can't implement most tests if
# it's mocked, risk excessive runtime if it isn't.

def testInputDatasetTypesKeys(self):
defaultInputs = self.taskClass.getInputDatasetTypes(self.task.config)
runParams = inspect.signature(self.taskClass.run).parameters
# Filter out keywords with defaults
runParams = {name: param for name, param in runParams.items()
if param.default is param.empty}

# Only way to check if run has been overridden?
if runParams.keys() != {'kwargs'}:
4 changes: 2 additions & 2 deletions python/lsst/verify/tasks/metadataMetricTask.py
@@ -250,7 +250,7 @@ def makeMeasurement(self, values):
------
lsst.verify.tasks.MetricComputationError
Raised if an algorithmic or system error prevents calculation of
the metric. See `adaptArgsAndRun` for expected behavior.
the metric. See `run` for expected behavior.
"""

def run(self, metadata):
@@ -347,7 +347,7 @@ def makeMeasurement(self, values):
------
lsst.verify.tasks.MetricComputationError
Raised if an algorithmic or system error prevents calculation of
the metric. See `adaptArgsAndRun` for expected behavior.
the metric. See `run` for expected behavior.
"""

def run(self, metadata):
74 changes: 9 additions & 65 deletions python/lsst/verify/tasks/ppdbMetricTask.py
@@ -199,8 +199,7 @@ class PpdbMetricTask(MetricTask):
Notes
-----
This class should be customized by overriding `makeMeasurement` and
`getOutputMetricName`. You should not need to override `run` or
`adaptArgsAndRun`.
`getOutputMetricName`. You should not need to override `run`.
"""
# Design note: makeMeasurement is an overrideable method rather than a
# subtask to keep the configs for `MetricsControllerTask` as simple as
@@ -236,85 +235,35 @@ def makeMeasurement(self, dbHandle, outputDataId):
------
MetricComputationError
Raised if an algorithmic or system error prevents calculation of
the metric. See `adaptArgsAndRun` for expected behavior.
the metric. See `run` for expected behavior.
"""

def run(self, dbInfo):
def run(self, dbInfo, outputDataId={}):
"""Compute a measurement from a database.

Parameters
----------
dbInfo
The dataset (of the type indicated by `getInputDatasetTypes`) from
The dataset (of the type indicated by the config) from
which to load the database.
outputDataId : any data ID type, optional
The output data ID for the metric value. Defaults to the empty ID,
representing a value that covers the entire dataset.

Returns
-------
result : `lsst.pipe.base.Struct`
Result struct with component:

``measurement``
the value of the metric computed over the entire database
(`lsst.verify.Measurement` or `None`)
the value of the metric (`lsst.verify.Measurement` or `None`)

Raises
------
MetricComputationError
Raised if an algorithmic or system error prevents calculation of
the metric.

Notes
-----
This method is provided purely for compatibility with frameworks that
don't support `adaptArgsAndRun`. The latter method should be considered
the primary entry point for this task, as it lets callers define
metrics that apply to only a subset of the data.
"""
return self.adaptArgsAndRun({"dbInfo": dbInfo},
{"dbInfo": {}},
{"measurement": {}})

def adaptArgsAndRun(self, inputData, inputDataIds, outputDataId):
"""Compute a measurement from a database.

Parameters
----------
inputData : `dict` [`str`, any]
Dictionary with one key:

``"dbInfo"``
The dataset (of the type indicated by `getInputDatasetTypes`)
from which to load the database.
inputDataIds : `dict` [`str`, data ID]
Dictionary with one key:

``"dbInfo"``
The data ID of the input data. Since there can only be one
prompt products database per dataset, the value must be an
empty data ID.
outputDataId : `dict` [`str`, data ID]
Dictionary with one key:

``"measurement"``
The data ID for the measurement, at the appropriate level
of granularity for the metric.

Returns
-------
result : `lsst.pipe.base.Struct`
Result struct with component:

``measurement``
the value of the metric computed over the portion of the
dataset that matches ``outputDataId``
(`lsst.verify.Measurement` or `None`)

Raises
------
lsst.verify.tasks.MetricComputationError
Raised if an algorithmic or system error prevents calculation of
the metric.

Notes
-----
This implementation calls
@@ -323,16 +272,11 @@ def adaptArgsAndRun(self, inputData, inputDataIds, outputDataId):
`makeMeasurement`. The result of `makeMeasurement` is returned to
the caller.
"""
dataId = outputDataId["measurement"]
dbInfo = inputData["dbInfo"]

db = self.dbLoader.run(dbInfo).ppdb

if db is not None:
measurement = self.makeMeasurement(db, dataId)
measurement = self.makeMeasurement(db, outputDataId)
else:
measurement = None
if measurement is not None:
self.addStandardMetadata(measurement, dataId)

return Struct(measurement=measurement)
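
A caller might use the task along these lines (a sketch: ``MyPpdbMetricTask`` and the source of ``dbInfo`` are hypothetical):

    task = MyPpdbMetricTask(config=config)
    # Metric over the entire dataset (default empty data ID):
    result = task.run(dbInfo)
    # Metric restricted to a single visit:
    perVisit = task.run(dbInfo, outputDataId={"visit": 42})
    if perVisit.measurement is not None:
        print(perVisit.measurement)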
50 changes: 32 additions & 18 deletions python/lsst/verify/tasks/testUtils.py
@@ -21,6 +21,7 @@

__all__ = ["MetadataMetricTestCase", "PpdbMetricTestCase"]

import unittest.mock
from unittest.mock import patch

from lsst.pex.config import Config, ConfigField
@@ -41,10 +42,6 @@ class MetadataMetricTestCase(MetricTaskTestCase):
being tested.
"""

def testInputDatasetTypes(self):
defaultInputs = self.taskClass.getInputDatasetTypes(self.task.config)
self.assertEqual(defaultInputs.keys(), {"metadata"})

@staticmethod
def _takesScalarMetadata(task):
return task.areInputDatasetsScalar(task.config)['metadata']
@@ -97,34 +94,51 @@ def testPassThroughRun(self):
self.task.run([None])


class DummyConfig(Config):
ppdb = ConfigField(dtype=PpdbConfig, doc="Mandatory field")


class PpdbMetricTestCase(MetricTaskTestCase):
"""Unit test base class for tests of `PpdbMetricTask`.

Notes
-----
Subclasses must override
`~lsst.verify.gen2tasks.MetricTaskTestCase.makeTask` for the concrete task
being tested.
being tested. Subclasses that use a custom DbLoader should also
override `makeDbInfo`.
"""

def testInputDatasetTypes(self):
defaultInputs = self.taskClass.getInputDatasetTypes(self.task.config)
self.assertEqual(defaultInputs.keys(), {"dbInfo"})
@classmethod
def makeDbInfo(cls):
"""Return an object that can be passed as input to a `PpdbMetricTask`.

This method is intended for generic tests that simply need to call
``run`` on some valid input. If a test depends on specific input, it
should create that input directly.

The default implementation creates a `~lsst.pex.config.Config` that
will be accepted by `~lsst.verify.tasks.ConfigPpdbLoader`. Test suites
that use a different loader should override this method.
"""
class DummyConfig(Config):
ppdb = ConfigField(dtype=PpdbConfig, doc="Mandatory field")

return DummyConfig()
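
A suite whose task loads its database from something other than a config, for example a file path, might override it as in this sketch (the loader behavior is hypothetical):

    @classmethod
    def makeDbInfo(cls):
        # Hypothetical: a file-based DbLoader would receive a path here.
        return "ppdb-test-fixture.db"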

def testValidRun(self):
config = DummyConfig()
with patch.object(self.task, "makeMeasurement") \
as mockWorkhorse:
self.task.run(config)
info = self.makeDbInfo()
with patch.object(self.task, "makeMeasurement") as mockWorkhorse:
self.task.run(info)
mockWorkhorse.assert_called_once()

def testDataIdRun(self):
info = self.makeDbInfo()
with patch.object(self.task, "makeMeasurement") as mockWorkhorse:
dataId = {'visit': 42}
self.task.run(info, outputDataId=dataId)
mockWorkhorse.assert_called_once_with(
unittest.mock.ANY, {'visit': 42})

def testPassThroughRun(self):
with patch.object(self.task, "makeMeasurement",
side_effect=MetricComputationError):
config = DummyConfig()
info = self.makeDbInfo()
with self.assertRaises(MetricComputationError):
self.task.run(config)
self.task.run(info)
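
A concrete suite built on this base class might look like the following sketch, where ``MyPpdbMetricTask`` is a hypothetical task under test:

    from lsst.verify.tasks.testUtils import PpdbMetricTestCase

    class MyPpdbMetricTaskTestSuite(PpdbMetricTestCase):
        @classmethod
        def makeTask(cls):
            return MyPpdbMetricTask()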