DM-21919: Run ap_verify end-to-end in Gen 3 #97

Merged: 6 commits, Aug 19, 2020

36 changes: 29 additions & 7 deletions doc/lsst.ap.verify/command-line-reference.rst
@@ -29,8 +29,10 @@ These two arguments are mandatory, all others are optional (though use of either
Status code
===========

Like :ref:`command-line tasks <command-line-task-argument-reference>`, :command:`ap_verify.py` returns the number of data IDs that could not be processed (i.e., 0 on a complete success).
However, an uncaught exception causes :command:`ap_verify.py` to return an interpreter-dependent nonzero value instead (also as for command-line tasks).
:command:`ap_verify.py` returns 0 on success, and a non-zero value if there were any processing problems.
In :option:`--gen2` mode, the status code is the number of data IDs that could not be processed, as for :ref:`command-line tasks <command-line-task-argument-reference>`.

With both :option:`--gen2` and :option:`--gen3`, an uncaught exception may cause :command:`ap_verify.py` to return an interpreter-dependent nonzero value instead of the above.
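
For example, a minimal sketch of checking the status code of :command:`ap_verify.py` from a Python wrapper (the dataset name and output path below are placeholders):

.. code-block:: python

   import subprocess

   # Run ap_verify and inspect its exit status; the dataset and output are placeholders.
   result = subprocess.run(
       ["ap_verify.py", "--dataset", "HiTS2015", "--gen3",
        "--output", "workspaces/hits/"]
   )
   if result.returncode != 0:
       print(f"ap_verify reported problems (status {result.returncode}); check the logs.")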

.. _ap-verify-cmd-args:

@@ -43,8 +45,10 @@ Required arguments are :option:`--dataset` and :option:`--output`.

**Butler data ID.**

Specify data ID to process using :doc:`data ID syntax </modules/lsst.pipe.base/command-line-task-dataid-howto>`.
For example, ``--id "visit=12345 ccd=1..6 filter=g"``.
Specify data ID to process.
If using :option:`--gen2`, this should use :doc:`data ID syntax </modules/lsst.pipe.base/command-line-task-dataid-howto>`, such as ``--id "visit=12345 ccd=1..6 filter=g"``.
If using :option:`--gen3`, this should use :ref:`dimension expression syntax <daf_butler_dimension_expressions>`, such as ``--id "visit=12345 and detector in (1..6) and abstract_filter='g'"``.

Multiple copies of this argument are allowed.
For compatibility with the syntax used by command line tasks, ``--id`` with no argument processes all data IDs.

@@ -63,12 +67,13 @@ Required arguments are :option:`--dataset` and :option:`--output`.

.. option:: --dataset-metrics-config <filename>

**Input dataset-level metrics config.**
**Input dataset-level metrics config. (Gen 2 only)**

A config file containing a `~lsst.verify.gen2tasks.MetricsControllerConfig`, which specifies which metrics are measured and sets any options.
If this argument is omitted, :file:`config/default_dataset_metrics.py` will be used.

Use :option:`--image-metrics-config` to configure image-level metrics instead.
For the Gen 3 equivalent to this option, see :option:`--pipeline`.
See also :doc:`new-metrics`.

.. option:: --gen2
@@ -99,17 +104,18 @@ Required arguments are :option:`--dataset` and :option:`--output`.

.. option:: --image-metrics-config <filename>

**Input image-level metrics config.**
**Input image-level metrics config. (Gen 2 only)**

A config file containing a `~lsst.verify.gen2tasks.MetricsControllerConfig`, which specifies which metrics are measured and sets any options.
If this argument is omitted, :file:`config/default_image_metrics.py` will be used.

Use :option:`--dataset-metrics-config` to configure dataset-level metrics instead.
For the Gen 3 equivalent to this option, see :option:`--pipeline`.
See also :doc:`new-metrics`.

.. option:: --metrics-file <filename>

**Output metrics file.**
**Output metrics file. (Gen 2 only)**

The template for a file to contain metrics measured by ``ap_verify``, in a format readable by the :doc:`lsst.verify</modules/lsst.verify/index>` framework.
The string ``{dataId}`` shall be replaced with the data ID associated with the job, and its use is strongly recommended.
@@ -123,3 +129,19 @@ Required arguments are :option:`--dataset` and :option:`--output`.

The workspace will be created if it does not exist, and will contain both input and output repositories required for processing the data.
The path may be absolute or relative to the current working directory.

.. option:: -p, --pipeline <filename>

**Custom ap_verify pipeline. (Gen 3 only)**

A pipeline definition file containing a custom verification pipeline.
If omitted, :file:`pipelines/ApVerify.yaml` will be used.

The most common use for a custom pipeline is adding or removing metrics to be run along with the AP pipeline.

.. note::

At present, ap_verify assumes that the provided pipeline is some superset of the AP pipeline.
It will likely crash if any AP tasks are missing.

For the Gen 2 equivalent to this option, see :option:`--dataset-metrics-config` and :option:`--image-metrics-config`.
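
As a rough sketch only (this is not part of the documented ap_verify interface), a pipeline file can be inspected before being passed to this option, assuming `lsst.pipe.base.Pipeline` provides ``fromFile`` and ``toExpandedPipeline`` as in recent Gen 3 middleware:

.. code-block:: python

   import os

   from lsst.pipe.base import Pipeline

   # Load the default ap_verify pipeline and list its tasks; a custom pipeline
   # intended for --pipeline can be checked the same way before running.
   path = os.path.join(os.environ["AP_VERIFY_DIR"], "pipelines", "ApVerify.yaml")
   pipeline = Pipeline.fromFile(path)
   for taskDef in pipeline.toExpandedPipeline():
       print(taskDef.label, "->", taskDef.taskName)
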
5 changes: 4 additions & 1 deletion doc/lsst.ap.verify/failsafe.rst
@@ -26,13 +26,16 @@ In particular, where possible it will :ref:`preserve metrics<ap-verify-failsafe-
Recovering metrics from partial runs
====================================

``ap_verify`` produces some measurements even if the pipeline cannot run to completion.
In Gen 2 mode, ``ap_verify`` produces some measurements even if the pipeline cannot run to completion.
Specifically, if a task fails, any previously completed tasks that store measurements to disk will have done so.
In addition, if a metric cannot be computed, ``ap_verify`` may attempt to store the values of the remaining metrics.

If the pipeline fails, ``ap_verify`` may not preserve measurements computed from the dataset.
Once the framework for handling metrics is finalized, ``ap_verify`` may be able to offer a broader guarantee that does not depend on how or where any individual metric is implemented.

The Gen 3 framework is not yet mature enough to handle partial failures.
It is expected that Gen 3 processing will eventually be able to compute all metrics from completed tasks.

Further reading
===============

8 changes: 7 additions & 1 deletion doc/lsst.ap.verify/new-metrics.rst
@@ -12,14 +12,20 @@ Configuring metrics for ap_verify
Each metric has an associated :lsst-task:`~lsst.verify.gen2tasks.metricTask.MetricTask`, typically in the package associated with the metric.
For example, the code for computing ``ip_diffim.numSciSources`` can be found in the ``ip_diffim`` package, not in ``ap_verify``.

The metrics computed by ``ap_verify`` are configured through two command-line options, :option:`--image-metrics-config` and :option:`--dataset-metrics-config`.
In Gen 2, the metrics computed by ``ap_verify`` are configured through two command-line options, :option:`--image-metrics-config` and :option:`--dataset-metrics-config`.
These options each take a config file for a `~lsst.verify.gen2tasks.metricsControllerTask.MetricsControllerConfig`, the former for metrics that are computed over individual images and the latter for metrics that apply to the entire dataset.
Typically, a file configures each metric through ``config.measurers[<name>]``; see the documentation for :lsst-task:`~lsst.verify.gen2tasks.MetricsControllerTask` for examples.

The ``ap_verify`` package provides two config files in the :file:`config/` directory, which define the image- and dataset-level configs that are run by default (for example, during CI).
These files feature complex logic to minimize code duplication and the work of adding new metrics.
This complexity is not required by ``MetricsControllerTask``; a config that's just a list of assignments will also work.

In Gen 3, the metrics computed by ``ap_verify`` are configured as part of the pipeline.
The pipeline can be overridden using the :option:`--pipeline` command-line option.

The ``ap_verify`` package provides a fully instrumented default pipeline in :file:`pipelines/ApVerify.yaml`.
To make it easy to mix and match metrics, all :lsst-task:`~lsst.verify.gen2tasks.metricTask.MetricTask` configuration is done in separate sub-pipelines that are then included in :file:`ApVerify.yaml`.

Further reading
===============

44 changes: 38 additions & 6 deletions doc/lsst.ap.verify/running.rst
@@ -26,8 +26,8 @@ The dataset names are a placeholder for a future data repository versioning syst

.. _ap-verify-run-output:

How to run ap_verify in a new workspace
=======================================
How to run ap_verify in a new workspace (Gen 2 pipeline)
========================================================

Using the `HiTS 2015 <https://github.com/lsst/ap_verify_hits2015/>`_ dataset as an example, one can run :command:`ap_verify.py` as follows:

@@ -45,9 +45,9 @@ while the output is:

* :command:`workspaces/hits/` is the location where the pipeline will create any :ref:`Butler repositories<command-line-task-data-repo-using-uris>` necessary,

This call will create a new directory at :file:`workspaces/hits`, ingest the HiTS data into a new repository based on :file:`<hits-data>/repo/`, then run visit 412518 through the entire AP pipeline.
This call will create a new directory at :file:`workspaces/hits`, ingest the HiTS data into a new repository based on :file:`<hits-data>/repo/`, then run visits 412518 and 412568 through the entire AP pipeline.

It's also possible to run an entire dataset by omitting the :command:`--id` argument (as some datasets are very large, do this with caution):
It's also possible to run an entire dataset by omitting the :option:`--id` argument (as some datasets are very large, do this with caution):

.. prompt:: bash

@@ -58,6 +58,25 @@ It's also possible to run an entire dataset by omitting the :option:`--id` argu
The command-line interface for :command:`ap_verify.py` is at present more limited than those of command-line tasks.
See the :doc:`command-line-reference` for details.

.. _ap-verify-run-output-gen3:

How to run ap_verify in a new workspace (Gen 3 pipeline)
========================================================

The command for running the pipeline on Gen 3 data is almost identical to Gen 2:

.. prompt:: bash

ap_verify.py --dataset HiTS2015 --gen3 --id "visit in (412518, 412568) and abstract_filter='g'" --output workspaces/hits/

The only differences are substituting :option:`--gen3` for :option:`--gen2`, and formatting the (optional) data ID in the :ref:`Gen 3 query syntax <daf_butler_dimension_expressions>`.

.. note::

Because the science pipelines are still being converted to Gen 3, Gen 3 processing may not be supported for all ap_verify datasets.
See the individual dataset's documentation for more details.


.. _ap-verify-run-ingest:

How to run ingestion by itself
@@ -78,8 +97,8 @@ Other options from :command:`ap_verify.py` are not available.

.. _ap-verify-results:

How to use measurements of metrics
==================================
How to use measurements of metrics (Gen 2 pipeline)
===================================================

After ``ap_verify`` has run, it will produce files named, by default, :file:`ap_verify.<dataId>.verify.json` in the caller's directory.
The file name may be customized using the :option:`--metrics-file` command-line argument.
@@ -88,6 +107,19 @@ These files contain metric measurements in ``lsst.verify`` format, and can be lo
If the pipeline is interrupted by a fatal error, completed measurements will be saved to metrics files for debugging purposes.
See the :ref:`error-handling policy <ap-verify-failsafe-partialmetric>` for details.
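
As a rough sketch (the file name below is illustrative; the actual name depends on the data ID and any :option:`--metrics-file` template), such a file can be loaded with `~lsst.verify.Job.deserialize`:

.. code-block:: python

   import json

   from lsst.verify import Job

   # Load one metrics file written by ap_verify; the name is illustrative.
   with open("ap_verify.visit=412518.verify.json") as f:
       job = Job.deserialize(**json.load(f))

   # Print every recorded measurement and its value.
   for name in job.measurements:
       print(name, job.measurements[name].quantity)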

.. _ap-verify-results-gen3:

How to use measurements of metrics (Gen 3 pipeline)
===================================================

After ``ap_verify`` has run, it will produce Butler datasets named ``metricValue_<metric package>_<metric>``.
These can be queried, like any Butler dataset, using methods like `~lsst.daf.butler.Registry.queryDatasetTypes` and `~lsst.daf.butler.Butler.get`.

.. note::

Not all metric values need have the same data ID as the data run through the pipeline.
For example, metrics describing the full focal plane have a visit but no detector.
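
As a rough sketch (the repository path and collection name below are placeholders, and the exact calls may vary with the middleware version), the metric values can be retrieved with the Butler:

.. code-block:: python

   from lsst.daf.butler import Butler

   # Open the workspace's Gen 3 repository; path and collection are placeholders.
   butler = Butler("workspaces/hits/repo", collections="ap_verify-output")

   # Find every metric-value dataset type produced, then read each measurement.
   for datasetType in butler.registry.queryDatasetTypes("metricValue_*"):
       for ref in butler.registry.queryDatasets(datasetType, collections="ap_verify-output"):
           measurement = butler.get(ref)  # an lsst.verify.Measurement
           print(datasetType.name, ref.dataId, measurement.quantity)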

Further reading
===============

19 changes: 19 additions & 0 deletions pipelines/ApVerify.yaml
@@ -0,0 +1,19 @@
# Gen 3 pipeline for ap_verify
# This adds various lsst.verify metric tasks to the AP pipeline

description: Fully instrumented AP pipeline
inherits:
- location: $AP_PIPE_DIR/pipelines/ApPipe.yaml
- location: $AP_VERIFY_DIR/pipelines/MetricsRuntime.yaml
- location: $AP_VERIFY_DIR/pipelines/MetricsMisc.yaml
tasks:
Review comment (Contributor):
In writing pipelines I've been either writing meta pipelines with only inherits blocks or pipelines with only tasks blocks. I agree, in this case, that having the two mixed makes sense.

Reply (Member Author):
So are you saying you would create an ApPipeForApVerify.yaml that does the config override, then import it in ApVerify.yaml in place of the original ApPipe.yaml?

How ap_verify handles pipelines is likely to get cargo-culted by other AP and/or metrics users, so it's worth thinking about whether it's doing things "right".

diaPipe:
# TODO: how to prevent duplication with ApPipe definition?
class: lsst.ap.association.DiaPipelineTask
config:
apdb.isolation_level: READ_UNCOMMITTED
doPackageAlerts: True
contracts:
# Metric inputs must match pipeline outputs
- imageDifference.connections.coaddName == fracDiaSourcesToSciSources.connections.coaddName
- imageDifference.connections.fakesType == fracDiaSourcesToSciSources.connections.fakesType
24 changes: 24 additions & 0 deletions pipelines/MetricsMisc.yaml
@@ -0,0 +1,24 @@
# Miscellaneous metrics for Alert Production
# In the future, these might be placed in task-specific pipelines (for debugging)
# or grouped by their datasets (to optimize expensive Butler reads)

description: Miscellaneous AP Pipeline metrics
tasks:
numNewDiaObjects:
class: lsst.ap.association.metrics.NumberNewDiaObjectsMetricTask
config:
connections.labelName: diaPipe # partial name of metadata dataset
numUnassociatedDiaObjects:
class: lsst.ap.association.metrics.NumberUnassociatedDiaObjectsMetricTask
config:
connections.labelName: diaPipe
fracUpdatedDiaObjects:
class: lsst.ap.association.metrics.FractionUpdatedDiaObjectsMetricTask
config:
connections.labelName: diaPipe
totalUnassociatedDiaObjects:
class: lsst.ap.association.metrics.TotalUnassociatedDiaObjectsMetricTask
numSciSources:
class: lsst.ip.diffim.metrics.NumberSciSourcesMetricTask
fracDiaSourcesToSciSources:
class: lsst.ip.diffim.metrics.FractionDiaSourcesToSciSourcesMetricTask
88 changes: 88 additions & 0 deletions pipelines/MetricsRuntime.yaml
@@ -0,0 +1,88 @@
# Timing and system resource metrics for Alert Production

description: Runtime metrics (customized for AP pipeline)
tasks:
timing_isr:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: ip_isr # metrics package
connections.metric: IsrTime # metric name
connections.labelName: isr # partial name of metadata dataset
metadataDimensions: [instrument, exposure, detector] # TimingMetricTask assumes visit
target: isr.run # method name in metadata. Usually matches label for top-level tasks
timing_charImage:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: pipe_tasks
connections.metric: CharacterizeImageTime
connections.labelName: charImage
target: characterizeImage.run
timing_calibrate:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: pipe_tasks
connections.metric: CalibrateTime
connections.labelName: calibrate
target: calibrate.run
timing_imageDifference:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: pipe_tasks
connections.metric: ImageDifferenceTime
connections.labelName: imageDifference
metadataDimensions: [instrument, visit, detector, skymap]
target: imageDifference.run
timing_imageDifference_astrometer:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: meas_astrom
connections.metric: AstrometryTime
connections.labelName: imageDifference
metadataDimensions: [instrument, visit, detector, skymap]
target: imageDifference:astrometer.loadAndMatch
timing_imageDifference_register:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: pipe_tasks
connections.metric: RegisterImageTime
connections.labelName: imageDifference
metadataDimensions: [instrument, visit, detector, skymap]
target: imageDifference:register.run
timing_imageDifference_subtract:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: ip_diffim
connections.metric: ImagePsfMatchTime
connections.labelName: imageDifference
metadataDimensions: [instrument, visit, detector, skymap]
target: imageDifference:subtract.subtractExposures
timing_imageDifference_detection:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: meas_algorithms
connections.metric: SourceDetectionTime
connections.labelName: imageDifference
metadataDimensions: [instrument, visit, detector, skymap]
target: imageDifference:detection.run
timing_imageDifference_measurement:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: ip_diffim
connections.metric: DipoleFitTime
connections.labelName: imageDifference
metadataDimensions: [instrument, visit, detector, skymap]
target: imageDifference:measurement.run
timing_diaPipe_associator:
class: lsst.verify.tasks.commonMetrics.TimingMetricTask
config:
connections.package: ap_association
connections.metric: AssociationTime
connections.labelName: diaPipe
target: diaPipe:associator.run
memory_apPipe:
class: lsst.verify.tasks.commonMetrics.MemoryMetricTask
config:
connections.package: ap_pipe
connections.metric: ApPipeMemory
connections.labelName: diaPipe
target: diaPipe.run # Memory use is peak over process, so measure last task
23 changes: 14 additions & 9 deletions python/lsst/ap/verify/ap_verify.py
@@ -37,7 +37,7 @@
from .dataset import Dataset
from .ingestion import ingestDataset, ingestDatasetGen3
from .metrics import MetricsParser, computeMetrics
from .pipeline_driver import ApPipeParser, runApPipe
from .pipeline_driver import ApPipeParser, runApPipeGen2, runApPipeGen3
from .workspace import WorkspaceGen2, WorkspaceGen3


@@ -162,14 +162,19 @@ def runApVerify(cmdLine=None):
args = _ApVerifyParser().parse_args(args=cmdLine)
log.debug('Command-line arguments: %s', args)

workspace = WorkspaceGen2(args.output)
ingestDataset(args.dataset, workspace)

log.info('Running pipeline...')
apPipeResults = runApPipe(workspace, args)
computeMetrics(workspace, apPipeResults.parsedCmd.id, args)

return _getCmdLineExitStatus(apPipeResults.resultList)
if args.useGen3:
workspace = WorkspaceGen3(args.output)
ingestDatasetGen3(args.dataset, workspace)
log.info('Running pipeline...')
# Gen 3 pipeline includes both AP and metrics
return runApPipeGen3(workspace, args)
else:
workspace = WorkspaceGen2(args.output)
ingestDataset(args.dataset, workspace)
log.info('Running pipeline...')
apPipeResults = runApPipeGen2(workspace, args)
computeMetrics(workspace, apPipeResults.parsedCmd.id, args)
return _getCmdLineExitStatus(apPipeResults.resultList)


def _getCmdLineExitStatus(resultList):
Expand Down