
DM-11592: Better documentation for ap_verify #12

Merged Dec 14, 2017 (12 commits)
2 changes: 2 additions & 0 deletions .gitignore
@@ -14,6 +14,8 @@ doc/*.tag
doc/doxygen.conf
doc/html
doc/xml
doc/_build
doc/py-api/
tests/.cache
tests/.tests
ups/*.cfgc
2 changes: 1 addition & 1 deletion README.md
@@ -16,7 +16,7 @@ Other configuration options may be added in the future.

### Setting Up a Dataset

`ap_verify` requires that all data be in a [dataset package](https://github.com/lsst-dm/ap_verify_dataset_template). It will create a workspace modeled after the package's `data` directory, then process any data found in the `raw` and `ref_cats` in the new workspace. Anything placed in `data` will be copied to a `ap_verify` run's workspace as-is, and must at least include a `_mapper` file naming the CameraMapper for the data.
`ap_verify` requires that all data be in a [dataset package](https://github.com/lsst-dm/ap_verify_dataset_template). It will create a workspace modeled after the package's `repo` directory, then process any data found in the `raw` and `ref_cats` directories in the new workspace. Anything placed in `repo` will be copied to an `ap_verify` run's workspace as-is, and must at least include a `_mapper` file naming the CameraMapper for the data.

The dataset package must work with eups, and must be registered in `config/dataset_config.yaml` in order for `ap_verify` to support it. `ap_verify` will use `eups setup` to prepare the dataset package and any dependencies; typically, they will include the `obs_` package for the instrument that took the data.

4 changes: 2 additions & 2 deletions bin.src/ap_verify.py
@@ -21,7 +21,7 @@
# see <http://www.lsstcorp.org/LegalNotices/>.
#

from lsst.ap.verify import run_ap_verify
from lsst.ap.verify import runApVerify

if __name__ == "__main__":
run_ap_verify()
runApVerify()
4 changes: 2 additions & 2 deletions demo_run.py
@@ -35,7 +35,7 @@

import lsst.log
from lsst.verify import Job, Measurement
from lsst.ap.verify import run_ap_verify
from lsst.ap.verify import runApVerify

visits = [410915, 410929, 410931, 410971, 410985, 410987,
411021, 411035, 411037, 411055, 411069, 411071, 411255, 411269, 411271,
@@ -65,7 +65,7 @@ def processImage(visit, ccd):
try:
dataId = 'visit=%d ccdnum=%d filter=g' % (visit, ccd)
args = '--dataset HiTS2015 --output temp/ --dataIdString "%s" --silent' % dataId
run_ap_verify(shlex.split(args))
runApVerify(shlex.split(args))
finally:
jobFile = 'ap_verify.verify.json'
if os.path.isfile(jobFile):
64 changes: 0 additions & 64 deletions doc/Makefile

This file was deleted.

Empty file added doc/_static/dummy.txt
Empty file.
27 changes: 27 additions & 0 deletions doc/ap_verify/index.rst
@@ -0,0 +1,27 @@
.. _ap_verify-package:

#########
ap_verify
#########

The ``ap_verify`` package wraps `lsst.ap.pipe` with support for managing `lsst.verify` metrics.
It allows `lsst.ap.pipe` to be run on standardized data repositories provided by the :ref:`dataset framework<ap-verify-datasets>`.

Project info
============

Repository
https://github.com/lsst-dm/ap_verify

JIRA component
`ap_verify <https://jira.lsstcorp.org/browse/DM/component/14167>`_

Modules
=======

- :ref:`lsst.ap.verify <lsst.ap.verify>`
- :ref:`lsst.ap.verify.measurements <lsst.ap.verify.measurements>`

.. NOTE: Need pid and issuetype
.. _`Create a ticket`: https://jira.lsstcorp.org/secure/CreateIssueDetails!init.jspa?pid=&issuetype=&components=14167

7 changes: 1 addition & 6 deletions doc/conf.py
@@ -9,10 +9,5 @@
project_name="ap_verify",
copyright="2017 Association of Universities for "
"Research in Astronomy, Inc.",
version=lsst.ap_verify.version.__version__,
doxygen_xml_dirname=None))
version=lsst.ap.verify.version.__version__))

intersphinx_mapping['astropy'] = ('http://docs.astropy.org/en/stable', None)

# DEBUG only
automodsumm_writereprocessed = False
11 changes: 11 additions & 0 deletions doc/index.rst
@@ -0,0 +1,11 @@
###############################
ap_verify documentation preview
###############################

.. toctree::
:maxdepth: 1

ap_verify/index.rst
lsst.ap.verify/index.rst
lsst.ap.verify.measurements/index.rst

19 changes: 19 additions & 0 deletions doc/lsst.ap.verify.measurements/index.rst
@@ -0,0 +1,19 @@
.. currentmodule:: lsst.ap.verify.measurements

.. _lsst.ap.verify.measurements:

###########################
lsst.ap.verify.measurements
###########################

The ``lsst.ap.verify.measurements`` package provides implementation code for metrics defined for the AP pipeline.
It exposes functions that measure all applicable metrics from task metadata or processed Butler repositories.
The set of metrics measured is deliberately kept opaque, so that ``ap_verify`` itself need not be modified every time a new metric is implemented.

.. _lsst-ap-verify-measurements-overview:

Python API reference
====================

.. automodapi:: lsst.ap.verify.measurements

141 changes: 141 additions & 0 deletions doc/lsst.ap.verify/command-line-reference.rst
@@ -0,0 +1,141 @@
.. _ap-verify-cmd:
Review comment (Member):

    I think it'd be useful to add

        .. program:: ap_verify.py

    here. This way the options documented below can be uniquely referenced as ``:option:`ap_verify.py --dataset```, or ``:option:`--dataset <ap_verify.py --dataset>``` for short (http://www.sphinx-doc.org/en/stable/domains.html?highlight=option#directive-program).

Reply (Member):

    Yeah, maybe I could add ``program``, but in that case there was no specific script being referenced, so it seemed safe to have the flags default to pointing to that command-line task page rather than being namespaced.


.. program:: ap_verify.py

######################
Command-Line Reference
######################

This page describes the command-line arguments and environment variables used by ``ap_verify``.

Signature and syntax
====================

The basic call signature of ``ap_verify`` is:

.. code-block:: sh

python ap_verify.py --dataset DATASET --output OUTPUTREPO --dataIdString DATAID

These three arguments (or replacing ``--output`` with ``--rerun``) are mandatory; all others are optional.

Status code
===========

.. TODO: should we require that ap_verify and ap_pipe follow the CmdLineTask convention? (DM-12853)

``ap_verify`` returns a status code of ``0`` if the pipeline ran to completion.
If the pipeline fails, the status code will be an interpreter-dependent nonzero value.

Named arguments
===============

Required arguments are :option:`--dataset`, :option:`--dataIdString`, and exactly one of :option:`--output` or :option:`--rerun`.

.. option:: --dataIdString <dataId>

**Butler data ID.**

The input data ID is required for all ``ap_verify`` runs except when using :option:`--help` or :option:`--version`.

Specify data ID to process using data ID syntax.
For example, ``--dataIdString "visit=12345 ccd=1 filter=g"``.

Currently this argument is heavily restricted compared to its :ref:`command line task counterpart<command-line-task-dataid-howto>`.
In particular, the dataId must specify exactly one visit and exactly one CCD, and may not be left blank to mean "all data".
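As an illustration of this restriction, here is a minimal sketch of how such a dataId string could be parsed and checked. The helper names are hypothetical; this is not ``ap_verify``'s actual parser.

```python
# Illustrative sketch only -- not ap_verify's actual dataId handling.
# It mirrors the restriction described above: the dataId must name
# exactly one visit and exactly one CCD, and may not be blank.

def parse_data_id(data_id_string):
    """Parse a space-separated key=value dataId string into a dict."""
    pairs = (item.split("=", 1) for item in data_id_string.split())
    return {key: value for key, value in pairs}

def is_valid_ap_verify_data_id(data_id):
    """Check the documented restriction: a visit and a CCD must be given."""
    return "visit" in data_id and ("ccd" in data_id or "ccdnum" in data_id)

data_id = parse_data_id("visit=12345 ccd=1 filter=g")
assert is_valid_ap_verify_data_id(data_id)
```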

.. option:: --dataset <dataset_name>

**Input dataset designation.**

The input dataset is required for all ``ap_verify`` runs except when using :option:`--help` or :option:`--version`.

The argument is a unique name for the dataset, which can be associated with a repository in the :ref:`configuration file<ap-verify-configuration-dataset>`.
See :ref:`ap-verify-dataset-name` for more information on dataset names.

Allowed names can be queried using the :option:`--help` argument.

.. option:: -h, --help

**Print help.**

The help is equivalent to this documentation page, describing command-line arguments.

.. option:: -j <processes>, --processes <processes>

**Number of processes to use.**

When ``processes`` is larger than 1 the pipeline may use the Python `multiprocessing` module to parallelize processing of multiple datasets across multiple processors.

.. note::

This option is provided for forward-compatibility, but is not yet supported by ``ap_pipe``.

.. option:: --output <output_repo>

**Output data repository URI or path.**

The output dataset or :option:`--rerun` is required for all ``ap_verify`` runs except when using :option:`--help` or :option:`--version`.

The output data repository will be created if it does not exist.
The path may be absolute or relative to the current working directory.

``--output`` may not be used with the :option:`--rerun` argument.

See :doc:`command-line-task-data-repo-howto` for background.

.. TODO: I think the --rerun argument may have been a mistake -- it's almost entirely not quite unlike its command line task equivalent (DM-12853)

.. option:: --rerun <output>

**Specify output "rerun".**

The rerun or :option:`--output` is required for all ``ap_verify`` runs except when using :option:`--help` or :option:`--version`.

For ``ap_verify``, a rerun is an output repository relative to the dataset directory (as determined by :option:`--dataset`).
This is different from command-line task reruns, which have an input repository and chain the rerun to it.
An input rerun cannot be specified.

``--rerun`` may not be used with the :option:`--output` argument.
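The mutual exclusion between ``--output`` and ``--rerun``, and the dataset-relative rerun path, can be sketched as follows. The ``rerun`` subdirectory name is an assumption made for illustration; the text above only says the rerun is relative to the dataset directory.

```python
import os

# Sketch of the documented semantics; not ap_verify's implementation.
def output_repo(dataset_dir, output=None, rerun=None):
    """Resolve the output repository from --output or --rerun.

    Exactly one of `output` or `rerun` must be given. A rerun is
    resolved relative to the dataset directory (the "rerun" subdirectory
    used here is a hypothetical layout).
    """
    if (output is None) == (rerun is None):
        raise ValueError("exactly one of --output or --rerun is required")
    if output is not None:
        return output
    return os.path.join(dataset_dir, "rerun", rerun)
```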

.. option:: --silent

**Do not report measurements to SQuaSH.**

Disables upload of measurements, so that ``ap_verify`` can be run for testing purposes by developers.

.. note::

Ingestion of `lsst.verify` metrics is not yet supported by SQuaSH, so this flag should always be provided for now.

.. option:: --version

**Print version number.**

Since ``ap_verify`` is not yet officially part of the Stack, the version number is arbitrary.


.. _command-line-task-envvar:

Environment variables
=====================

The :envvar:`SQUASH_USER`, :envvar:`SQUASH_PASSWORD`, and :envvar:`SQUASH_URL` environment variables are used by :ref:`the verify framework<lsst.verify>` to configure SQuaSH upload.
:envvar:`SQUASH_USER` and :envvar:`SQUASH_PASSWORD` must be defined in any environment where ``ap_verify`` is run unless the :option:`--silent` flag is used.

.. TODO: remove this once `lsst.verify` documents them, and update the link (DM-12849)

.. envvar:: SQUASH_USER

User name to use for SQuaSH submissions.

.. envvar:: SQUASH_PASSWORD

Unencrypted password for :envvar:`SQUASH_USER`.

.. envvar:: SQUASH_URL

The location for a SQuaSH REST API. Defaults to the SQuaSH server at ``lsst.codes``.
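The requirement above can be sketched in a few lines. This is only an illustration of the documented contract, not code from ``ap_verify`` or ``lsst.verify``; the default URL shown is an assumption.

```python
import os

# Sketch of the documented requirement: SQUASH_USER and SQUASH_PASSWORD
# must be set unless --silent is used; SQUASH_URL is optional.
def squash_config(silent):
    """Return SQuaSH upload settings, or None when uploads are disabled."""
    if silent:
        return None  # no upload, so no credentials are needed
    user = os.environ.get("SQUASH_USER")
    password = os.environ.get("SQUASH_PASSWORD")
    if not user or not password:
        raise RuntimeError("SQUASH_USER and SQUASH_PASSWORD must be defined")
    # Hypothetical default; the real default is whatever lsst.verify uses.
    url = os.environ.get("SQUASH_URL", "https://squash.lsst.codes")
    return {"user": user, "password": password, "url": url}

assert squash_config(silent=True) is None
```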

.. _command-line-task-envvar-examples:

42 changes: 42 additions & 0 deletions doc/lsst.ap.verify/configuration.rst
@@ -0,0 +1,42 @@
.. _ap-verify-configuration:

############################
Configuration File Reference
############################

This page describes the file-based configuration options used by ``ap_verify``.
Most users should not need to adjust these settings, but they allow capabilities such as registering new :ref:`datasets<ap-verify-datasets>`.

.. TODO: more generic name? or split up file? (DM-12850)

The ``ap_verify`` configuration file is located at :file:`config/dataset_config.yaml`.
It consists of a list of dictionaries, each representing a specific aspect of the program.

.. _ap-verify-configuration-dataset:

datasets
========

The ``datasets`` dictionary maps dataset names (which must be provided on the ``ap_verify`` command line) to GitHub repository names.
Adding a dataset to the config is necessary for ``ap_verify`` to recognize it; in practice, the entry will be made once by the dataset author and then committed.
A dataset must still be :ref:`installed<ap-verify-datasets-install>` on the machine before it can be used.
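A hypothetical entry might look like the following. The dataset name ``HiTS2015`` appears elsewhere in this package, but the repository value here is invented for illustration; real entries are written by dataset authors.

```yaml
# Hypothetical sketch of the datasets section of config/dataset_config.yaml.
datasets:
  HiTS2015: lsst-dm/ap_verify_hits2015   # illustrative repository name
```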

.. _ap-verify-configuration-measurements:

measurements
============

.. warning::

The metrics being used by ``ap_verify`` are still being defined.
The syntax used to register them will likely change, and may be moved to a dedicated package entirely.
This section of the configuration file should be treated as preliminary and subject to change.

The ``measurements`` dictionary contains sub-dictionaries for each kind of metric.
Currently there is only one:

``timing``
A dictionary from tasks to the metrics that time them.
Subtasks must be identified by the name the parent task assigns them, and should be prefixed by the parent task name (as in "imageDifference:detection") to avoid ambiguity.
Metrics must use the full name following the convention of `lsst.verify.metrics`, as in "meas_algorithms.SourceDetectionTime".
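A hypothetical ``measurements`` entry following the naming conventions above might look like this. The keys shown are illustrative only, per the warning that this syntax is preliminary; ``meas_algorithms.SourceDetectionTime`` is the metric name used as an example in the text.

```yaml
# Hypothetical sketch of the measurements section.
measurements:
  timing:
    # Keys are task (or parentTask:subtask) names; values are full
    # metric names following the lsst.verify.metrics convention.
    "imageDifference:detection": meas_algorithms.SourceDetectionTime
```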

20 changes: 20 additions & 0 deletions doc/lsst.ap.verify/datasets-butler.rst
@@ -0,0 +1,20 @@
.. _ap-verify-datasets-butler:

################################
Datasets vs. Butler Repositories
################################

Datasets are organized using a :ref:`specific directory structure<ap-verify-datasets-structure>` instead of an :ref:`LSST Butler repository<butler>`.
This is by design:
:ref:`ingestion of observatory files into a repository<ingest>` is considered part of the pipeline system being tested by ``ap_verify``, so ``ap_verify`` must be fed uningested data as its input.
The ingestion step creates a valid repository that is then used by the rest of the pipeline.

.. TODO: depends on whether subdirectories need a particular structure (DM-12851)

A secondary benefit of this approach is that dataset maintainers do not need to manually ensure that the Git repository associated with a dataset remains a valid Butler repository despite changes to the dataset.
The dataset format merely requires that files be segregated into appropriate directories, a much looser integrity constraint.

While datasets are not Butler repositories themselves, the dataset format includes a directory, :file:`repo`, that serves as a template for the post-ingestion repository.
This template helps ensure that all repositories based on the dataset will be properly set up, in particular that any observatory-specific settings will be applied.
:file:`repo` is never modified by ``ap_verify``; all repositories created by the pipeline must be located elsewhere, whether or not they are backed by the file system.