DM-21916: SQuaSH upload of Gen 3 Measurements #77

Merged: 14 commits, Feb 23, 2021
13 changes: 13 additions & 0 deletions bin.src/gen3_to_job.py
@@ -0,0 +1,13 @@
#!/usr/bin/env python

from lsst.verify.bin.jobReporter import main, build_argparser


if __name__ == "__main__":
    parser = build_argparser()
    args = parser.parse_args()
    main(args.repository,
         args.collection,
         args.metrics_package,
         args.spec,
         args.dataset_name)
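
For context, the wrapper above just forwards parsed arguments to `main`; below is a minimal sketch of calling the same entry point directly from Python, with placeholder values for the repository, collection, and dataset name (none of these come from the PR itself):

from lsst.verify.bin.jobReporter import main

main('/path/to/gen3/repo',  # hypothetical repository path
     'demo/runs/1',         # hypothetical collection name
     None,                  # metrics_package: None processes all metrics
     'design',              # spec level to filter by
     'demo_dataset')        # hypothetical ci_dataset tag for SQuaSH

Each Job found is written to a file named `<metrics_package or 'all'>_<spec>_<key>_<timestamp>.json`, matching the pattern in `main` in jobReporter.py below.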
17 changes: 6 additions & 11 deletions doc/conf.py
@@ -1,15 +1,10 @@
 #!/usr/bin/env python
 """Sphinx configurations to build package documentation."""
 
-from documenteer.sphinxconfig.stackconf import build_package_configs
+from documenteer.conf.pipelinespkg import *
 
-import lsst.verify
-
-
-globals().update(build_package_configs(
-    project_name='verify',
-    version=lsst.verify.version.__version__,
-    doxygen_xml_dirname=None))
-
-# DEBUG only
-automodsumm_writereprocessed = False
+project = "verify"
+html_theme_options["logotext"] = project
+html_title = project
+html_short_title = project
+doxylink = {}
1 change: 1 addition & 0 deletions doc/lsst.verify/index.rst
@@ -81,6 +81,7 @@ Script reference
    :maxdepth: 1
 
    scripts/dispatch_verify.py
+   scripts/gen3_to_job.py
    scripts/inspect_job.py
    scripts/lint_metrics.py
3 changes: 3 additions & 0 deletions doc/lsst.verify/scripts/gen3_to_job.py.rst
@@ -0,0 +1,3 @@
.. autoprogram:: lsst.verify.bin.jobReporter:build_argparser()
   :prog: gen3_to_job.py
   :groups:
140 changes: 140 additions & 0 deletions python/lsst/verify/bin/jobReporter.py
@@ -0,0 +1,140 @@
import argparse
import json
import time

from lsst.verify import Job, MetricSet
from lsst.daf.butler import Butler


__all__ = ["main", "JobReporter", "build_argparser"]


def build_argparser():
    desc = 'Produce a Job object which can either be used ' \
           'to build a local report or to ship to SQuaSH.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'repository', type=str,
        help='Path to a valid gen3 repository')
    parser.add_argument(
        'collection', type=str,
        help='Collection to search for metric measurement values')
    parser.add_argument(
        '--metrics_package', type=str,
        help='Metrics namespace to filter by. If omitted, all metrics '
             'are processed.')
    parser.add_argument(
        '--spec', type=str, default="design",
        help='Spec level to apply: minimum, design, or stretch')
    parser.add_argument(
        '--dataset_name', type=str, required=True,
        help='Name of the dataset for which the report is being generated. '
             'This is the desired ci_dataset tag in SQuaSH.')
    return parser


def main(repository, collection, metrics_package, spec, dataset_name):
    """Extract metric values from a Gen 3 repository and rewrite them to disk
    in Job format.

    Parameters
    ----------
    Parameters are the same as for the `JobReporter` class.
    """
    jr = JobReporter(repository,
                     collection,
                     metrics_package,
                     spec,
                     dataset_name)
    jobs = jr.run()
    if len(jobs) == 0:
        raise RuntimeError('Job reporter returned no jobs.')
    for k, v in jobs.items():
        filename = f"{metrics_package or 'all'}_{spec}_{k}_{time.time()}.json"
        with open(filename, 'w') as fh:
            json.dump(v.json, fh, indent=2, sort_keys=True)


class JobReporter:
    """A class for extracting metric values from a Gen 3 repository and
    repackaging them as Job objects.

    Parameters
    ----------
    repository : `str`
        Path to a Butler configuration YAML file or a directory containing
        one.
    collection : `str`
        Name of the collection to search for metric values.
    metrics_package : `str` or `None`
        If provided, the namespace by which to filter selected metrics.
    spec : `str`
        The level of specification to filter metrics by.
    dataset_name : `str`
        The name of the dataset to report to SQuaSH through the
        ``ci_dataset`` tag.
    """

    def __init__(self,
                 repository,
                 collection,
                 metrics_package,
                 spec,
                 dataset_name):
        # Hard coding verify_metrics as the packager for now.
        # It would be easy to pass this in as an argument, if necessary.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            dataset = f'metricvalue_{metric.package}_{metric.metric}'
            datasetRefs = list(self.registry.queryDatasets(
                dataset, collections=self.collection))
            for ref in datasetRefs:
                m = self.butler.get(ref, collections=self.collection)
                # Make the name the same as what SQuaSH expects.
                m.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull()
                dataId = ref.dataId.full.byName()
                # Sort values by key name.
                key = "_".join(str(id) for _, id in sorted(dataId.items()))

                # For backward-compatibility with Gen 2 SQuaSH uploads.
                pfilt = dataId.get('physical_filter')
                if not pfilt:
                    # Grab the physical filter associated with the abstract
                    # filter. In general there may be more than one. Take the
                    # shortest, assuming it is the most generic.
                    pfilts = [el.name for el in
                              self.registry.queryDimensionRecords(
                                  'physical_filter',
                                  dataId=ref.dataId)]
                    pfilt = min(pfilts, key=len)

                if key not in jobs:
                    job_metadata = {
                        'filter': pfilt,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    job_metadata.update(dataId)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
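
To make the grouping logic in `run` concrete, here is a minimal sketch of how the per-Job key is derived from a data ID; the data ID values below are hypothetical:

# Hypothetical expanded data ID, as returned by ref.dataId.full.byName().
dataId = {'visit': 903334, 'detector': 22, 'band': 'r'}

# Sort by dimension name (band, detector, visit) and join the values,
# exactly as run() does when grouping measurements into Jobs.
key = '_'.join(str(value) for _, value in sorted(dataId.items()))
assert key == 'r_22_903334'

Every measurement whose data ID collapses to the same key is inserted into the same Job, so `main` writes one JSON file per distinct data ID.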
1 change: 1 addition & 0 deletions python/lsst/verify/gen2tasks/metadataTask.py
@@ -86,6 +86,7 @@ def run(self, job, *, dataref, **kwargs):
             (e.g., ``"visit"``), with the corresponding value.
         """
         job.meta['instrument'] = SquashMetadataTask._getInstrument(dataref)
+        job.meta['butler_generation'] = 'Gen2'
         job.meta.update(dataref.dataId)
 
         return Struct(job=job)
2 changes: 1 addition & 1 deletion tests/test_squashMetadataTask.py
@@ -48,7 +48,7 @@ def _checkDataId(self, dataId):
         dataref = _makeMockDataref(dataId)
         self.testbed.run(self.job, dataref=dataref)
         self.assertEqual(set(self.job.meta.keys()),
-                         {"instrument"} | dataId.keys())
+                         {"instrument", "butler_generation"} | dataId.keys())
         self.assertEqual(self.job.meta["instrument"], "FANCYCAM")
         for key, value in dataId.items():
             self.assertEqual(self.job.meta[key], value)