Merge branch 'tickets/DM-21916'

kfindeisen committed Feb 23, 2021
2 parents 427c833 + bd8d2d4 commit 3e22ff4
Showing 7 changed files with 165 additions and 12 deletions.
13 changes: 13 additions & 0 deletions bin.src/gen3_to_job.py
@@ -0,0 +1,13 @@
#!/usr/bin/env python

from lsst.verify.bin.jobReporter import main, build_argparser


if __name__ == "__main__":
    parser = build_argparser()
    args = parser.parse_args()
    main(args.repository,
         args.collection,
         args.metrics_package,
         args.spec,
         args.dataset_name)
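For context, a sketch of how this entry point might be invoked; the repository path, collection, and dataset name below are hypothetical examples, not part of the commit:

# Hypothetical command line (paths and names are examples):
#   gen3_to_job.py ./repo runs/ci_hsc --dataset_name ci_hsc --spec design
# The equivalent programmatic call, mirroring what the script does:
from lsst.verify.bin.jobReporter import main

main("./repo",        # path to a Gen 3 butler repository
     "runs/ci_hsc",   # collection to search for metric values
     None,            # metrics_package: None processes all metrics
     "design",        # spec level (minimum, design, or stretch)
     "ci_hsc")        # dataset_name, reported as the ci_dataset tag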
17 changes: 6 additions & 11 deletions doc/conf.py
@@ -1,15 +1,10 @@
-#!/usr/bin/env python
 """Sphinx configurations to build package documentation."""
 
-from documenteer.sphinxconfig.stackconf import build_package_configs
+from documenteer.conf.pipelinespkg import *
 
-import lsst.verify
-
-
-globals().update(build_package_configs(
-    project_name='verify',
-    version=lsst.verify.version.__version__,
-    doxygen_xml_dirname=None))
 
-# DEBUG only
-automodsumm_writereprocessed = False
+project = "verify"
+html_theme_options["logotext"] = project
+html_title = project
+html_short_title = project
+doxylink = {}
1 change: 1 addition & 0 deletions doc/lsst.verify/index.rst
@@ -81,6 +81,7 @@ Script reference
    :maxdepth: 1
 
    scripts/dispatch_verify.py
+   scripts/gen3_to_job.py
    scripts/inspect_job.py
    scripts/lint_metrics.py
3 changes: 3 additions & 0 deletions doc/lsst.verify/scripts/gen3_to_job.py.rst
@@ -0,0 +1,3 @@
.. autoprogram:: lsst.verify.bin.jobReporter:build_argparser()
   :prog: gen3_to_job.py
   :groups:
140 changes: 140 additions & 0 deletions python/lsst/verify/bin/jobReporter.py
@@ -0,0 +1,140 @@
import argparse
import json
import time

from lsst.verify import Job, MetricSet
from lsst.daf.butler import Butler


__all__ = ["main", "JobReporter", "build_argparser"]


def build_argparser():
    desc = 'Produce a Job object which can either be used ' \
           'to build a local report or to ship to SQuaSH.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'repository', type=str,
        help='Path to a valid gen3 repository')
    parser.add_argument(
        'collection', type=str,
        help='Collection to search for metric measurement values')
    parser.add_argument(
        '--metrics_package', type=str,
        help='Metrics namespace to filter by. If omitted, all metrics '
             'are processed.')
    parser.add_argument(
        '--spec', type=str, default="design",
        help='Spec level to apply: minimum, design, or stretch')
    parser.add_argument(
        '--dataset_name', type=str, required=True,
        help='Name of the dataset for which the report is being generated. '
             'This is the desired ci_dataset tag in SQuaSH.')
    return parser


def main(repository, collection, metrics_package, spec, dataset_name):
    """Extract metric values from a Gen 3 repository and rewrite them to
    disk in Job format.

    Parameters
    ----------
    Parameters are the same as for the `JobReporter` class.
    """
    jr = JobReporter(repository,
                     collection,
                     metrics_package,
                     spec,
                     dataset_name)
    jobs = jr.run()
    if len(jobs) == 0:
        raise RuntimeError('Job reporter returned no jobs.')
    for k, v in jobs.items():
        filename = f"{metrics_package or 'all'}_{spec}_{k}_{time.time()}.json"
        with open(filename, 'w') as fh:
            json.dump(v.json, fh, indent=2, sort_keys=True)


class JobReporter:
    """A class for extracting metric values from a Gen 3 repository and
    repackaging them as Job objects.

    Parameters
    ----------
    repository : `str`
        Path to a Butler configuration YAML file or a directory containing
        one.
    collection : `str`
        Name of the collection to search for metric values.
    metrics_package : `str` or `None`
        If provided, the namespace by which to filter selected metrics.
    spec : `str`
        The level of specification to filter metrics by.
    dataset_name : `str`
        The name of the dataset to report to SQuaSH through the
        ``ci_dataset`` tag.
    """

    def __init__(self,
                 repository,
                 collection,
                 metrics_package,
                 spec,
                 dataset_name):
        # Hard coding verify_metrics as the packager for now.
        # It would be easy to pass this in as an argument, if necessary.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            dataset = f'metricvalue_{metric.package}_{metric.metric}'
            datasetRefs = list(self.registry.queryDatasets(
                dataset, collections=self.collection))
            for ref in datasetRefs:
                m = self.butler.get(ref, collections=self.collection)
                # Make the name the same as what SQuaSH expects.
                m.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull().
                dataId = ref.dataId.full.byName()
                # Sort values by key name.
                key = "_".join(str(id) for _, id in sorted(dataId.items()))

                # For backward-compatibility with Gen 2 SQuaSH uploads.
                pfilt = dataId.get('physical_filter')
                if not pfilt:
                    # Grab the physical filter associated with the abstract
                    # filter. In general there may be more than one. Take
                    # the shortest, assuming it is the most generic.
                    pfilts = [el.name for el in
                              self.registry.queryDimensionRecords(
                                  'physical_filter',
                                  dataId=ref.dataId)]
                    pfilt = min(pfilts, key=len)

                if key not in jobs:
                    job_metadata = {
                        'filter': pfilt,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    job_metadata.update(dataId)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
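For readers of this diff, a minimal usage sketch of JobReporter, following the pattern of main() above; the repository path and collection name are hypothetical:

import json
import time

from lsst.verify.bin.jobReporter import JobReporter

reporter = JobReporter("./repo", "runs/ci_hsc",
                       metrics_package=None,  # keep all metrics
                       spec="design",
                       dataset_name="ci_hsc")
jobs = reporter.run()  # dict of data-ID string -> lsst.verify.Job
for key, job in jobs.items():
    # Same file-naming convention main() uses above.
    filename = f"all_design_{key}_{time.time()}.json"
    with open(filename, "w") as fh:
        json.dump(job.json, fh, indent=2, sort_keys=True)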
1 change: 1 addition & 0 deletions python/lsst/verify/gen2tasks/metadataTask.py
@@ -86,6 +86,7 @@ def run(self, job, *, dataref, **kwargs):
             (e.g., ``"visit"``), with the corresponding value.
         """
         job.meta['instrument'] = SquashMetadataTask._getInstrument(dataref)
+        job.meta['butler_generation'] = 'Gen2'
         job.meta.update(dataref.dataId)
 
         return Struct(job=job)
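To illustrate the new metadata key, a small sketch of what run() now leaves in job.meta, using a hypothetical Gen 2 data ID (the instrument name matches the test below):

# Hypothetical data ID; in practice it comes from the Gen 2 dataref.
dataId = {"visit": 42, "filter": "r"}

# After SquashMetadataTask.run(), job.meta contains:
expected = {"instrument": "FANCYCAM",      # from _getInstrument(dataref)
            "butler_generation": "Gen2",   # the key added by this commit
            **dataId}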
2 changes: 1 addition & 1 deletion tests/test_squashMetadataTask.py
@@ -48,7 +48,7 @@ def _checkDataId(self, dataId):
         dataref = _makeMockDataref(dataId)
         self.testbed.run(self.job, dataref=dataref)
         self.assertEqual(set(self.job.meta.keys()),
-                         {"instrument"} | dataId.keys())
+                         {"instrument", "butler_generation"} | dataId.keys())
         self.assertEqual(self.job.meta["instrument"], "FANCYCAM")
         for key, value in dataId.items():
             self.assertEqual(self.job.meta[key], value)
