DM-21916: SQuaSH upload of Gen 3 Measurements #77

Merged: 14 commits, Feb 23, 2021
13 changes: 13 additions & 0 deletions bin.src/gen3_to_job.py
@@ -0,0 +1,13 @@
#!/usr/bin/env python

from lsst.verify.bin.jobReporter import main, build_argparser


if __name__ == "__main__":
    parser = build_argparser()
    args = parser.parse_args()
    main(args.repository,
         args.collection,
         args.metrics_package,
         args.spec,
         args.dataset_name)
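
For context, the wrapper above just forwards parsed arguments to `main`; below is a minimal sketch of calling the same entry point directly from Python, with placeholder values for the repository, collection, and dataset name (none of these come from the PR itself):

from lsst.verify.bin.jobReporter import main

main('/path/to/gen3/repo',  # hypothetical repository path
     'demo/runs/1',         # hypothetical collection name
     None,                  # metrics_package: None processes all metrics
     'design',              # spec level to filter by
     'demo_dataset')        # hypothetical ci_dataset tag for SQuaSH

Each Job found is written to a file named `<metrics_package or 'all'>_<spec>_<key>_<timestamp>.json`, matching the pattern in `main` in jobReporter.py below.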
17 changes: 6 additions & 11 deletions doc/conf.py
@@ -1,15 +1,10 @@
 #!/usr/bin/env python
 """Sphinx configurations to build package documentation."""
 
-from documenteer.sphinxconfig.stackconf import build_package_configs
+from documenteer.conf.pipelinespkg import *
 
-import lsst.verify
-
-
-globals().update(build_package_configs(
-    project_name='verify',
-    version=lsst.verify.version.__version__,
-    doxygen_xml_dirname=None))
-
-# DEBUG only
-automodsumm_writereprocessed = False
+project = "verify"
+html_theme_options["logotext"] = project
+html_title = project
+html_short_title = project
+doxylink = {}
1 change: 1 addition & 0 deletions doc/lsst.verify/index.rst
@@ -81,6 +81,7 @@ Script reference
    :maxdepth: 1
 
    scripts/dispatch_verify.py
+   scripts/gen3_to_job.py
    scripts/inspect_job.py
    scripts/lint_metrics.py
3 changes: 3 additions & 0 deletions doc/lsst.verify/scripts/gen3_to_job.py.rst
@@ -0,0 +1,3 @@
.. autoprogram:: lsst.verify.bin.jobReporter:build_argparser()
   :prog: gen3_to_job.py
   :groups:
140 changes: 140 additions & 0 deletions python/lsst/verify/bin/jobReporter.py
@@ -0,0 +1,140 @@
import argparse
import json
import time

from lsst.verify import Job, MetricSet
from lsst.daf.butler import Butler


__all__ = ["main", "JobReporter", "build_argparser"]


def build_argparser():
    desc = 'Produce a Job object which can either be used ' \
           'to build a local report or to ship to SQuaSH.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'repository', type=str,
        help='Path to a valid gen3 repository')
    parser.add_argument(
        'collection', type=str,
        help='Collection to search for metric measurement values')
    parser.add_argument(
        '--metrics_package', type=str,
        help='Metrics namespace to filter by. If omitted, all metrics '
             'are processed.')
    parser.add_argument(
        '--spec', type=str, default="design",
        help='Spec level to apply: minimum, design, or stretch')
    parser.add_argument(
        '--dataset_name', type=str, required=True,
        help='Name of the dataset for which the report is being generated. '
             'This is the desired ci_dataset tag in SQuaSH.')
    return parser


def main(repository, collection, metrics_package, spec, dataset_name):
    """Extract metric values from a Gen 3 repository and rewrite them to disk
    in Job format.

    Parameters
    ----------
    Parameters are the same as for the `JobReporter` class.
    """
    jr = JobReporter(repository,
                     collection,
                     metrics_package,
                     spec,
                     dataset_name)
    jobs = jr.run()
    if len(jobs) == 0:
        raise RuntimeError('Job reporter returned no jobs.')
    for k, v in jobs.items():
        filename = f"{metrics_package or 'all'}_{spec}_{k}_{time.time()}.json"
        with open(filename, 'w') as fh:
            json.dump(v.json, fh, indent=2, sort_keys=True)


class JobReporter:
    """A class for extracting metric values from a Gen 3 repository and
    repackaging them as Job objects.

    Parameters
    ----------
    repository : `str`
        Path to a Butler configuration YAML file or a directory containing
        one.
    collection : `str`
        Name of the collection to search for metric values.
    metrics_package : `str` or `None`
        If provided, the namespace by which to filter selected metrics.
    spec : `str`
        The level of specification to filter metrics by.
    dataset_name : `str`
        The name of the dataset to report to SQuaSH through the
        ``ci_dataset`` tag.
    """

    def __init__(self,
                 repository,
                 collection,
                 metrics_package,
                 spec,
                 dataset_name):
        # Hard coding verify_metrics as the packager for now.
        # It would be easy to pass this in as an argument, if necessary.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            dataset = f'metricvalue_{metric.package}_{metric.metric}'
            datasetRefs = list(self.registry.queryDatasets(
                dataset, collections=self.collection))
            for ref in datasetRefs:
                m = self.butler.get(ref, collections=self.collection)
                # Make the name the same as what SQuaSH expects.
                m.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull()
                dataId = ref.dataId.full.byName()
                # Sort values by key name.
                key = "_".join(str(id) for _, id in sorted(dataId.items()))

                # For backward-compatibility with Gen 2 SQuaSH uploads.
                pfilt = dataId.get('physical_filter')
                if not pfilt:
                    # Grab the physical filter associated with the abstract
                    # filter. In general there may be more than one. Take the
                    # shortest, assuming it is the most generic.
                    pfilts = [el.name for el in
                              self.registry.queryDimensionRecords(
                                  'physical_filter',
                                  dataId=ref.dataId)]
                    pfilt = min(pfilts, key=len)

                if key not in jobs:
                    job_metadata = {
                        'filter': pfilt,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    job_metadata.update(dataId)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
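
To make the grouping logic in `run` concrete, here is a minimal sketch of how the per-Job key is derived from a data ID; the data ID values below are hypothetical:

# Hypothetical expanded data ID, as returned by ref.dataId.full.byName().
dataId = {'visit': 903334, 'detector': 22, 'band': 'r'}

# Sort by dimension name (band, detector, visit) and join the values,
# exactly as run() does when grouping measurements into Jobs.
key = '_'.join(str(value) for _, value in sorted(dataId.items()))
assert key == 'r_22_903334'

Every measurement whose data ID collapses to the same key is inserted into the same Job, so `main` writes one JSON file per distinct data ID.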
1 change: 1 addition & 0 deletions python/lsst/verify/gen2tasks/metadataTask.py
@@ -86,6 +86,7 @@ def run(self, job, *, dataref, **kwargs):
             (e.g., ``"visit"``), with the corresponding value.
         """
         job.meta['instrument'] = SquashMetadataTask._getInstrument(dataref)
+        job.meta['butler_generation'] = 'Gen2'
         job.meta.update(dataref.dataId)
 
         return Struct(job=job)
2 changes: 1 addition & 1 deletion tests/test_squashMetadataTask.py
@@ -48,7 +48,7 @@ def _checkDataId(self, dataId):
         dataref = _makeMockDataref(dataId)
         self.testbed.run(self.job, dataref=dataref)
         self.assertEqual(set(self.job.meta.keys()),
-                         {"instrument"} | dataId.keys())
+                         {"instrument", "butler_generation"} | dataId.keys())
         self.assertEqual(self.job.meta["instrument"], "FANCYCAM")
         for key, value in dataId.items():
             self.assertEqual(self.job.meta[key], value)