From 05972d9bfc7fb26d8d9c86d7c57974d150bd1b51 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Fri, 4 Mar 2022 18:13:43 -0600 Subject: [PATCH 1/8] Copy make_preloaded_export.py from ap_verify_ci_hits2015. The file can be used to create a reproducible dump of any ap_verify dataset's preloaded repo. --- bin.src/make_preloaded_export.py | 89 ++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100755 bin.src/make_preloaded_export.py diff --git a/bin.src/make_preloaded_export.py b/bin.src/make_preloaded_export.py new file mode 100755 index 00000000..11a2cc62 --- /dev/null +++ b/bin.src/make_preloaded_export.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +# This file is part of prompt_prototype. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +import argparse +import logging +import os +import sys + +import lsst.log +import lsst.skymap +import lsst.daf.butler as daf_butler +import lsst.ap.verify as ap_verify + + +def _make_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--dataset", required=True, + help="The name of the dataset as recognized by ap_verify.py.") + return parser + + +def main(): + # Ensure logs from tasks are visible + logging.basicConfig(level=logging.INFO, stream=sys.stdout) + lsst.log.configure_pylog_MDC("DEBUG", MDC_class=None) + + args = _make_parser().parse_args() + dataset = ap_verify.dataset.Dataset(args.dataset) + gen3_repo = os.path.join(dataset.datasetRoot, "preloaded") + + logging.info("Exporting Gen 3 registry to configure new repos...") + _export_for_copy(dataset, gen3_repo) + + +def _export_for_copy(dataset, repo): + """Export a Gen 3 repository so that a dataset can make copies later. + + Parameters + ---------- + dataset : `lsst.ap.verify.dataset.Dataset` + The dataset needing the ability to copy the repository. + repo : `str` + The location of the Gen 3 repository. + """ + butler = daf_butler.Butler(repo) + with butler.export(directory=dataset.configLocation, format="yaml") as contents: + # Need all detectors, even those without data, for visit definition + contents.saveDataIds(butler.registry.queryDataIds({"detector"}).expanded()) + contents.saveDatasets(butler.registry.queryDatasets(datasetType=..., collections=...)) + # Explicitly save the calibration and chained collections. + # Do _not_ include the RUN collections here because that will export + # an empty raws collection, which ap_verify assumes does not exist + # before ingest. 
+ target_types = {daf_butler.CollectionType.CALIBRATION, daf_butler.CollectionType.CHAINED} + for collection in butler.registry.queryCollections(..., collectionTypes=target_types): + contents.saveCollection(collection) + # Export skymap collection even if it is empty + contents.saveCollection(lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME) + # Dataset export exports visits, but need matching visit definitions as + # well (DefineVisitsTask won't add them back in). + contents.saveDimensionData("exposure", + butler.registry.queryDimensionRecords("exposure")) + contents.saveDimensionData("visit_definition", + butler.registry.queryDimensionRecords("visit_definition")) + contents.saveDimensionData("visit_detector_region", + butler.registry.queryDimensionRecords("visit_detector_region")) + + +if __name__ == "__main__": + main() From 465e78873e36b416daa047e5556c821e165570c4 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Fri, 4 Mar 2022 18:28:04 -0600 Subject: [PATCH 2/8] Detach make_preloaded_export.py from dataset framework. --- bin.src/make_preloaded_export.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/bin.src/make_preloaded_export.py b/bin.src/make_preloaded_export.py index 11a2cc62..6621d971 100755 --- a/bin.src/make_preloaded_export.py +++ b/bin.src/make_preloaded_export.py @@ -20,49 +20,50 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Selectively export the contents of an ap_verify dataset. + +This script selects the subset of an ap_verify dataset's preloaded repository that +matches what the central prompt processing repository ought to look like. 
+""" + + import argparse import logging import os import sys -import lsst.log import lsst.skymap import lsst.daf.butler as daf_butler -import lsst.ap.verify as ap_verify def _make_parser(): parser = argparse.ArgumentParser() - parser.add_argument("--dataset", required=True, - help="The name of the dataset as recognized by ap_verify.py.") + parser.add_argument("--src-repo", required=True, + help="The location of the repository to be exported.") return parser def main(): - # Ensure logs from tasks are visible logging.basicConfig(level=logging.INFO, stream=sys.stdout) - lsst.log.configure_pylog_MDC("DEBUG", MDC_class=None) args = _make_parser().parse_args() - dataset = ap_verify.dataset.Dataset(args.dataset) - gen3_repo = os.path.join(dataset.datasetRoot, "preloaded") + gen3_repo = os.path.abspath(args.src_repo) logging.info("Exporting Gen 3 registry to configure new repos...") - _export_for_copy(dataset, gen3_repo) + _export_for_copy(gen3_repo) -def _export_for_copy(dataset, repo): +def _export_for_copy(repo): """Export a Gen 3 repository so that a dataset can make copies later. Parameters ---------- - dataset : `lsst.ap.verify.dataset.Dataset` - The dataset needing the ability to copy the repository. repo : `str` The location of the Gen 3 repository. """ butler = daf_butler.Butler(repo) - with butler.export(directory=dataset.configLocation, format="yaml") as contents: + with butler.export(format="yaml") as contents: # Need all detectors, even those without data, for visit definition contents.saveDataIds(butler.registry.queryDataIds({"detector"}).expanded()) contents.saveDatasets(butler.registry.queryDatasets(datasetType=..., collections=...)) From 45794d346c5b473a5d43c045dd842f76359d0fb2 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Fri, 4 Mar 2022 18:52:58 -0600 Subject: [PATCH 3/8] Customize make_preloaded_export to prompt processing inputs. 
--- bin.src/make_preloaded_export.py | 38 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/bin.src/make_preloaded_export.py b/bin.src/make_preloaded_export.py index 6621d971..39194921 100755 --- a/bin.src/make_preloaded_export.py +++ b/bin.src/make_preloaded_export.py @@ -31,9 +31,9 @@ import argparse import logging import os +import re import sys -import lsst.skymap import lsst.daf.butler as daf_butler @@ -54,6 +54,18 @@ def main(): _export_for_copy(gen3_repo) +def _get_dataset_types(): + """Identify the dataset types that should be marked for export. + + Returns + ------- + types : iterable [`str` or `re.Pattern`] + The dataset types to include + """ + # Everything except raws and SS ephemerides + return [re.compile("^(?!raw|visitSsObjects).*")] + + def _export_for_copy(repo): """Export a Gen 3 repository so that a dataset can make copies later. @@ -66,24 +78,14 @@ def _export_for_copy(repo): with butler.export(format="yaml") as contents: # Need all detectors, even those without data, for visit definition contents.saveDataIds(butler.registry.queryDataIds({"detector"}).expanded()) - contents.saveDatasets(butler.registry.queryDatasets(datasetType=..., collections=...)) - # Explicitly save the calibration and chained collections. - # Do _not_ include the RUN collections here because that will export - # an empty raws collection, which ap_verify assumes does not exist - # before ingest. 
- target_types = {daf_butler.CollectionType.CALIBRATION, daf_butler.CollectionType.CHAINED} - for collection in butler.registry.queryCollections(..., collectionTypes=target_types): + contents.saveDatasets(butler.registry.queryDatasets( + datasetType=_get_dataset_types(), collections=...)) + # Save calibration collection + for collection in butler.registry.queryCollections( + collectionTypes=daf_butler.CollectionType.CALIBRATION): contents.saveCollection(collection) - # Export skymap collection even if it is empty - contents.saveCollection(lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME) - # Dataset export exports visits, but need matching visit definitions as - # well (DefineVisitsTask won't add them back in). - contents.saveDimensionData("exposure", - butler.registry.queryDimensionRecords("exposure")) - contents.saveDimensionData("visit_definition", - butler.registry.queryDimensionRecords("visit_definition")) - contents.saveDimensionData("visit_detector_region", - butler.registry.queryDimensionRecords("visit_detector_region")) + # Do not export chains, as they will need to be reworked to satisfy + # prompt processing's assumptions. if __name__ == "__main__": From 127214dd613b24c7ec187b615db8c8004938f3c8 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 7 Mar 2022 11:43:52 -0600 Subject: [PATCH 4/8] Add timer for make_preloaded_export. 
--- bin.src/make_preloaded_export.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin.src/make_preloaded_export.py b/bin.src/make_preloaded_export.py index 39194921..62c173e4 100755 --- a/bin.src/make_preloaded_export.py +++ b/bin.src/make_preloaded_export.py @@ -33,6 +33,7 @@ import os import re import sys +import time import lsst.daf.butler as daf_butler @@ -51,7 +52,10 @@ def main(): gen3_repo = os.path.abspath(args.src_repo) logging.info("Exporting Gen 3 registry to configure new repos...") + start = time.time_ns() _export_for_copy(gen3_repo) + end = time.time_ns() + logging.info("Export finished in %.3fs.", 1e-9 * (end - start)) def _get_dataset_types(): From 23afc837686767fcacfe3a2b3a5e014a5b4d826e Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 14 Mar 2022 14:00:42 -0500 Subject: [PATCH 5/8] Create make_remote_butler.py for creating a duplicate repo. --- bin.src/make_remote_butler.py | 119 ++++++++++++++++++++++++++++++++++ etc/db_butler.yaml | 3 + ups/prompt_prototype.table | 1 + 3 files changed, 123 insertions(+) create mode 100755 bin.src/make_remote_butler.py create mode 100644 etc/db_butler.yaml diff --git a/bin.src/make_remote_butler.py b/bin.src/make_remote_butler.py new file mode 100755 index 00000000..3665b75f --- /dev/null +++ b/bin.src/make_remote_butler.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# This file is part of prompt_prototype. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + + +"""Simple script for creating a repository at a remote URI, given +a source repository and export file. + +For most values of --target-repo and --seed-config, this script is only useful +if run from the prompt-proto project on Google Cloud. + +The user is responsible for clearing any old copies of the repository from +both the target URI and the registry database. +""" + + +import argparse +import logging +import os +import sys +import time + +from lsst.utils import getPackageDir +from lsst.daf.butler import Butler, CollectionType, Config +from lsst.obs.base import Instrument + + +def _make_parser(): + parser = argparse.ArgumentParser() + # Could reasonably be positional arguments, but keep them as keywords to + # prevent users from confusing --src-repo with --target-repo. + parser.add_argument("--src-repo", required=True, + help="The location of the repository whose files are to be copied.") + parser.add_argument("--target-repo", required=True, + help="The URI of the repository to create.") + parser.add_argument("--seed-config", + default=os.path.join(getPackageDir("prompt_prototype"), "etc", "db_butler.yaml"), + help="The config file to use for the new repository. Defaults to etc/db_butler.yaml.") + parser.add_argument("--export-file", default="export.yaml", + help="The export file containing the repository contents. Defaults to ./export.yaml.") + return parser + + +def _add_chains(butler): + """Create collections to serve as a uniform interface. + + Parameters + ---------- + butler : `lsst.daf.butler.Butler` + A Butler pointing to the repository to modify. 
Assumed to already contain the following collections: + + - standard calibration collection + - standard skymap collection + - templates/* + - refcats/* + """ + butler.registry.registerCollection("templates", type=CollectionType.CHAINED) + butler.registry.setCollectionChain( + "templates", + list(butler.registry.queryCollections("templates/*", collectionTypes=CollectionType.RUN)) + ) + + butler.registry.registerCollection("refcats", type=CollectionType.CHAINED) + butler.registry.setCollectionChain( + "refcats", + list(butler.registry.queryCollections("refcats/*", collectionTypes=CollectionType.RUN)) + ) + + instrument = Instrument.fromName(list(butler.registry.queryDataIds("instrument"))[0]["instrument"], + butler.registry) + defaults = instrument.makeCollectionName("defaults") + butler.registry.registerCollection(defaults, type=CollectionType.CHAINED) + butler.registry.setCollectionChain( + defaults, + [instrument.makeCalibrationCollectionName(), "templates", "skymaps", "refcats"] + ) + + +def main(): + logging.basicConfig(level=logging.INFO, stream=sys.stdout) + + args = _make_parser().parse_args() + seed_config = Config(args.seed_config) + logging.info("Creating repository at %s...", args.target_repo) + start_make = time.time_ns() + config = Butler.makeRepo(args.target_repo, config=seed_config, overwrite=False) + end_make = time.time_ns() + logging.info("Repository creation finished in %.3fs", 1e-9 * (end_make - start_make)) + start_butler = time.time_ns() + butler = Butler(config, writeable=True) + end_butler = time.time_ns() + logging.info("Butler creation finished in %.3fs", 1e-9 * (end_butler - start_butler)) + start_import = time.time_ns() + butler.import_(directory=args.src_repo, filename=args.export_file, transfer="auto") + end_import = time.time_ns() + logging.info("Import finished in %.3fs", 1e-9 * (end_import - start_import)) + _add_chains(butler) + + +if __name__ == "__main__": + main() diff --git a/etc/db_butler.yaml b/etc/db_butler.yaml new file 
mode 100644 index 00000000..1be7363b --- /dev/null +++ b/etc/db_butler.yaml @@ -0,0 +1,3 @@ +registry: + db: postgresql://postgres@localhost:5432/ + namespace: support_data_template diff --git a/ups/prompt_prototype.table b/ups/prompt_prototype.table index 26064978..93419c54 100644 --- a/ups/prompt_prototype.table +++ b/ups/prompt_prototype.table @@ -4,6 +4,7 @@ # the "base" package. setupRequired(base) setupRequired(sconsUtils) +setupRequired(utils) # Used by scripts in bin.src setupRequired(daf_butler) # Used by middleware_interface module. setupRequired(obs_base) setupRequired(obs_lsst) From 776cf52962fe201284522166734b9503b59a5fc3 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 14 Mar 2022 15:08:43 -0500 Subject: [PATCH 6/8] Clean up timer code. --- bin.src/make_remote_butler.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/bin.src/make_remote_butler.py b/bin.src/make_remote_butler.py index 3665b75f..594f4077 100755 --- a/bin.src/make_remote_butler.py +++ b/bin.src/make_remote_butler.py @@ -36,9 +36,9 @@ import logging import os import sys -import time from lsst.utils import getPackageDir +from lsst.utils.timer import time_this from lsst.daf.butler import Butler, CollectionType, Config from lsst.obs.base import Instrument @@ -100,18 +100,12 @@ def main(): args = _make_parser().parse_args() seed_config = Config(args.seed_config) logging.info("Creating repository at %s...", args.target_repo) - start_make = time.time_ns() - config = Butler.makeRepo(args.target_repo, config=seed_config, overwrite=False) - end_make = time.time_ns() - logging.info("Repository creation finished in %.3fs", 1e-9 * (end_make - start_make)) - start_butler = time.time_ns() - butler = Butler(config, writeable=True) - end_butler = time.time_ns() - logging.info("Butler creation finished in %.3fs", 1e-9 * (end_butler - start_butler)) - start_import = time.time_ns() - butler.import_(directory=args.src_repo, filename=args.export_file, 
transfer="auto") - end_import = time.time_ns() - logging.info("Import finished in %.3fs", 1e-9 * (end_import - start_import)) + with time_this(msg="Repository creation", level=logging.INFO): + config = Butler.makeRepo(args.target_repo, config=seed_config, overwrite=False) + with time_this(msg="Butler creation", level=logging.INFO): + butler = Butler(config, writeable=True) + with time_this(msg="Import", level=logging.INFO): + butler.import_(directory=args.src_repo, filename=args.export_file, transfer="auto") _add_chains(butler) From a705fa4fccdfa2120eb3c5dc19da9d6b5d7d18e8 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 7 Mar 2022 12:42:31 -0600 Subject: [PATCH 7/8] Support HSC in upload.py. This will allow HSC raws to be generated to put into the central repo. --- doc/playbook.rst | 2 +- python/tester/upload.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/playbook.rst b/doc/playbook.rst index dac12c06..89db695e 100644 --- a/doc/playbook.rst +++ b/doc/playbook.rst @@ -48,7 +48,7 @@ Pub/Sub Topics ============== One Google Pub/Sub topic is used for ``nextVisit`` events. -Additional topics are used for images from each instrument, where the instrument is one of ``LSSTCam``, ``LSSTComCam``, ``LATISS``, or ``DECam``. +Additional topics are used for images from each instrument, where the instrument is one of ``LSSTCam``, ``LSSTComCam``, ``LATISS``, ``DECam``, or ``HSC``. 
To create the topic, in the Google Cloud Console for the ``prompt-proto`` project: diff --git a/python/tester/upload.py b/python/tester/upload.py index a7e9d42a..c22241eb 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -20,6 +20,7 @@ class Instrument: "LSSTComCam": Instrument(2, 9), "LATISS": Instrument(1, 1), "DECam": Instrument(1, 62), + "HSC": Instrument(1, 112), } EXPOSURE_INTERVAL = 18 SLEW_INTERVAL = 2 From 5424fb889c6d65fe85509f114c77c179b0ac0a97 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 7 Mar 2022 15:38:49 -0600 Subject: [PATCH 8/8] Update bucket descriptions to match current plans. The Playbook is the only place where we can practically document the buckets at present. --- doc/playbook.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/playbook.rst b/doc/playbook.rst index 89db695e..37d0ab0a 100644 --- a/doc/playbook.rst +++ b/doc/playbook.rst @@ -2,6 +2,8 @@ Playbook for the Prompt Processing Proposal and Prototype ######################################################### +.. _DMTN-219: https://dmtn-219.lsst.io/ + Table of Contents ================= @@ -65,11 +67,14 @@ On the other hand, using multiple topics is also simple to do. Buckets ======= -A single bucket named ``rubin-prompt-proto-main`` has been created to hold incoming raw images. +A single bucket named ``rubin-prompt-proto-main`` has been created to hold the central repository described in `DMTN-219`_, as well as incoming raw images. + +The bucket ``rubin-prompt-proto-support-data-template`` contains a pristine copy of the calibration datasets and templates. +This bucket is not intended for direct use by the prototype, but can be used to restore the central repository to its state at the start of an observing run. -An additional bucket will be needed eventually to hold a Butler repo containing calibration datasets and templates. 
+The bucket ``rubin-prompt-proto-unobserved`` contains raw files that the upload script(s) can draw from to create incoming raws for ``rubin-prompt-proto-main``. -The raw image bucket has had notifications configured for it; these publish to a Google Pub/Sub topic as mentioned in the previous section. +The ``-main`` bucket has had notifications configured for it; these publish to a Google Pub/Sub topic as mentioned in the previous section. To configure these notifications, in a shell: .. code-block:: sh