From 3644dc553e26b90594eb63237b35fc325b0ebcdd Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 21 Jun 2022 13:41:42 -0500 Subject: [PATCH 01/12] Fix obsolete HSC config. --- pipelines/calibrate.py | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/pipelines/calibrate.py b/pipelines/calibrate.py index 34d3287e..4a580681 100644 --- a/pipelines/calibrate.py +++ b/pipelines/calibrate.py @@ -1,32 +1,15 @@ # Config override for lsst.pipe.tasks.calibrate.CalibrateTask # This config is for the ap_verify datasets that use gaia/panstarrs as their # refcat names. -from lsst.meas.algorithms import LoadIndexedReferenceObjectsTask # Use gaia for astrometry (phot_g_mean for everything, as that is the broadest -# band with the most depth) -# Use panstarrs for photometry (grizy filters) -for refObjLoader in (config.astromRefObjLoader, - config.photoRefObjLoader,): - refObjLoader.retarget(LoadIndexedReferenceObjectsTask) +# band with the most depth). + config.connections.astromRefCat = "gaia" config.astromRefObjLoader.ref_dataset_name = config.connections.astromRefCat -config.astromRefObjLoader.filterMap = { - "u": "phot_g_mean", - "g": "phot_g_mean", - "r": "phot_g_mean", - "i": "phot_g_mean", - "z": "phot_g_mean", - "y": "phot_g_mean", - "VR": "phot_g_mean"} +config.astromRefObjLoader.anyFilterMapsToThis = "phot_g_mean" +config.astromRefObjLoader.filterMap = {} + +# Use panstarrs for photometry (grizy filters). config.connections.photoRefCat = "panstarrs" config.photoRefObjLoader.ref_dataset_name = config.connections.photoRefCat -config.photoRefObjLoader.filterMap = { - "u": "g", - # TODO: workaround for DM-29186 - # "g": "g", - # "r": "r", - # "i": "i", - # "z": "z", - # "y": "y", - "VR": "g"} From aabcc55a8b2172829a65220524a68d10bbdcce66 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 21 Jun 2022 15:16:01 -0500 Subject: [PATCH 02/12] Fix broken GHA builds. --- .github/workflows/build-service.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-service.yml b/.github/workflows/build-service.yml index a0431165..672b30d1 100644 --- a/.github/workflows/build-service.yml +++ b/.github/workflows/build-service.yml @@ -42,6 +42,8 @@ jobs: cd /home/lsst/prompt_prototype source /opt/lsst/software/stack/loadLSST.bash setup -r . + # Fix permissions; arg must be absolute path. + git config --global --add safe.directory /home/lsst/prompt_prototype scons' update-service-image: From f3c08215704721cab7981355bdab5263ee0dbf5f Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 22 Jun 2022 17:14:15 -0500 Subject: [PATCH 03/12] Merge duplicate visit modules. --- python/tester/upload.py | 2 +- python/tester/visit.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 120000 python/tester/visit.py diff --git a/python/tester/upload.py b/python/tester/upload.py index 73896c9e..d277f732 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -8,7 +8,7 @@ import re import sys import time -from visit import Visit +from activator.visit import Visit @dataclass diff --git a/python/tester/visit.py b/python/tester/visit.py deleted file mode 120000 index fdaa3f7a..00000000 --- a/python/tester/visit.py +++ /dev/null @@ -1 +0,0 @@ -../activator/visit.py \ No newline at end of file From 54ac716fc9c158ae28256dd80b5f7e06920070ed Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 22 Jun 2022 17:27:31 -0500 Subject: [PATCH 04/12] Factor out shared raw path definitions. 
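
The path format and its regular expression now live in a single module,
activator/raw.py, and both the activator and the tester import RAW_REGEXP and
get_raw_path from it instead of keeping separate copies. A minimal usage
sketch of the shared definitions (the values below are illustrative, not
taken from any dataset)::

    import re

    from activator.raw import RAW_REGEXP, get_raw_path

    # Compose the bucket path for a hypothetical exposure.
    path = get_raw_path("HSC", 50, "2016030700001", 0, 59150, "HSC-G")
    # "HSC/50/2016030700001/0/HSC-2016030700001-0-59150-HSC-G-50.fz"

    # Recover the metadata from the path, as the activator does on receipt.
    md = re.match(RAW_REGEXP, path)
    assert md["expid"] == "59150" and md["filter"] == "HSC-G"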
--- bin/prompt_prototype_upload_raws.sh | 2 +- python/activator/activator.py | 11 ++---- python/activator/raw.py | 47 ++++++++++++++++++++++++ python/tester/upload.py | 28 +++----------- tests/test_raw.py | 57 +++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 32 deletions(-) create mode 100644 python/activator/raw.py create mode 100644 tests/test_raw.py diff --git a/bin/prompt_prototype_upload_raws.sh b/bin/prompt_prototype_upload_raws.sh index 752c09c6..97e3885a 100755 --- a/bin/prompt_prototype_upload_raws.sh +++ b/bin/prompt_prototype_upload_raws.sh @@ -30,7 +30,7 @@ set -e # Abort on any error RAW_DIR="${AP_VERIFY_CI_COSMOS_PDR2_DIR:?'dataset is not set up'}/raw" UPLOAD_BUCKET=rubin-prompt-proto-unobserved -# Filename format is defined in tester/upload.py and activator/activator.py: +# Filename format is defined in activator/raw.py: # instrument/detector/group/snap/instrument-group-snap-exposureId-filter-detector gsutil cp "${RAW_DIR}/HSC-0059150-050.fits.gz" \ gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz diff --git a/python/activator/activator.py b/python/activator/activator.py index 7f819807..8dd3398b 100644 --- a/python/activator/activator.py +++ b/python/activator/activator.py @@ -36,6 +36,7 @@ from lsst.obs.base import Instrument from .make_pgpass import make_pgpass from .middleware_interface import MiddlewareInterface +from .raw import RAW_REGEXP from .visit import Visit PROJECT_ID = "prompt-proto" @@ -46,12 +47,6 @@ active_instrument = Instrument.from_string(config_instrument) calib_repo = os.environ["CALIB_REPO"] image_bucket = os.environ["IMAGE_BUCKET"] -# Format for filenames of raws uploaded to image_bucket: -# instrument/detector/group/snap/instrument-group-snap-expid-filter-detector.(fits, fz, fits.gz) -oid_regexp = re.compile( - r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/" - r"(?P=instrument)-(?P=group)-(?P=snap)-(?P.*?)-(?P.*?)-(?P=detector)\.f" -) timeout = os.environ.get("IMAGE_TIMEOUT", 50) logging.basicConfig( @@ -185,7 +180,7 @@ def next_visit_handler() -> Tuple[str, int]: expected_visit.detector, ) if oid: - m = re.match(oid_regexp, oid) + m = re.match(RAW_REGEXP, oid) mwi.ingest_image(oid) expid_set.add(m.group('expid')) @@ -218,7 +213,7 @@ def next_visit_handler() -> Tuple[str, int]: for received in response.received_messages: ack_list.append(received.ack_id) oid = received.message.attributes["objectId"] - m = re.match(oid_regexp, oid) + m = re.match(RAW_REGEXP, oid) if m: instrument, detector, group, snap, expid = m.groups() _log.debug("instrument, detector, group, snap, expid = %s", m.groups()) diff --git a/python/activator/raw.py b/python/activator/raw.py new file mode 100644 index 00000000..9e4308a5 --- /dev/null +++ b/python/activator/raw.py @@ -0,0 +1,47 @@ +# This file is part of prompt_prototype. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +"""Common definitions of raw paths. + +This module provides tools to convert raw paths into exposure metadata and +vice versa. +""" + +__all__ = ["RAW_REGEXP", "get_raw_path"] + +import re + +# Format for filenames of raws uploaded to image bucket: +# instrument/detector/group/snap/instrument-group-snap-expid-filter-detector.(fits, fz, fits.gz) +RAW_REGEXP = re.compile( + r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/" + r"(?P=instrument)-(?P=group)-(?P=snap)-(?P.*?)-(?P.*?)-(?P=detector)\.f" +) + + +def get_raw_path(instrument, detector, group, snap, exposure_id, filter): + """The path on which to store raws in the image bucket. + """ + return ( + f"{instrument}/{detector}/{group}/{snap}" + f"/{instrument}-{group}-{snap}" + f"-{exposure_id}-{filter}-{detector}.fz" + ) diff --git a/python/tester/upload.py b/python/tester/upload.py index d277f732..756e449d 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -8,6 +8,7 @@ import re import sys import time +from activator.raw import RAW_REGEXP, get_raw_path from activator.visit import Visit @@ -33,25 +34,6 @@ class Instrument: PROJECT_ID = "prompt-proto" -def raw_path(instrument, detector, group, snap, exposure_id, filter): - """The path on which to store raws in the raw bucket. - - This format is also assumed by ``activator/activator.py.`` - """ - return ( - f"{instrument}/{detector}/{group}/{snap}" - f"/{instrument}-{group}-{snap}" - f"-{exposure_id}-{filter}-{detector}.fz" - ) - - -# TODO: unify the format code across prompt_prototype -RAW_REGEXP = re.compile( - r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/" - r"(?P=instrument)-(?P=group)-(?P=snap)-(?P.*?)-(?P.*?)-(?P=detector)\.f" -) - - logging.basicConfig( format="{levelname} {asctime} {name} - {message}", style="{", @@ -367,8 +349,8 @@ def upload_from_pool(visit, snap_id): # the sent images so that they have the generated ID. Pipeline # execution fails if they don't match. exposure_id = make_exposure_id(visit.instrument, visit.group, snap_id) - filename = raw_path(visit.instrument, visit.detector, visit.group, snap_id, - exposure_id, visit.filter) + filename = get_raw_path(visit.instrument, visit.detector, visit.group, snap_id, + exposure_id, visit.filter) src_bucket.copy_blob(src_blob, dest_bucket, new_name=filename) process_group(publisher, visit_infos, upload_from_pool) _log.info("Slewing to next group") @@ -399,8 +381,8 @@ def upload_from_random(publisher, instrument, dest_bucket, n_groups, group_base) # closures for the bucket and data. def upload_dummy(visit, snap_id): exposure_id = make_exposure_id(visit.instrument, visit.group, snap_id) - filename = raw_path(visit.instrument, visit.detector, visit.group, snap_id, - exposure_id, visit.filter) + filename = get_raw_path(visit.instrument, visit.detector, visit.group, snap_id, + exposure_id, visit.filter) dest_bucket.blob(filename).upload_from_string("Test") process_group(publisher, visit_infos, upload_dummy) _log.info("Slewing to next group") diff --git a/tests/test_raw.py b/tests/test_raw.py new file mode 100644 index 00000000..0fdf1d27 --- /dev/null +++ b/tests/test_raw.py @@ -0,0 +1,57 @@ +# This file is part of prompt_prototype. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). 
+# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import re +import unittest + +from activator.raw import RAW_REGEXP, get_raw_path + + +class RawTest(unittest.TestCase): + """Test the API for handling raw paths. + """ + def setUp(self): + super().setUp() + + self.instrument = "NotACam" + self.detector = 42 + self.group = "2022032100001" + self.snaps = 2 + self.filter = "k2022" + self.ra = 134.5454 + self.dec = -65.3261 + self.rot = 135.0 + self.kind = "IMAGINARY" + self.snap = 1 + self.exposure = 404 + + def test_writeread(self): + """Test that raw module can parse the paths it creates. + """ + path = get_raw_path(self.instrument, self.detector, self.group, self.snap, self.exposure, self.filter) + parsed = re.match(RAW_REGEXP, path) + self.assertIsNotNone(parsed) + self.assertEqual(parsed['instrument'], str(self.instrument)) + self.assertEqual(parsed['detector'], str(self.detector)) + self.assertEqual(parsed['group'], str(self.group)) + self.assertEqual(parsed['snap'], str(self.snap)) + self.assertEqual(parsed['expid'], str(self.exposure)) + self.assertEqual(parsed['filter'], str(self.filter)) From 4b8fa6a17c849fd6b9263a27adc486f04ceb8f19 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 22 Jun 2022 18:02:02 -0500 Subject: [PATCH 05/12] Move all metadata to raw "directory". This allows more flexibility in the raw filename, removing a constraint on what naming convention the summit team can choose. 
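
After this change all identifying metadata (instrument, detector, group,
snap, exposure ID, filter) is carried by the directory components, and
RAW_REGEXP only requires the basename to have a FITS-like extension. A sketch
of the resulting layout, using get_raw_path as updated below (the values are
illustrative)::

    from activator.raw import RAW_REGEXP, get_raw_path

    path = get_raw_path("HSC", 50, "2016030700001", 0, "0059150", "HSC-G")
    # "HSC/50/2016030700001/0/0059150/HSC-G/HSC-2016030700001-0-0059150-HSC-G-50.fz"

    # Any basename ending in .fits, .fz, or .fits.gz parses the same way,
    # because the metadata is now taken from the directories alone.
    assert RAW_REGEXP.match(path)["expid"] == "0059150"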
--- bin/prompt_prototype_upload_raws.sh | 6 +++--- python/activator/activator.py | 2 +- python/activator/raw.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/prompt_prototype_upload_raws.sh b/bin/prompt_prototype_upload_raws.sh index 97e3885a..1ba5fffd 100755 --- a/bin/prompt_prototype_upload_raws.sh +++ b/bin/prompt_prototype_upload_raws.sh @@ -31,8 +31,8 @@ RAW_DIR="${AP_VERIFY_CI_COSMOS_PDR2_DIR:?'dataset is not set up'}/raw" UPLOAD_BUCKET=rubin-prompt-proto-unobserved # Filename format is defined in activator/raw.py: -# instrument/detector/group/snap/instrument-group-snap-exposureId-filter-detector +# instrument/detector/group/snap/exposureId/filter/instrument-group-snap-exposureId-filter-detector gsutil cp "${RAW_DIR}/HSC-0059150-050.fits.gz" \ - gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz + gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/0059150/HSC-G/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz gsutil cp "${RAW_DIR}/HSC-0059160-051.fits.gz" \ - gs://${UPLOAD_BUCKET}/HSC/51/2016030700002/0/HSC-2016030700002-0-0059160-HSC-G-51.fits.gz + gs://${UPLOAD_BUCKET}/HSC/51/2016030700002/0/0059150/HSC-G/HSC-2016030700002-0-0059160-HSC-G-51.fits.gz diff --git a/python/activator/activator.py b/python/activator/activator.py index 8dd3398b..5afbf88f 100644 --- a/python/activator/activator.py +++ b/python/activator/activator.py @@ -116,7 +116,7 @@ def check_for_snap( was found. If multiple files match, this function logs an error but returns one of the files anyway. """ - prefix = f"{instrument}/{detector}/{group}/{snap}/{instrument}-{group}-{snap}-" + prefix = f"{instrument}/{detector}/{group}/{snap}/" _log.debug(f"Checking for '{prefix}'") blobs = list(storage_client.list_blobs(image_bucket, prefix=prefix)) if not blobs: diff --git a/python/activator/raw.py b/python/activator/raw.py index 9e4308a5..4a931f18 100644 --- a/python/activator/raw.py +++ b/python/activator/raw.py @@ -30,10 +30,10 @@ import re # Format for filenames of raws uploaded to image bucket: -# instrument/detector/group/snap/instrument-group-snap-expid-filter-detector.(fits, fz, fits.gz) +# instrument/detector/group/snap/expid/filter/*.(fits, fz, fits.gz) RAW_REGEXP = re.compile( - r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/" - r"(?P=instrument)-(?P=group)-(?P=snap)-(?P.*?)-(?P.*?)-(?P=detector)\.f" + r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/(?P.*?)/(?P.*?)/" + r"[^/]+\.f" ) @@ -41,7 +41,7 @@ def get_raw_path(instrument, detector, group, snap, exposure_id, filter): """The path on which to store raws in the image bucket. """ return ( - f"{instrument}/{detector}/{group}/{snap}" + f"{instrument}/{detector}/{group}/{snap}/{exposure_id}/{filter}" f"/{instrument}-{group}-{snap}" f"-{exposure_id}-{filter}-{detector}.fz" ) From 81a7984b84fb97c97808e213cd63bf0a3177c349 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 23 Jun 2022 17:58:32 -0500 Subject: [PATCH 06/12] Support a broader subset of HSC-COSMOS inputs to upload.py. --- python/tester/upload.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/tester/upload.py b/python/tester/upload.py index 756e449d..b83cc6d6 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -257,7 +257,15 @@ def get_samples(bucket, instrument): # upload time, another is to download the blob and actually read # its header. 
hsc_metadata = { + 59126: {"ra": 149.28531249999997, "dec": 2.935002777777778, "rot": 270.0}, + 59134: {"ra": 149.45749166666664, "dec": 2.926961111111111, "rot": 270.0}, + 59138: {"ra": 149.45739166666664, "dec": 1.4269472222222224, "rot": 270.0}, + 59142: {"ra": 149.4992083333333, "dec": 2.8853, "rot": 270.0}, 59150: {"ra": 149.96643749999996, "dec": 2.2202916666666668, "rot": 270.0}, + 59152: {"ra": 149.9247333333333, "dec": 2.1577777777777776, "rot": 270.0}, + 59154: {"ra": 150.22329166666663, "dec": 2.238341666666667, "rot": 270.0}, + 59156: {"ra": 150.26497083333334, "dec": 2.1966694444444443, "rot": 270.0}, + 59158: {"ra": 150.30668333333332, "dec": 2.2591888888888887, "rot": 270.0}, 59160: {"ra": 150.18157499999998, "dec": 2.2800083333333334, "rot": 270.0}, } From 230098e2c817ddbddf2e7115a3c78ee1c0cb3bae Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 27 Jun 2022 18:31:51 -0500 Subject: [PATCH 07/12] Clarify documentation for make_remote_butler.py. --- bin.src/make_remote_butler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin.src/make_remote_butler.py b/bin.src/make_remote_butler.py index 594f4077..96f1f32a 100755 --- a/bin.src/make_remote_butler.py +++ b/bin.src/make_remote_butler.py @@ -25,7 +25,9 @@ a source repository and export file. For most values of --target-repo and --seed-config, this script is only useful -if run from the prompt-proto project on Google Cloud. +if run from the prompt-proto project on Google Cloud (because of access +restrictions to **both** the repository's storage location and its registry +database). The user is responsible for clearing any old copies of the repository from both the target URI and the registry database. From d32e0bb538ce6cf38230ab1cfdc8f05baf85d2ab Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 28 Jun 2022 17:26:33 -0500 Subject: [PATCH 08/12] Add more raws to input dataset. --- bin/prompt_prototype_upload_raws.sh | 36 +++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/bin/prompt_prototype_upload_raws.sh b/bin/prompt_prototype_upload_raws.sh index 1ba5fffd..ab8307d3 100755 --- a/bin/prompt_prototype_upload_raws.sh +++ b/bin/prompt_prototype_upload_raws.sh @@ -20,19 +20,37 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -# This script uploads the raw files from the ap_verify_ci_cosmos_pdr2 dataset -# to Google Storage. It renames the files to match prompt_prototype conventions. -# The user must have gsutil already configured, and must have -# ap_verify_ci_cosmos_pdr2 set up. +# This script uploads the raw files from the HSC PDR2 run to Google Storage. It +# renames the files to match prompt_prototype conventions. The user must have +# gsutil already configured. 
set -e # Abort on any error -RAW_DIR="${AP_VERIFY_CI_COSMOS_PDR2_DIR:?'dataset is not set up'}/raw" +RAW_DIR="/datasets/hsc/raw/ssp_pdr2/2016-03-07" UPLOAD_BUCKET=rubin-prompt-proto-unobserved # Filename format is defined in activator/raw.py: # instrument/detector/group/snap/exposureId/filter/instrument-group-snap-exposureId-filter-detector -gsutil cp "${RAW_DIR}/HSC-0059150-050.fits.gz" \ - gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/0059150/HSC-G/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz -gsutil cp "${RAW_DIR}/HSC-0059160-051.fits.gz" \ - gs://${UPLOAD_BUCKET}/HSC/51/2016030700002/0/0059150/HSC-G/HSC-2016030700002-0-0059160-HSC-G-51.fits.gz +gsutil cp "${RAW_DIR}/HSCA05913553.fits" \ + gs://${UPLOAD_BUCKET}/HSC/0/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-0.fits +gsutil cp "${RAW_DIR}/HSCA05913542.fits" \ + gs://${UPLOAD_BUCKET}/HSC/4/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-4.fits +gsutil cp "${RAW_DIR}/HSCA05913543.fits" \ + gs://${UPLOAD_BUCKET}/HSC/5/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-5.fits + +gsutil cp "${RAW_DIR}/HSCA05914353.fits" \ + gs://${UPLOAD_BUCKET}/HSC/0/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-0.fits +gsutil cp "${RAW_DIR}/HSCA05914343.fits" \ + gs://${UPLOAD_BUCKET}/HSC/5/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-5.fits +gsutil cp "${RAW_DIR}/HSCA05914337.fits" \ + gs://${UPLOAD_BUCKET}/HSC/11/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-11.fits + +gsutil cp "${RAW_DIR}/HSCA05915112.fits" \ + gs://${UPLOAD_BUCKET}/HSC/50/2016030700003/0/0059150/HSC-G/HSC-2016030700003-0-0059150-HSC-G-50.fits +gsutil cp "${RAW_DIR}/HSCA05915116.fits" \ + gs://${UPLOAD_BUCKET}/HSC/58/2016030700003/0/0059150/HSC-G/HSC-2016030700003-0-0059150-HSC-G-58.fits + +gsutil cp "${RAW_DIR}/HSCA05916109.fits" \ + gs://${UPLOAD_BUCKET}/HSC/43/2016030700004/0/0059150/HSC-G/HSC-2016030700004-0-0059160-HSC-G-43.fits +gsutil cp "${RAW_DIR}/HSCA05916113.fits" \ + gs://${UPLOAD_BUCKET}/HSC/51/2016030700004/0/0059150/HSC-G/HSC-2016030700004-0-0059160-HSC-G-51.fits From 253f778657859c6f581a0fa139ae80a1ce71f12b Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 11 Jul 2022 16:33:22 -0500 Subject: [PATCH 09/12] Remove upload.py support for arbitrarily large numbers of groups. The increase in the raw dataset size makes it unnecessary, and it complicates ID management. --- python/tester/upload.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/tester/upload.py b/python/tester/upload.py index b83cc6d6..f32507ff 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -333,8 +333,17 @@ def upload_from_raws(publisher, instrument, raw_pool, src_bucket, dest_bucket, n group IDs. group_base : `int` The base number from which to offset new group numbers. + + Exceptions + ---------- + ValueError + Raised if ``n_groups`` exceeds the number of groups in ``raw_pool``. 
""" - for i, true_group in enumerate(itertools.islice(itertools.cycle(raw_pool), n_groups)): + if n_groups > len(raw_pool): + raise ValueError(f"Requested {n_groups} groups, but only {len(raw_pool)} " + "unobserved raws are available.") + + for i, true_group in enumerate(itertools.islice(raw_pool, n_groups)): group = group_base + i _log.debug(f"Processing group {group} from unobserved {true_group}...") # snap_dict maps snap_id to {visit: blob} From 42f87dfc8428d7bce8ba6059ec101093fa9f2aee Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 11 Jul 2022 17:23:36 -0500 Subject: [PATCH 10/12] Stop overwriting exposure ID of uploaded files. --- python/tester/upload.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/tester/upload.py b/python/tester/upload.py index f32507ff..58d8e360 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -362,10 +362,9 @@ def upload_from_raws(publisher, instrument, raw_pool, src_bucket, dest_bucket, n # closures for the bucket and data. def upload_from_pool(visit, snap_id): src_blob = snap_dict[snap_id][visit] - # TODO: use the images' native exposure ID here, or (better) hack - # the sent images so that they have the generated ID. Pipeline - # execution fails if they don't match. - exposure_id = make_exposure_id(visit.instrument, visit.group, snap_id) + # TODO: converting raw_pool from a nested mapping to an indexable + # custom class would make it easier to include such metadata as expId. + exposure_id = int(re.match(RAW_REGEXP, src_blob.name).group('expid')) filename = get_raw_path(visit.instrument, visit.detector, visit.group, snap_id, exposure_id, visit.filter) src_bucket.copy_blob(src_blob, dest_bucket, new_name=filename) From bf7ad83768d2cdf7d2d8d739477ce07324c88030 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 12 Jul 2022 14:23:07 -0500 Subject: [PATCH 11/12] Process only the requested exposure IDs in run_pipeline. --- python/activator/middleware_interface.py | 24 ++++++------------------ tests/test_middleware_interface.py | 12 ------------ 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/python/activator/middleware_interface.py b/python/activator/middleware_interface.py index ac51766e..29cbbc38 100644 --- a/python/activator/middleware_interface.py +++ b/python/activator/middleware_interface.py @@ -386,28 +386,16 @@ def run_pipeline(self, visit: Visit, exposure_ids: set) -> None: Group of snaps from one detector to be processed. exposure_ids : `set` Identifiers of the exposures that were received. - TODO: We need to be careful about the distinction between snap IDs - (a running series from 0 to N-1) and exposure IDs (which are more - complex and encode other info). Butler currently does not recognize - a snap ID, as such. - TODO: I believe this is unnecessary because it should be encoded - in the `visit` object, but we'll have to test how that works once - we implemented this with actual data. """ # TODO: we want to define visits earlier, but we have to ingest a # faked raw file and appropriate SSO data during prep (and then # cleanup when ingesting the real data). - # TODO: Also, using this approach (instead of saving the datasetRefs - # returned by ingest and using them to define visits) also requires - # pruning this list down to only the exposures that aren't already - # defined (otherwise defineVisits.run does extra "nothing" work). 
- exposures = set(self.butler.registry.queryDataIds(["exposure"])) - self.define_visits.run(exposures) - - # TODO: temporary workaround for uploader and image header not agreeing - # on what the exposure ID is. We use the full exposure list here - # because we can't support multiple visits anyway. - exposure_ids = {data_id["exposure"] for data_id in exposures} + try: + self.define_visits.run({"instrument": self.instrument.getName(), + "exposure": exp} for exp in exposure_ids) + except lsst.daf.butler.registry.DataIdError as e: + # TODO: a good place for a custom exception? + raise RuntimeError("No data to process.") from e # TODO: can we move this from_pipeline call to prep_butler? where = f"detector={visit.detector} and exposure in ({','.join(str(x) for x in exposure_ids)})" diff --git a/tests/test_middleware_interface.py b/tests/test_middleware_interface.py index c826390d..b556bd8f 100644 --- a/tests/test_middleware_interface.py +++ b/tests/test_middleware_interface.py @@ -291,7 +291,6 @@ def test_run_pipeline(self): self.interface.run_pipeline(self.next_visit, {1}) mock_run.assert_called_once_with(register_dataset_types=True) - @unittest.skip("run_pipeline ignores the exposure_ids arg, so we can't test passing invalid ones.") def test_run_pipeline_empty_quantum_graph(self): """Test that running a pipeline that results in an empty quantum graph (because the exposure ids are wrong), raises. @@ -310,14 +309,3 @@ def test_run_pipeline_empty_quantum_graph(self): with self.assertRaisesRegex(RuntimeError, "No data to process"): self.interface.run_pipeline(self.next_visit, {2}) - - def test_run_pipeline_missing_raws(self): - """Test that running a pipeline with no raws, raises. - """ - # Have to setup the data so that we can create the pipeline executor. - self.interface.prep_butler(self.next_visit) - - # TODO: this exception will almost certainly change once we change how - # we do defineVisits. - with self.assertRaisesRegex(RuntimeError, "No exposures given"): - self.interface.run_pipeline(self.next_visit, {2}) From 63b7e69137eb6a7a29024b466fa04bc13479db0d Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 12 Jul 2022 16:38:07 -0500 Subject: [PATCH 12/12] Avoid hardcoded collection names in MiddlewareInterface. --- python/activator/middleware_interface.py | 31 +++++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/python/activator/middleware_interface.py b/python/activator/middleware_interface.py index 29cbbc38..793dd1f6 100644 --- a/python/activator/middleware_interface.py +++ b/python/activator/middleware_interface.py @@ -75,6 +75,12 @@ class MiddlewareInterface: appropriate for use in the Google Cloud environment; typically only change this when running local tests. """ + _COLLECTION_TEMPLATE = "templates" + """The collection used for templates. + """ + _COLLECTION_SKYMAP = "skymaps" + """The collection used for skymaps. 
+ """ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str, butler: Butler, @@ -90,7 +96,7 @@ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str, self._download_store = None self.instrument = lsst.obs.base.Instrument.from_string(instrument) - self.output_collection = f"{self.instrument.getName()}/prompt" + self.output_collection = self.instrument.makeCollectionName("prompt") self._init_local_butler(butler) self._init_ingester() @@ -104,7 +110,7 @@ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str, # This code will break once cameras start being versioned. self.camera = self.central_butler.get( "camera", instrument=self.instrument.getName(), - collections=self.instrument.makeCalibrationCollectionName("unbounded") + collections=self.instrument.makeUnboundedCalibrationRunName() ) self.skymap = self.central_butler.get("skyMap") @@ -170,9 +176,9 @@ def prep_butler(self, visit: Visit) -> None: self._export_calibs(export, visit.detector, visit.filter) # CHAINED collections - export.saveCollection("refcats") - export.saveCollection("templates") - export.saveCollection(self.instrument.makeCollectionName("defaults")) + export.saveCollection(self.instrument.makeRefCatCollectionName()) + export.saveCollection(self._COLLECTION_TEMPLATE) + export.saveCollection(self.instrument.makeUmbrellaCollectionName()) self.butler.import_(filename=export_file.name, directory=self.central_butler.datastore.root, @@ -203,10 +209,11 @@ def _export_refcats(self, export, center, radius): # collection, so we have to specify a list here. Replace this # with another solution ASAP. possible_refcats = ["gaia", "panstarrs", "gaia_dr2_20200414", "ps1_pv3_3pi_20170110"] - export.saveDatasets(self.central_butler.registry.queryDatasets(possible_refcats, - collections="refcats", - where=htm_where, - findFirst=True)) + export.saveDatasets(self.central_butler.registry.queryDatasets( + possible_refcats, + collections=self.instrument.makeRefCatCollectionName(), + where=htm_where, + findFirst=True)) def _export_skymap_and_templates(self, export, center, detector, wcs): """Export the skymap and templates for this visit from the central @@ -229,7 +236,7 @@ def _export_skymap_and_templates(self, export, center, detector, wcs): # otherwise we get a UNIQUE constraint error when prepping for the # second visit. export.saveDatasets(self.central_butler.registry.queryDatasets("skyMap", - collections="skymaps", + collections=self._COLLECTION_SKYMAP, findFirst=True)) # Getting only one tract should be safe: we're getting the # tract closest to this detector, so we should be well within @@ -247,7 +254,7 @@ def _export_skymap_and_templates(self, export, center, detector, wcs): # TODO: alternately, can we just assume that there is exactly # one coadd type in the central butler? 
export.saveDatasets(self.central_butler.registry.queryDatasets("*Coadd", - collections="templates", + collections=self._COLLECTION_TEMPLATE, where=template_where)) def _export_calibs(self, export, detector_id, filter): @@ -295,7 +302,7 @@ def _prep_collections(self): CollectionType.RUN) self.butler.registry.registerCollection(self.output_run, CollectionType.RUN) self.butler.registry.registerCollection(self.output_collection, CollectionType.CHAINED) - collections = [self.instrument.makeCollectionName("defaults"), + collections = [self.instrument.makeUmbrellaCollectionName(), self.instrument.makeDefaultRawIngestRunName(), self.output_run] self.butler.registry.setCollectionChain(self.output_collection, collections)
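
Note on the collection names used above: they are produced by the
lsst.obs.base.Instrument helpers rather than hardcoded strings, so they track
whatever the obs packages define. A sketch of the expected expansions for
HSC, matching the literals they replace (the exact strings are set by the obs
packages, so treat them as assumptions rather than guarantees)::

    from lsst.obs.base import Instrument

    # Fully-qualified class name used for illustration only.
    instrument = Instrument.from_string("lsst.obs.subaru.HyperSuprimeCam")

    instrument.makeCollectionName("prompt")       # expected "HSC/prompt"
    instrument.makeUmbrellaCollectionName()       # expected "HSC/defaults"
    instrument.makeRefCatCollectionName()         # expected "refcats"
    instrument.makeUnboundedCalibrationRunName()  # expected "HSC/calib/unbounded"
    instrument.makeDefaultRawIngestRunName()      # expected "HSC/raw/all"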