From 3644dc553e26b90594eb63237b35fc325b0ebcdd Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 21 Jun 2022 13:41:42 -0500 Subject: [PATCH 01/12] Fix obsolete HSC config. --- pipelines/calibrate.py | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/pipelines/calibrate.py b/pipelines/calibrate.py index 34d3287e..4a580681 100644 --- a/pipelines/calibrate.py +++ b/pipelines/calibrate.py @@ -1,32 +1,15 @@ # Config override for lsst.pipe.tasks.calibrate.CalibrateTask # This config is for the ap_verify datasets that use gaia/panstarrs as their # refcat names. -from lsst.meas.algorithms import LoadIndexedReferenceObjectsTask # Use gaia for astrometry (phot_g_mean for everything, as that is the broadest -# band with the most depth) -# Use panstarrs for photometry (grizy filters) -for refObjLoader in (config.astromRefObjLoader, - config.photoRefObjLoader,): - refObjLoader.retarget(LoadIndexedReferenceObjectsTask) +# band with the most depth). + config.connections.astromRefCat = "gaia" config.astromRefObjLoader.ref_dataset_name = config.connections.astromRefCat -config.astromRefObjLoader.filterMap = { - "u": "phot_g_mean", - "g": "phot_g_mean", - "r": "phot_g_mean", - "i": "phot_g_mean", - "z": "phot_g_mean", - "y": "phot_g_mean", - "VR": "phot_g_mean"} +config.astromRefObjLoader.anyFilterMapsToThis = "phot_g_mean" +config.astromRefObjLoader.filterMap = {} + +# Use panstarrs for photometry (grizy filters). config.connections.photoRefCat = "panstarrs" config.photoRefObjLoader.ref_dataset_name = config.connections.photoRefCat -config.photoRefObjLoader.filterMap = { - "u": "g", - # TODO: workaround for DM-29186 - # "g": "g", - # "r": "r", - # "i": "i", - # "z": "z", - # "y": "y", - "VR": "g"} From aabcc55a8b2172829a65220524a68d10bbdcce66 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 21 Jun 2022 15:16:01 -0500 Subject: [PATCH 02/12] Fix broken GHA builds. --- .github/workflows/build-service.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-service.yml b/.github/workflows/build-service.yml index a0431165..672b30d1 100644 --- a/.github/workflows/build-service.yml +++ b/.github/workflows/build-service.yml @@ -42,6 +42,8 @@ jobs: cd /home/lsst/prompt_prototype source /opt/lsst/software/stack/loadLSST.bash setup -r . + # Fix permissions; arg must be absolute path. + git config --global --add safe.directory /home/lsst/prompt_prototype scons' update-service-image: From f3c08215704721cab7981355bdab5263ee0dbf5f Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 22 Jun 2022 17:14:15 -0500 Subject: [PATCH 03/12] Merge duplicate visit modules. --- python/tester/upload.py | 2 +- python/tester/visit.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 120000 python/tester/visit.py diff --git a/python/tester/upload.py b/python/tester/upload.py index 73896c9e..d277f732 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -8,7 +8,7 @@ import re import sys import time -from visit import Visit +from activator.visit import Visit @dataclass diff --git a/python/tester/visit.py b/python/tester/visit.py deleted file mode 120000 index fdaa3f7a..00000000 --- a/python/tester/visit.py +++ /dev/null @@ -1 +0,0 @@ -../activator/visit.py \ No newline at end of file From 54ac716fc9c158ae28256dd80b5f7e06920070ed Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 22 Jun 2022 17:27:31 -0500 Subject: [PATCH 04/12] Factor out shared raw path definitions. 
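
The path format and its regular expression now live in a single module,
activator/raw.py, and both the activator and the tester import RAW_REGEXP and
get_raw_path from it instead of keeping separate copies. A minimal usage
sketch of the shared definitions (the values below are illustrative, not
taken from any dataset)::

    import re

    from activator.raw import RAW_REGEXP, get_raw_path

    # Compose the bucket path for a hypothetical exposure.
    path = get_raw_path("HSC", 50, "2016030700001", 0, 59150, "HSC-G")
    # "HSC/50/2016030700001/0/HSC-2016030700001-0-59150-HSC-G-50.fz"

    # Recover the metadata from the path, as the activator does on receipt.
    md = re.match(RAW_REGEXP, path)
    assert md["expid"] == "59150" and md["filter"] == "HSC-G"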
--- bin/prompt_prototype_upload_raws.sh | 2 +- python/activator/activator.py | 11 ++---- python/activator/raw.py | 47 ++++++++++++++++++++++++ python/tester/upload.py | 28 +++----------- tests/test_raw.py | 57 +++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 32 deletions(-) create mode 100644 python/activator/raw.py create mode 100644 tests/test_raw.py diff --git a/bin/prompt_prototype_upload_raws.sh b/bin/prompt_prototype_upload_raws.sh index 752c09c6..97e3885a 100755 --- a/bin/prompt_prototype_upload_raws.sh +++ b/bin/prompt_prototype_upload_raws.sh @@ -30,7 +30,7 @@ set -e # Abort on any error RAW_DIR="${AP_VERIFY_CI_COSMOS_PDR2_DIR:?'dataset is not set up'}/raw" UPLOAD_BUCKET=rubin-prompt-proto-unobserved -# Filename format is defined in tester/upload.py and activator/activator.py: +# Filename format is defined in activator/raw.py: # instrument/detector/group/snap/instrument-group-snap-exposureId-filter-detector gsutil cp "${RAW_DIR}/HSC-0059150-050.fits.gz" \ gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz diff --git a/python/activator/activator.py b/python/activator/activator.py index 7f819807..8dd3398b 100644 --- a/python/activator/activator.py +++ b/python/activator/activator.py @@ -36,6 +36,7 @@ from lsst.obs.base import Instrument from .make_pgpass import make_pgpass from .middleware_interface import MiddlewareInterface +from .raw import RAW_REGEXP from .visit import Visit PROJECT_ID = "prompt-proto" @@ -46,12 +47,6 @@ active_instrument = Instrument.from_string(config_instrument) calib_repo = os.environ["CALIB_REPO"] image_bucket = os.environ["IMAGE_BUCKET"] -# Format for filenames of raws uploaded to image_bucket: -# instrument/detector/group/snap/instrument-group-snap-expid-filter-detector.(fits, fz, fits.gz) -oid_regexp = re.compile( - r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/" - r"(?P=instrument)-(?P=group)-(?P=snap)-(?P.*?)-(?P.*?)-(?P=detector)\.f" -) timeout = os.environ.get("IMAGE_TIMEOUT", 50) logging.basicConfig( @@ -185,7 +180,7 @@ def next_visit_handler() -> Tuple[str, int]: expected_visit.detector, ) if oid: - m = re.match(oid_regexp, oid) + m = re.match(RAW_REGEXP, oid) mwi.ingest_image(oid) expid_set.add(m.group('expid')) @@ -218,7 +213,7 @@ def next_visit_handler() -> Tuple[str, int]: for received in response.received_messages: ack_list.append(received.ack_id) oid = received.message.attributes["objectId"] - m = re.match(oid_regexp, oid) + m = re.match(RAW_REGEXP, oid) if m: instrument, detector, group, snap, expid = m.groups() _log.debug("instrument, detector, group, snap, expid = %s", m.groups()) diff --git a/python/activator/raw.py b/python/activator/raw.py new file mode 100644 index 00000000..9e4308a5 --- /dev/null +++ b/python/activator/raw.py @@ -0,0 +1,47 @@ +# This file is part of prompt_prototype. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +"""Common definitions of raw paths. + +This module provides tools to convert raw paths into exposure metadata and +vice versa. +""" + +__all__ = ["RAW_REGEXP", "get_raw_path"] + +import re + +# Format for filenames of raws uploaded to image bucket: +# instrument/detector/group/snap/instrument-group-snap-expid-filter-detector.(fits, fz, fits.gz) +RAW_REGEXP = re.compile( + r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/" + r"(?P=instrument)-(?P=group)-(?P=snap)-(?P.*?)-(?P.*?)-(?P=detector)\.f" +) + + +def get_raw_path(instrument, detector, group, snap, exposure_id, filter): + """The path on which to store raws in the image bucket. + """ + return ( + f"{instrument}/{detector}/{group}/{snap}" + f"/{instrument}-{group}-{snap}" + f"-{exposure_id}-{filter}-{detector}.fz" + ) diff --git a/python/tester/upload.py b/python/tester/upload.py index d277f732..756e449d 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -8,6 +8,7 @@ import re import sys import time +from activator.raw import RAW_REGEXP, get_raw_path from activator.visit import Visit @@ -33,25 +34,6 @@ class Instrument: PROJECT_ID = "prompt-proto" -def raw_path(instrument, detector, group, snap, exposure_id, filter): - """The path on which to store raws in the raw bucket. - - This format is also assumed by ``activator/activator.py.`` - """ - return ( - f"{instrument}/{detector}/{group}/{snap}" - f"/{instrument}-{group}-{snap}" - f"-{exposure_id}-{filter}-{detector}.fz" - ) - - -# TODO: unify the format code across prompt_prototype -RAW_REGEXP = re.compile( - r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/" - r"(?P=instrument)-(?P=group)-(?P=snap)-(?P.*?)-(?P.*?)-(?P=detector)\.f" -) - - logging.basicConfig( format="{levelname} {asctime} {name} - {message}", style="{", @@ -367,8 +349,8 @@ def upload_from_pool(visit, snap_id): # the sent images so that they have the generated ID. Pipeline # execution fails if they don't match. exposure_id = make_exposure_id(visit.instrument, visit.group, snap_id) - filename = raw_path(visit.instrument, visit.detector, visit.group, snap_id, - exposure_id, visit.filter) + filename = get_raw_path(visit.instrument, visit.detector, visit.group, snap_id, + exposure_id, visit.filter) src_bucket.copy_blob(src_blob, dest_bucket, new_name=filename) process_group(publisher, visit_infos, upload_from_pool) _log.info("Slewing to next group") @@ -399,8 +381,8 @@ def upload_from_random(publisher, instrument, dest_bucket, n_groups, group_base) # closures for the bucket and data. def upload_dummy(visit, snap_id): exposure_id = make_exposure_id(visit.instrument, visit.group, snap_id) - filename = raw_path(visit.instrument, visit.detector, visit.group, snap_id, - exposure_id, visit.filter) + filename = get_raw_path(visit.instrument, visit.detector, visit.group, snap_id, + exposure_id, visit.filter) dest_bucket.blob(filename).upload_from_string("Test") process_group(publisher, visit_infos, upload_dummy) _log.info("Slewing to next group") diff --git a/tests/test_raw.py b/tests/test_raw.py new file mode 100644 index 00000000..0fdf1d27 --- /dev/null +++ b/tests/test_raw.py @@ -0,0 +1,57 @@ +# This file is part of prompt_prototype. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). 
+# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import re +import unittest + +from activator.raw import RAW_REGEXP, get_raw_path + + +class RawTest(unittest.TestCase): + """Test the API for handling raw paths. + """ + def setUp(self): + super().setUp() + + self.instrument = "NotACam" + self.detector = 42 + self.group = "2022032100001" + self.snaps = 2 + self.filter = "k2022" + self.ra = 134.5454 + self.dec = -65.3261 + self.rot = 135.0 + self.kind = "IMAGINARY" + self.snap = 1 + self.exposure = 404 + + def test_writeread(self): + """Test that raw module can parse the paths it creates. + """ + path = get_raw_path(self.instrument, self.detector, self.group, self.snap, self.exposure, self.filter) + parsed = re.match(RAW_REGEXP, path) + self.assertIsNotNone(parsed) + self.assertEqual(parsed['instrument'], str(self.instrument)) + self.assertEqual(parsed['detector'], str(self.detector)) + self.assertEqual(parsed['group'], str(self.group)) + self.assertEqual(parsed['snap'], str(self.snap)) + self.assertEqual(parsed['expid'], str(self.exposure)) + self.assertEqual(parsed['filter'], str(self.filter)) From 4b8fa6a17c849fd6b9263a27adc486f04ceb8f19 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 22 Jun 2022 18:02:02 -0500 Subject: [PATCH 05/12] Move all metadata to raw "directory". This allows more flexibility in the raw filename, removing a constraint on what naming convention the summit team can choose. 
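
After this change all identifying metadata (instrument, detector, group,
snap, exposure ID, filter) is carried by the directory components, and
RAW_REGEXP only requires the basename to have a FITS-like extension. A sketch
of the resulting layout, using get_raw_path as updated below (the values are
illustrative)::

    from activator.raw import RAW_REGEXP, get_raw_path

    path = get_raw_path("HSC", 50, "2016030700001", 0, "0059150", "HSC-G")
    # "HSC/50/2016030700001/0/0059150/HSC-G/HSC-2016030700001-0-0059150-HSC-G-50.fz"

    # Any basename ending in .fits, .fz, or .fits.gz parses the same way,
    # because the metadata is now taken from the directories alone.
    assert RAW_REGEXP.match(path)["expid"] == "0059150"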
--- bin/prompt_prototype_upload_raws.sh | 6 +++--- python/activator/activator.py | 2 +- python/activator/raw.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/prompt_prototype_upload_raws.sh b/bin/prompt_prototype_upload_raws.sh index 97e3885a..1ba5fffd 100755 --- a/bin/prompt_prototype_upload_raws.sh +++ b/bin/prompt_prototype_upload_raws.sh @@ -31,8 +31,8 @@ RAW_DIR="${AP_VERIFY_CI_COSMOS_PDR2_DIR:?'dataset is not set up'}/raw" UPLOAD_BUCKET=rubin-prompt-proto-unobserved # Filename format is defined in activator/raw.py: -# instrument/detector/group/snap/instrument-group-snap-exposureId-filter-detector +# instrument/detector/group/snap/exposureId/filter/instrument-group-snap-exposureId-filter-detector gsutil cp "${RAW_DIR}/HSC-0059150-050.fits.gz" \ - gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz + gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/0059150/HSC-G/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz gsutil cp "${RAW_DIR}/HSC-0059160-051.fits.gz" \ - gs://${UPLOAD_BUCKET}/HSC/51/2016030700002/0/HSC-2016030700002-0-0059160-HSC-G-51.fits.gz + gs://${UPLOAD_BUCKET}/HSC/51/2016030700002/0/0059150/HSC-G/HSC-2016030700002-0-0059160-HSC-G-51.fits.gz diff --git a/python/activator/activator.py b/python/activator/activator.py index 8dd3398b..5afbf88f 100644 --- a/python/activator/activator.py +++ b/python/activator/activator.py @@ -116,7 +116,7 @@ def check_for_snap( was found. If multiple files match, this function logs an error but returns one of the files anyway. """ - prefix = f"{instrument}/{detector}/{group}/{snap}/{instrument}-{group}-{snap}-" + prefix = f"{instrument}/{detector}/{group}/{snap}/" _log.debug(f"Checking for '{prefix}'") blobs = list(storage_client.list_blobs(image_bucket, prefix=prefix)) if not blobs: diff --git a/python/activator/raw.py b/python/activator/raw.py index 9e4308a5..4a931f18 100644 --- a/python/activator/raw.py +++ b/python/activator/raw.py @@ -30,10 +30,10 @@ import re # Format for filenames of raws uploaded to image bucket: -# instrument/detector/group/snap/instrument-group-snap-expid-filter-detector.(fits, fz, fits.gz) +# instrument/detector/group/snap/expid/filter/*.(fits, fz, fits.gz) RAW_REGEXP = re.compile( - r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/" - r"(?P=instrument)-(?P=group)-(?P=snap)-(?P.*?)-(?P.*?)-(?P=detector)\.f" + r"(?P.*?)/(?P\d+)/(?P.*?)/(?P\d+)/(?P.*?)/(?P.*?)/" + r"[^/]+\.f" ) @@ -41,7 +41,7 @@ def get_raw_path(instrument, detector, group, snap, exposure_id, filter): """The path on which to store raws in the image bucket. """ return ( - f"{instrument}/{detector}/{group}/{snap}" + f"{instrument}/{detector}/{group}/{snap}/{exposure_id}/{filter}" f"/{instrument}-{group}-{snap}" f"-{exposure_id}-{filter}-{detector}.fz" ) From 81a7984b84fb97c97808e213cd63bf0a3177c349 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 23 Jun 2022 17:58:32 -0500 Subject: [PATCH 06/12] Support a broader subset of HSC-COSMOS inputs to upload.py. --- python/tester/upload.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/tester/upload.py b/python/tester/upload.py index 756e449d..b83cc6d6 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -257,7 +257,15 @@ def get_samples(bucket, instrument): # upload time, another is to download the blob and actually read # its header. 
hsc_metadata = { + 59126: {"ra": 149.28531249999997, "dec": 2.935002777777778, "rot": 270.0}, + 59134: {"ra": 149.45749166666664, "dec": 2.926961111111111, "rot": 270.0}, + 59138: {"ra": 149.45739166666664, "dec": 1.4269472222222224, "rot": 270.0}, + 59142: {"ra": 149.4992083333333, "dec": 2.8853, "rot": 270.0}, 59150: {"ra": 149.96643749999996, "dec": 2.2202916666666668, "rot": 270.0}, + 59152: {"ra": 149.9247333333333, "dec": 2.1577777777777776, "rot": 270.0}, + 59154: {"ra": 150.22329166666663, "dec": 2.238341666666667, "rot": 270.0}, + 59156: {"ra": 150.26497083333334, "dec": 2.1966694444444443, "rot": 270.0}, + 59158: {"ra": 150.30668333333332, "dec": 2.2591888888888887, "rot": 270.0}, 59160: {"ra": 150.18157499999998, "dec": 2.2800083333333334, "rot": 270.0}, } From 230098e2c817ddbddf2e7115a3c78ee1c0cb3bae Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 27 Jun 2022 18:31:51 -0500 Subject: [PATCH 07/12] Clarify documentation for make_remote_butler.py. --- bin.src/make_remote_butler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin.src/make_remote_butler.py b/bin.src/make_remote_butler.py index 594f4077..96f1f32a 100755 --- a/bin.src/make_remote_butler.py +++ b/bin.src/make_remote_butler.py @@ -25,7 +25,9 @@ a source repository and export file. For most values of --target-repo and --seed-config, this script is only useful -if run from the prompt-proto project on Google Cloud. +if run from the prompt-proto project on Google Cloud (because of access +restrictions to **both** the repository's storage location and its registry +database). The user is responsible for clearing any old copies of the repository from both the target URI and the registry database. From d32e0bb538ce6cf38230ab1cfdc8f05baf85d2ab Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 28 Jun 2022 17:26:33 -0500 Subject: [PATCH 08/12] Add more raws to input dataset. --- bin/prompt_prototype_upload_raws.sh | 36 +++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/bin/prompt_prototype_upload_raws.sh b/bin/prompt_prototype_upload_raws.sh index 1ba5fffd..ab8307d3 100755 --- a/bin/prompt_prototype_upload_raws.sh +++ b/bin/prompt_prototype_upload_raws.sh @@ -20,19 +20,37 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -# This script uploads the raw files from the ap_verify_ci_cosmos_pdr2 dataset -# to Google Storage. It renames the files to match prompt_prototype conventions. -# The user must have gsutil already configured, and must have -# ap_verify_ci_cosmos_pdr2 set up. +# This script uploads the raw files from the HSC PDR2 run to Google Storage. It +# renames the files to match prompt_prototype conventions. The user must have +# gsutil already configured. 
set -e # Abort on any error -RAW_DIR="${AP_VERIFY_CI_COSMOS_PDR2_DIR:?'dataset is not set up'}/raw" +RAW_DIR="/datasets/hsc/raw/ssp_pdr2/2016-03-07" UPLOAD_BUCKET=rubin-prompt-proto-unobserved # Filename format is defined in activator/raw.py: # instrument/detector/group/snap/exposureId/filter/instrument-group-snap-exposureId-filter-detector -gsutil cp "${RAW_DIR}/HSC-0059150-050.fits.gz" \ - gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/0059150/HSC-G/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz -gsutil cp "${RAW_DIR}/HSC-0059160-051.fits.gz" \ - gs://${UPLOAD_BUCKET}/HSC/51/2016030700002/0/0059150/HSC-G/HSC-2016030700002-0-0059160-HSC-G-51.fits.gz +gsutil cp "${RAW_DIR}/HSCA05913553.fits" \ + gs://${UPLOAD_BUCKET}/HSC/0/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-0.fits +gsutil cp "${RAW_DIR}/HSCA05913542.fits" \ + gs://${UPLOAD_BUCKET}/HSC/4/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-4.fits +gsutil cp "${RAW_DIR}/HSCA05913543.fits" \ + gs://${UPLOAD_BUCKET}/HSC/5/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-5.fits + +gsutil cp "${RAW_DIR}/HSCA05914353.fits" \ + gs://${UPLOAD_BUCKET}/HSC/0/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-0.fits +gsutil cp "${RAW_DIR}/HSCA05914343.fits" \ + gs://${UPLOAD_BUCKET}/HSC/5/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-5.fits +gsutil cp "${RAW_DIR}/HSCA05914337.fits" \ + gs://${UPLOAD_BUCKET}/HSC/11/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-11.fits + +gsutil cp "${RAW_DIR}/HSCA05915112.fits" \ + gs://${UPLOAD_BUCKET}/HSC/50/2016030700003/0/0059150/HSC-G/HSC-2016030700003-0-0059150-HSC-G-50.fits +gsutil cp "${RAW_DIR}/HSCA05915116.fits" \ + gs://${UPLOAD_BUCKET}/HSC/58/2016030700003/0/0059150/HSC-G/HSC-2016030700003-0-0059150-HSC-G-58.fits + +gsutil cp "${RAW_DIR}/HSCA05916109.fits" \ + gs://${UPLOAD_BUCKET}/HSC/43/2016030700004/0/0059150/HSC-G/HSC-2016030700004-0-0059160-HSC-G-43.fits +gsutil cp "${RAW_DIR}/HSCA05916113.fits" \ + gs://${UPLOAD_BUCKET}/HSC/51/2016030700004/0/0059150/HSC-G/HSC-2016030700004-0-0059160-HSC-G-51.fits From 253f778657859c6f581a0fa139ae80a1ce71f12b Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 11 Jul 2022 16:33:22 -0500 Subject: [PATCH 09/12] Remove upload.py support for arbitrarily large numbers of groups. The increase in the raw dataset size makes it unnecessary, and it complicates ID management. --- python/tester/upload.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/tester/upload.py b/python/tester/upload.py index b83cc6d6..f32507ff 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -333,8 +333,17 @@ def upload_from_raws(publisher, instrument, raw_pool, src_bucket, dest_bucket, n group IDs. group_base : `int` The base number from which to offset new group numbers. + + Exceptions + ---------- + ValueError + Raised if ``n_groups`` exceeds the number of groups in ``raw_pool``. 
""" - for i, true_group in enumerate(itertools.islice(itertools.cycle(raw_pool), n_groups)): + if n_groups > len(raw_pool): + raise ValueError(f"Requested {n_groups} groups, but only {len(raw_pool)} " + "unobserved raws are available.") + + for i, true_group in enumerate(itertools.islice(raw_pool, n_groups)): group = group_base + i _log.debug(f"Processing group {group} from unobserved {true_group}...") # snap_dict maps snap_id to {visit: blob} From 42f87dfc8428d7bce8ba6059ec101093fa9f2aee Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 11 Jul 2022 17:23:36 -0500 Subject: [PATCH 10/12] Stop overwriting exposure ID of uploaded files. --- python/tester/upload.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/tester/upload.py b/python/tester/upload.py index f32507ff..58d8e360 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -362,10 +362,9 @@ def upload_from_raws(publisher, instrument, raw_pool, src_bucket, dest_bucket, n # closures for the bucket and data. def upload_from_pool(visit, snap_id): src_blob = snap_dict[snap_id][visit] - # TODO: use the images' native exposure ID here, or (better) hack - # the sent images so that they have the generated ID. Pipeline - # execution fails if they don't match. - exposure_id = make_exposure_id(visit.instrument, visit.group, snap_id) + # TODO: converting raw_pool from a nested mapping to an indexable + # custom class would make it easier to include such metadata as expId. + exposure_id = int(re.match(RAW_REGEXP, src_blob.name).group('expid')) filename = get_raw_path(visit.instrument, visit.detector, visit.group, snap_id, exposure_id, visit.filter) src_bucket.copy_blob(src_blob, dest_bucket, new_name=filename) From bf7ad83768d2cdf7d2d8d739477ce07324c88030 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 12 Jul 2022 14:23:07 -0500 Subject: [PATCH 11/12] Process only the requested exposure IDs in run_pipeline. --- python/activator/middleware_interface.py | 24 ++++++------------------ tests/test_middleware_interface.py | 12 ------------ 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/python/activator/middleware_interface.py b/python/activator/middleware_interface.py index ac51766e..29cbbc38 100644 --- a/python/activator/middleware_interface.py +++ b/python/activator/middleware_interface.py @@ -386,28 +386,16 @@ def run_pipeline(self, visit: Visit, exposure_ids: set) -> None: Group of snaps from one detector to be processed. exposure_ids : `set` Identifiers of the exposures that were received. - TODO: We need to be careful about the distinction between snap IDs - (a running series from 0 to N-1) and exposure IDs (which are more - complex and encode other info). Butler currently does not recognize - a snap ID, as such. - TODO: I believe this is unnecessary because it should be encoded - in the `visit` object, but we'll have to test how that works once - we implemented this with actual data. """ # TODO: we want to define visits earlier, but we have to ingest a # faked raw file and appropriate SSO data during prep (and then # cleanup when ingesting the real data). - # TODO: Also, using this approach (instead of saving the datasetRefs - # returned by ingest and using them to define visits) also requires - # pruning this list down to only the exposures that aren't already - # defined (otherwise defineVisits.run does extra "nothing" work). 
- exposures = set(self.butler.registry.queryDataIds(["exposure"])) - self.define_visits.run(exposures) - - # TODO: temporary workaround for uploader and image header not agreeing - # on what the exposure ID is. We use the full exposure list here - # because we can't support multiple visits anyway. - exposure_ids = {data_id["exposure"] for data_id in exposures} + try: + self.define_visits.run({"instrument": self.instrument.getName(), + "exposure": exp} for exp in exposure_ids) + except lsst.daf.butler.registry.DataIdError as e: + # TODO: a good place for a custom exception? + raise RuntimeError("No data to process.") from e # TODO: can we move this from_pipeline call to prep_butler? where = f"detector={visit.detector} and exposure in ({','.join(str(x) for x in exposure_ids)})" diff --git a/tests/test_middleware_interface.py b/tests/test_middleware_interface.py index c826390d..b556bd8f 100644 --- a/tests/test_middleware_interface.py +++ b/tests/test_middleware_interface.py @@ -291,7 +291,6 @@ def test_run_pipeline(self): self.interface.run_pipeline(self.next_visit, {1}) mock_run.assert_called_once_with(register_dataset_types=True) - @unittest.skip("run_pipeline ignores the exposure_ids arg, so we can't test passing invalid ones.") def test_run_pipeline_empty_quantum_graph(self): """Test that running a pipeline that results in an empty quantum graph (because the exposure ids are wrong), raises. @@ -310,14 +309,3 @@ def test_run_pipeline_empty_quantum_graph(self): with self.assertRaisesRegex(RuntimeError, "No data to process"): self.interface.run_pipeline(self.next_visit, {2}) - - def test_run_pipeline_missing_raws(self): - """Test that running a pipeline with no raws, raises. - """ - # Have to setup the data so that we can create the pipeline executor. - self.interface.prep_butler(self.next_visit) - - # TODO: this exception will almost certainly change once we change how - # we do defineVisits. - with self.assertRaisesRegex(RuntimeError, "No exposures given"): - self.interface.run_pipeline(self.next_visit, {2}) From 63b7e69137eb6a7a29024b466fa04bc13479db0d Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 12 Jul 2022 16:38:07 -0500 Subject: [PATCH 12/12] Avoid hardcoded collection names in MiddlewareInterface. --- python/activator/middleware_interface.py | 31 +++++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/python/activator/middleware_interface.py b/python/activator/middleware_interface.py index 29cbbc38..793dd1f6 100644 --- a/python/activator/middleware_interface.py +++ b/python/activator/middleware_interface.py @@ -75,6 +75,12 @@ class MiddlewareInterface: appropriate for use in the Google Cloud environment; typically only change this when running local tests. """ + _COLLECTION_TEMPLATE = "templates" + """The collection used for templates. + """ + _COLLECTION_SKYMAP = "skymaps" + """The collection used for skymaps. 
+ """ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str, butler: Butler, @@ -90,7 +96,7 @@ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str, self._download_store = None self.instrument = lsst.obs.base.Instrument.from_string(instrument) - self.output_collection = f"{self.instrument.getName()}/prompt" + self.output_collection = self.instrument.makeCollectionName("prompt") self._init_local_butler(butler) self._init_ingester() @@ -104,7 +110,7 @@ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str, # This code will break once cameras start being versioned. self.camera = self.central_butler.get( "camera", instrument=self.instrument.getName(), - collections=self.instrument.makeCalibrationCollectionName("unbounded") + collections=self.instrument.makeUnboundedCalibrationRunName() ) self.skymap = self.central_butler.get("skyMap") @@ -170,9 +176,9 @@ def prep_butler(self, visit: Visit) -> None: self._export_calibs(export, visit.detector, visit.filter) # CHAINED collections - export.saveCollection("refcats") - export.saveCollection("templates") - export.saveCollection(self.instrument.makeCollectionName("defaults")) + export.saveCollection(self.instrument.makeRefCatCollectionName()) + export.saveCollection(self._COLLECTION_TEMPLATE) + export.saveCollection(self.instrument.makeUmbrellaCollectionName()) self.butler.import_(filename=export_file.name, directory=self.central_butler.datastore.root, @@ -203,10 +209,11 @@ def _export_refcats(self, export, center, radius): # collection, so we have to specify a list here. Replace this # with another solution ASAP. possible_refcats = ["gaia", "panstarrs", "gaia_dr2_20200414", "ps1_pv3_3pi_20170110"] - export.saveDatasets(self.central_butler.registry.queryDatasets(possible_refcats, - collections="refcats", - where=htm_where, - findFirst=True)) + export.saveDatasets(self.central_butler.registry.queryDatasets( + possible_refcats, + collections=self.instrument.makeRefCatCollectionName(), + where=htm_where, + findFirst=True)) def _export_skymap_and_templates(self, export, center, detector, wcs): """Export the skymap and templates for this visit from the central @@ -229,7 +236,7 @@ def _export_skymap_and_templates(self, export, center, detector, wcs): # otherwise we get a UNIQUE constraint error when prepping for the # second visit. export.saveDatasets(self.central_butler.registry.queryDatasets("skyMap", - collections="skymaps", + collections=self._COLLECTION_SKYMAP, findFirst=True)) # Getting only one tract should be safe: we're getting the # tract closest to this detector, so we should be well within @@ -247,7 +254,7 @@ def _export_skymap_and_templates(self, export, center, detector, wcs): # TODO: alternately, can we just assume that there is exactly # one coadd type in the central butler? 
export.saveDatasets(self.central_butler.registry.queryDatasets("*Coadd", - collections="templates", + collections=self._COLLECTION_TEMPLATE, where=template_where)) def _export_calibs(self, export, detector_id, filter): @@ -295,7 +302,7 @@ def _prep_collections(self): CollectionType.RUN) self.butler.registry.registerCollection(self.output_run, CollectionType.RUN) self.butler.registry.registerCollection(self.output_collection, CollectionType.CHAINED) - collections = [self.instrument.makeCollectionName("defaults"), + collections = [self.instrument.makeUmbrellaCollectionName(), self.instrument.makeDefaultRawIngestRunName(), self.output_run] self.butler.registry.setCollectionChain(self.output_collection, collections)
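
Note on the collection names used above: they are produced by the
lsst.obs.base.Instrument helpers rather than hardcoded strings, so they track
whatever the obs packages define. A sketch of the expected expansions for
HSC, matching the literals they replace (the exact strings are set by the obs
packages, so treat them as assumptions rather than guarantees)::

    from lsst.obs.base import Instrument

    # Fully-qualified class name used for illustration only.
    instrument = Instrument.from_string("lsst.obs.subaru.HyperSuprimeCam")

    instrument.makeCollectionName("prompt")       # expected "HSC/prompt"
    instrument.makeUmbrellaCollectionName()       # expected "HSC/defaults"
    instrument.makeRefCatCollectionName()         # expected "refcats"
    instrument.makeUnboundedCalibrationRunName()  # expected "HSC/calib/unbounded"
    instrument.makeDefaultRawIngestRunName()      # expected "HSC/raw/all"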