2 changes: 2 additions & 0 deletions .github/workflows/build-service.yml
@@ -42,6 +42,8 @@ jobs:
cd /home/lsst/prompt_prototype
source /opt/lsst/software/stack/loadLSST.bash
setup -r .
# Fix permissions; arg must be absolute path.
git config --global --add safe.directory /home/lsst/prompt_prototype
scons'

update-service-image:
4 changes: 3 additions & 1 deletion bin.src/make_remote_butler.py
@@ -25,7 +25,9 @@
a source repository and export file.

For most values of --target-repo and --seed-config, this script is only useful
if run from the prompt-proto project on Google Cloud.
if run from the prompt-proto project on Google Cloud (because of access
restrictions to **both** the repository's storage location and its registry
database).

The user is responsible for clearing any old copies of the repository from
both the target URI and the registry database.
40 changes: 29 additions & 11 deletions bin/prompt_prototype_upload_raws.sh
@@ -20,19 +20,37 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# This script uploads the raw files from the ap_verify_ci_cosmos_pdr2 dataset
# to Google Storage. It renames the files to match prompt_prototype conventions.
# The user must have gsutil already configured, and must have
# ap_verify_ci_cosmos_pdr2 set up.
# This script uploads the raw files from the HSC PDR2 run to Google Storage. It
# renames the files to match prompt_prototype conventions. The user must have
# gsutil already configured.

set -e # Abort on any error

RAW_DIR="${AP_VERIFY_CI_COSMOS_PDR2_DIR:?'dataset is not set up'}/raw"
RAW_DIR="/datasets/hsc/raw/ssp_pdr2/2016-03-07"
UPLOAD_BUCKET=rubin-prompt-proto-unobserved

# Filename format is defined in tester/upload.py and activator/activator.py:
# instrument/detector/group/snap/instrument-group-snap-exposureId-filter-detector
gsutil cp "${RAW_DIR}/HSC-0059150-050.fits.gz" \
gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz
gsutil cp "${RAW_DIR}/HSC-0059160-051.fits.gz" \
gs://${UPLOAD_BUCKET}/HSC/51/2016030700002/0/HSC-2016030700002-0-0059160-HSC-G-51.fits.gz
# Filename format is defined in activator/raw.py:
# instrument/detector/group/snap/exposureId/filter/instrument-group-snap-exposureId-filter-detector
gsutil cp "${RAW_DIR}/HSCA05913553.fits" \
gs://${UPLOAD_BUCKET}/HSC/0/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-0.fits
gsutil cp "${RAW_DIR}/HSCA05913542.fits" \
gs://${UPLOAD_BUCKET}/HSC/4/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-4.fits
gsutil cp "${RAW_DIR}/HSCA05913543.fits" \
gs://${UPLOAD_BUCKET}/HSC/5/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-5.fits

gsutil cp "${RAW_DIR}/HSCA05914353.fits" \
gs://${UPLOAD_BUCKET}/HSC/0/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-0.fits
gsutil cp "${RAW_DIR}/HSCA05914343.fits" \
gs://${UPLOAD_BUCKET}/HSC/5/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-5.fits
gsutil cp "${RAW_DIR}/HSCA05914337.fits" \
gs://${UPLOAD_BUCKET}/HSC/11/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-11.fits

gsutil cp "${RAW_DIR}/HSCA05915112.fits" \
gs://${UPLOAD_BUCKET}/HSC/50/2016030700003/0/0059150/HSC-G/HSC-2016030700003-0-0059150-HSC-G-50.fits
gsutil cp "${RAW_DIR}/HSCA05915116.fits" \
gs://${UPLOAD_BUCKET}/HSC/58/2016030700003/0/0059150/HSC-G/HSC-2016030700003-0-0059150-HSC-G-58.fits

gsutil cp "${RAW_DIR}/HSCA05916109.fits" \
gs://${UPLOAD_BUCKET}/HSC/43/2016030700004/0/0059160/HSC-G/HSC-2016030700004-0-0059160-HSC-G-43.fits
gsutil cp "${RAW_DIR}/HSCA05916113.fits" \
gs://${UPLOAD_BUCKET}/HSC/51/2016030700004/0/0059160/HSC-G/HSC-2016030700004-0-0059160-HSC-G-51.fits
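
The destination paths above follow the naming convention now centralized in python/activator/raw.py. As a sanity check, a minimal sketch (assuming the repository's python/ directory is on PYTHONPATH so the activator package is importable) that regenerates the first object key. Note that get_raw_path hardcodes a .fz extension while this script uploads .fits files; both still satisfy RAW_REGEXP, which only requires an extension beginning with .f:

from activator.raw import get_raw_path

# Rebuild the object key for the first gsutil upload above.
path = get_raw_path("HSC", 0, "2016030700001", 0, "0059134", "HSC-G")
print(path)
# -> HSC/0/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-0.fz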
29 changes: 6 additions & 23 deletions pipelines/calibrate.py
@@ -1,32 +1,15 @@
# Config override for lsst.pipe.tasks.calibrate.CalibrateTask
# This config is for the ap_verify datasets that use gaia/panstarrs as their
# refcat names.
from lsst.meas.algorithms import LoadIndexedReferenceObjectsTask

# Use gaia for astrometry (phot_g_mean for everything, as that is the broadest
# band with the most depth)
# Use panstarrs for photometry (grizy filters)
for refObjLoader in (config.astromRefObjLoader,
config.photoRefObjLoader,):
refObjLoader.retarget(LoadIndexedReferenceObjectsTask)
# band with the most depth).

config.connections.astromRefCat = "gaia"
config.astromRefObjLoader.ref_dataset_name = config.connections.astromRefCat
config.astromRefObjLoader.filterMap = {
"u": "phot_g_mean",
"g": "phot_g_mean",
"r": "phot_g_mean",
"i": "phot_g_mean",
"z": "phot_g_mean",
"y": "phot_g_mean",
"VR": "phot_g_mean"}
config.astromRefObjLoader.anyFilterMapsToThis = "phot_g_mean"
config.astromRefObjLoader.filterMap = {}
Contributor:
These two lines are technically not necessary, since DM-27013 is done.

Member Author:
I didn't know that, but for configuration that depends on the input data, I prefer to be explicit instead of trusting that the defaults match up with what I really need.


# Use panstarrs for photometry (grizy filters).
config.connections.photoRefCat = "panstarrs"
config.photoRefObjLoader.ref_dataset_name = config.connections.photoRefCat
config.photoRefObjLoader.filterMap = {
"u": "g",
# TODO: workaround for DM-29186
# "g": "g",
# "r": "r",
# "i": "i",
# "z": "z",
# "y": "y",
"VR": "g"}
13 changes: 4 additions & 9 deletions python/activator/activator.py
@@ -36,6 +36,7 @@
from lsst.obs.base import Instrument
from .make_pgpass import make_pgpass
from .middleware_interface import MiddlewareInterface
from .raw import RAW_REGEXP
from .visit import Visit

PROJECT_ID = "prompt-proto"
@@ -46,12 +46,6 @@
active_instrument = Instrument.from_string(config_instrument)
calib_repo = os.environ["CALIB_REPO"]
image_bucket = os.environ["IMAGE_BUCKET"]
# Format for filenames of raws uploaded to image_bucket:
# instrument/detector/group/snap/instrument-group-snap-expid-filter-detector.(fits, fz, fits.gz)
oid_regexp = re.compile(
r"(?P<instrument>.*?)/(?P<detector>\d+)/(?P<group>.*?)/(?P<snap>\d+)/"
r"(?P=instrument)-(?P=group)-(?P=snap)-(?P<expid>.*?)-(?P<filter>.*?)-(?P=detector)\.f"
)
timeout = os.environ.get("IMAGE_TIMEOUT", 50)

logging.basicConfig(
@@ -121,7 +116,7 @@ def check_for_snap(
was found. If multiple files match, this function logs an error
but returns one of the files anyway.
"""
prefix = f"{instrument}/{detector}/{group}/{snap}/{instrument}-{group}-{snap}-"
prefix = f"{instrument}/{detector}/{group}/{snap}/"
_log.debug(f"Checking for '{prefix}'")
blobs = list(storage_client.list_blobs(image_bucket, prefix=prefix))
if not blobs:
@@ -185,7 +180,7 @@ def next_visit_handler() -> Tuple[str, int]:
expected_visit.detector,
)
if oid:
m = re.match(oid_regexp, oid)
m = re.match(RAW_REGEXP, oid)
mwi.ingest_image(oid)
expid_set.add(m.group('expid'))

@@ -218,7 +213,7 @@ def next_visit_handler() -> Tuple[str, int]:
for received in response.received_messages:
ack_list.append(received.ack_id)
oid = received.message.attributes["objectId"]
m = re.match(oid_regexp, oid)
m = re.match(RAW_REGEXP, oid)
if m:
instrument, detector, group, snap, expid = m.groups()
_log.debug("instrument, detector, group, snap, expid = %s", m.groups())
55 changes: 25 additions & 30 deletions python/activator/middleware_interface.py
@@ -75,6 +75,12 @@ class MiddlewareInterface:
appropriate for use in the Google Cloud environment; typically only
change this when running local tests.
"""
_COLLECTION_TEMPLATE = "templates"
"""The collection used for templates.
"""
_COLLECTION_SKYMAP = "skymaps"
"""The collection used for skymaps.
"""

def __init__(self, central_butler: Butler, image_bucket: str, instrument: str,
butler: Butler,
@@ -90,7 +96,7 @@ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str,
self._download_store = None
self.instrument = lsst.obs.base.Instrument.from_string(instrument)

self.output_collection = f"{self.instrument.getName()}/prompt"
self.output_collection = self.instrument.makeCollectionName("prompt")

self._init_local_butler(butler)
self._init_ingester()
@@ -104,7 +110,7 @@ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str,
# This code will break once cameras start being versioned.
self.camera = self.central_butler.get(
"camera", instrument=self.instrument.getName(),
collections=self.instrument.makeCalibrationCollectionName("unbounded")
collections=self.instrument.makeUnboundedCalibrationRunName()
)
self.skymap = self.central_butler.get("skyMap")

@@ -170,9 +176,9 @@ def prep_butler(self, visit: Visit) -> None:
self._export_calibs(export, visit.detector, visit.filter)

# CHAINED collections
export.saveCollection("refcats")
export.saveCollection("templates")
export.saveCollection(self.instrument.makeCollectionName("defaults"))
export.saveCollection(self.instrument.makeRefCatCollectionName())
export.saveCollection(self._COLLECTION_TEMPLATE)
export.saveCollection(self.instrument.makeUmbrellaCollectionName())

self.butler.import_(filename=export_file.name,
directory=self.central_butler.datastore.root,
@@ -203,10 +209,11 @@ def _export_refcats(self, export, center, radius):
# collection, so we have to specify a list here. Replace this
# with another solution ASAP.
possible_refcats = ["gaia", "panstarrs", "gaia_dr2_20200414", "ps1_pv3_3pi_20170110"]
export.saveDatasets(self.central_butler.registry.queryDatasets(possible_refcats,
collections="refcats",
where=htm_where,
findFirst=True))
export.saveDatasets(self.central_butler.registry.queryDatasets(
possible_refcats,
collections=self.instrument.makeRefCatCollectionName(),
where=htm_where,
findFirst=True))

def _export_skymap_and_templates(self, export, center, detector, wcs):
"""Export the skymap and templates for this visit from the central
@@ -229,7 +236,7 @@ def _export_skymap_and_templates(self, export, center, detector, wcs):
# otherwise we get a UNIQUE constraint error when prepping for the
# second visit.
export.saveDatasets(self.central_butler.registry.queryDatasets("skyMap",
collections="skymaps",
collections=self._COLLECTION_SKYMAP,
findFirst=True))
# Getting only one tract should be safe: we're getting the
# tract closest to this detector, so we should be well within
@@ -247,7 +254,7 @@ def _export_skymap_and_templates(self, export, center, detector, wcs):
# TODO: alternately, can we just assume that there is exactly
# one coadd type in the central butler?
export.saveDatasets(self.central_butler.registry.queryDatasets("*Coadd",
collections="templates",
collections=self._COLLECTION_TEMPLATE,
where=template_where))

def _export_calibs(self, export, detector_id, filter):
Expand Down Expand Up @@ -295,7 +302,7 @@ def _prep_collections(self):
CollectionType.RUN)
self.butler.registry.registerCollection(self.output_run, CollectionType.RUN)
self.butler.registry.registerCollection(self.output_collection, CollectionType.CHAINED)
collections = [self.instrument.makeCollectionName("defaults"),
collections = [self.instrument.makeUmbrellaCollectionName(),
self.instrument.makeDefaultRawIngestRunName(),
self.output_run]
self.butler.registry.setCollectionChain(self.output_collection, collections)
@@ -386,28 +393,16 @@ def run_pipeline(self, visit: Visit, exposure_ids: set) -> None:
Group of snaps from one detector to be processed.
exposure_ids : `set`
Identifiers of the exposures that were received.
TODO: We need to be careful about the distinction between snap IDs
(a running series from 0 to N-1) and exposure IDs (which are more
complex and encode other info). Butler currently does not recognize
a snap ID, as such.
TODO: I believe this is unnecessary because it should be encoded
in the `visit` object, but we'll have to test how that works once
we've implemented this with actual data.
"""
# TODO: we want to define visits earlier, but we have to ingest a
# faked raw file and appropriate SSO data during prep (and then
# cleanup when ingesting the real data).
# TODO: Also, using this approach (instead of saving the datasetRefs
# returned by ingest and using them to define visits) also requires
# pruning this list down to only the exposures that aren't already
# defined (otherwise defineVisits.run does extra "nothing" work).
exposures = set(self.butler.registry.queryDataIds(["exposure"]))
self.define_visits.run(exposures)

# TODO: temporary workaround for uploader and image header not agreeing
# on what the exposure ID is. We use the full exposure list here
# because we can't support multiple visits anyway.
exposure_ids = {data_id["exposure"] for data_id in exposures}
try:
self.define_visits.run({"instrument": self.instrument.getName(),
"exposure": exp} for exp in exposure_ids)
except lsst.daf.butler.registry.DataIdError as e:
# TODO: a good place for a custom exception?
raise RuntimeError("No data to process.") from e

# TODO: can we move this from_pipeline call to prep_butler?
where = f"detector={visit.detector} and exposure in ({','.join(str(x) for x in exposure_ids)})"
47 changes: 47 additions & 0 deletions python/activator/raw.py
@@ -0,0 +1,47 @@
# This file is part of prompt_prototype.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Common definitions of raw paths.
This module provides tools to convert raw paths into exposure metadata and
vice versa.
"""

__all__ = ["RAW_REGEXP", "get_raw_path"]

import re

# Format for filenames of raws uploaded to image bucket:
# instrument/detector/group/snap/expid/filter/*.(fits, fz, fits.gz)
RAW_REGEXP = re.compile(
r"(?P<instrument>.*?)/(?P<detector>\d+)/(?P<group>.*?)/(?P<snap>\d+)/(?P<expid>.*?)/(?P<filter>.*?)/"
r"[^/]+\.f"
)


def get_raw_path(instrument, detector, group, snap, exposure_id, filter):
"""The path on which to store raws in the image bucket.
"""
return (
f"{instrument}/{detector}/{group}/{snap}/{exposure_id}/{filter}"
f"/{instrument}-{group}-{snap}"
f"-{exposure_id}-{filter}-{detector}.fz"
)
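
A minimal round-trip sketch of how these two definitions fit together (values borrowed from the upload script above; this snippet is illustrative and not part of the module):

from activator.raw import RAW_REGEXP, get_raw_path

# Build a path, then parse it back; the named groups recover the metadata.
path = get_raw_path("HSC", 50, "2016030700003", 0, "0059150", "HSC-G")
m = RAW_REGEXP.match(path)
assert m.group("detector") == "50"
assert m.group("expid") == "0059150"
assert m.group("filter") == "HSC-G"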