Merge branch 'tickets/DM-44393'

lsst-dm · May 16, 2024 · 4be1cf4 · 4be1cf4
2 parents 9863090 + 4f6cffe
commit 4be1cf4
Show file tree

Hide file tree

Showing 2 changed files with 46 additions and 15 deletions.
diff --git a/python/activator/middleware_interface.py b/python/activator/middleware_interface.py
@@ -1283,21 +1283,7 @@ def _export_subset(self, exposure_ids: set[int],
             # Transferring governor dimensions in parallel can cause deadlocks in
             # central registry. We need to transfer our exposure/visit dimensions,
             # so handle those manually.
-            for dimension in ["group",
-                              "day_obs",
-                              "exposure",
-                              "visit",
-                              ]:
-                if dimension in self.butler.registry.dimensions:
-                    records = self.butler.registry.queryDimensionRecords(
-                        dimension,
-                        where="exposure in (exposure_ids)",
-                        bind={"exposure_ids": exposure_ids},
-                        instrument=self.instrument.getName(),
-                        detector=self.visit.detector,
-                    )
-                    # If records don't match, this is not an error, and central takes precedence.
-                    self.central_butler.registry.insertDimensionData(dimension, *records, skip_existing=True)
+            self._export_exposure_dimensions(exposure_ids)
             transferred = self.central_butler.transfer_from(self.butler, datasets,
                                                             transfer="copy", transfer_dimensions=False)
             if len(transferred) != len(datasets):
@@ -1307,6 +1293,44 @@ def _export_subset(self, exposure_ids: set[int],
 
         return transferred
 
+    def _export_exposure_dimensions(self, exposure_ids):
+        """Transfer dimensions generated from an exposure to the central repo.
+
+        In many cases the exposure records will already exist in the central
+        repo, but this is not guaranteed (especially in dev environments).
+        Visit records never exist in the central repo and are the sole
+        responsibility of Prompt Processing.
+
+        Parameters
+        ----------
+        exposure_ids : `set` [`int`]
+            Identifiers of the exposures that were processed.
+        """
+        core_dimensions = ["group",
+                           "day_obs",
+                           "exposure",
+                           "visit",
+                           "visit_system",
+                           ]
+        universe = self.butler.dimensions
+
+        full_dimensions = [universe[d] for d in core_dimensions if d in universe]
+        extra_dimensions = []
+        for d in full_dimensions:
+            extra_dimensions.extend(universe.get_elements_populated_by(universe[d]))
+        sorted_dimensions = universe.sorted(full_dimensions + extra_dimensions)
+
+        for dimension in sorted_dimensions:
+            records = self.butler.registry.queryDimensionRecords(
+                dimension,
+                where="exposure in (exposure_ids)",
+                bind={"exposure_ids": exposure_ids},
+                instrument=self.instrument.getName(),
+                detector=self.visit.detector,
+            )
+            # If records don't match, this is not an error, and central takes precedence.
+            self.central_butler.registry.insertDimensionData(dimension, *records, skip_existing=True)
+
     def _chain_exports(self, output_chain: str, output_runs: collections.abc.Iterable[str]) -> None:
         """Associate exported datasets with a chained collection in the
         central Butler.

diff --git a/tests/test_middleware_interface.py b/tests/test_middleware_interface.py
@@ -1006,6 +1006,13 @@ def test_export_outputs(self):
         central_butler = Butler(self.central_repo.name, writeable=False)
         self.assertEqual(self._count_datasets(central_butler, "calexp", self.output_run), 2)
         self.assertEqual(self._count_datasets(central_butler, "calexp", self.output_chain), 2)
+        # Should be able to look up datasets by both visit and exposure
+        self.assertEqual(
+            self._count_datasets_with_id(central_butler, "calexp", self.output_run, self.raw_data_id),
+            1)
+        self.assertEqual(
+            self._count_datasets_with_id(central_butler, "calexp", self.output_run, self.second_data_id),
+            1)
         self.assertEqual(
             self._count_datasets_with_id(central_butler, "calexp", self.output_run, self.processed_data_id),
             1)