Merge pull request #703 from lsst/tickets/DM-32061
DM-32061: Fix some issues with export-calibs command
timj committed Jun 29, 2022
2 parents 3ec7b18 + c893ea6 commit aeba0b4
Showing 10 changed files with 92 additions and 13 deletions.
2 changes: 2 additions & 0 deletions doc/changes/DM-32061.bugfix.rst
@@ -0,0 +1,2 @@
``butler export-calibs`` can now copy files that require the use of a file template with metadata records (for example, if a direct URI was stored in the datastore).
File templates that use metadata records now complain if the record is not attached to the ``DatasetRef``.
2 changes: 2 additions & 0 deletions doc/changes/DM-32061.feature.rst
@@ -0,0 +1,2 @@
``butler export-calibs`` now takes a ``--transfer`` option to control how data are exported (use ``direct`` to do in-place export) and a ``--datasets`` option to limit the dataset types to be exported.
The collections argument also now has a default: all calibration collections.
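
A hedged sketch of the new options driven from Python via the script function behind the CLI command; the repository path, export directory, and collection and dataset type names below are made up for illustration:

# Sketch only: repo path, directory, and names are hypothetical.
# A roughly equivalent CLI call (argument order assumed):
#   butler export-calibs --transfer direct --datasets bias /repo /tmp/calib-export calib
from lsst.daf.butler.script.exportCalibs import exportCalibs

table = exportCalibs(
    repo="/repo",                   # existing butler repository
    directory="/tmp/calib-export",  # must not already exist
    collections=("calib",),         # empty selects all calibration collections
    dataset_type=("bias", "flat"),  # empty exports every calibration dataset type
    transfer="direct",              # in-place export; "move" is rejected
)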
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/cli/cmd/commands.py
@@ -825,8 +825,10 @@ def register_dataset_type(**kwargs):

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
- @directory_argument(required=True)
+ @directory_argument(required=True, help="DIRECTORY is the folder to receive the exported calibrations.")
@collections_argument(help="COLLECTIONS are the collections to export calibrations from.")
@dataset_type_option(help="Specific DatasetType(s) to export.", multiple=True)
@transfer_option()
def export_calibs(*args, **kwargs):
"""Export calibrations from the butler for import elsewhere."""
table = script.exportCalibs(*args, **kwargs)
26 changes: 25 additions & 1 deletion python/lsst/daf/butler/core/fileTemplates.py
@@ -431,6 +431,9 @@ def format(self, ref: DatasetRef) -> str:
Raised if the requested field is not defined and the field is
not optional. Or, `component` is specified but "component" was
not part of the template.
RuntimeError
Raised if a template uses dimension record metadata but no
records are attached to the `DatasetRef`.
"""
# Extract defined non-None dimensions from the dataId.
# This guards against Nones being explicitly present in the data ID
@@ -492,9 +495,30 @@ def format(self, ref: DatasetRef) -> str:
if primary in extras:
record = extras[primary]
# Only fill in the fields if we have a value, the
- # KeyError will trigger below if the attribute is missing.
+ # KeyError will trigger below if the attribute is missing,
+ # but only if it is not optional. This is most likely
+ # a typo in the metadata field and so should be reported
+ # even if optional.
if hasattr(record, secondary):
fields[field_name] = getattr(record, secondary)
else:
# Is a log message sufficient?
log.info(
"Template field %s could not be resolved because metadata field %s"
" is not understood for dimension %s. Template entry will be ignored",
field_name,
secondary,
primary,
)
elif primary in fields:
# We do have an entry for the primary but do not have any
# secondary entries. This is likely a problem with the
# code failing to attach a record to the DatasetRef.
raise RuntimeError(
f"No metadata records attached to dataset {ref}"
f" when attempting to expand field {field_name}."
" Either expand the DatasetRef or change the template."
)

if field_name in fields:
value = fields[field_name]
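Condensed, the new lookup behaves like this hedged sketch; it is a standalone reduction with hypothetical names, while the real code walks DimensionRecord objects attached to the DatasetRef:

def resolve_metadata_field(field_name: str, extras: dict, fields: dict):
    # Illustrative reduction only, not the real implementation.
    primary, secondary = field_name.split(".")  # "visit.name" -> ("visit", "name")
    if primary in extras:
        record = extras[primary]  # the attached dimension record
        if hasattr(record, secondary):
            return getattr(record, secondary)
        # Unknown metadata field: most likely a template typo, so it is
        # logged and the template entry ignored (a KeyError follows later
        # if the field is not optional).
        return None
    if primary in fields:
        # The dimension is in the data ID but no record was attached.
        raise RuntimeError(f"No metadata records attached when expanding {field_name}.")
    return None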
9 changes: 9 additions & 0 deletions python/lsst/daf/butler/datastores/fileDatastore.py
@@ -2680,6 +2680,15 @@ def export(
if transfer is not None and directory is None:
raise RuntimeError(f"Cannot export using transfer mode {transfer} with no export directory given")

if transfer == "move":
raise RuntimeError("Can not export by moving files out of datastore.")
elif transfer == "direct":
# For an export, treat this as equivalent to None. We do not
# want an import to risk using absolute URIs to datasets owned
# by another datastore.
log.info("Treating 'direct' transfer mode as in-place export.")
transfer = None

# Force the directory to be a URI object
directoryUri: Optional[ResourcePath] = None
if directory is not None:
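In practice the guard means an export session can be driven like this hedged sketch (paths and dataset type name are hypothetical; "direct" now records existing URIs instead of copying):

from lsst.daf.butler import Butler

butler = Butler("/repo", writeable=False)  # hypothetical repository
# transfer="move" now raises RuntimeError; transfer="direct" is logged and
# treated as an in-place export, so another datastore's absolute URIs are
# not picked up on a later import.
with butler.export(directory="/tmp/export", format="yaml", transfer="direct") as export:
    export.saveDatasets(butler.registry.queryDatasets("bias", collections=...))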
25 changes: 20 additions & 5 deletions python/lsst/daf/butler/script/exportCalibs.py
@@ -65,12 +65,16 @@ def parseCalibrationCollection(registry, collection, datasetTypes):
calibType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
)
for result in associations:
- exportDatasets.append(result.ref)
- exportCollections.append(result.ref.run)
+ # Need an expanded dataId in case file templates will be used
+ # in the transfer.
+ dataId = registry.expandDataId(result.ref.dataId)
+ ref = result.ref.expanded(dataId)
+ exportDatasets.append(ref)
+ exportCollections.append(ref.run)
return exportCollections, exportDatasets


- def exportCalibs(repo, directory, collections):
+ def exportCalibs(repo, directory, collections, dataset_type, transfer):
"""Certify a set of calibrations with a validity range.
Parameters
@@ -85,6 +89,10 @@ def exportCalibs(repo, directory, collections):
Data collections to pull calibrations from. Must be an
existing `~CollectionType.CHAINED` or
`~CollectionType.CALIBRATION` collection.
dataset_type : `tuple` [`str`]
The dataset types to export. Default is to export all.
transfer : `str`
The transfer mode to use for exporting.
Returns
-------
@@ -99,8 +107,15 @@
"""
butler = Butler(repo, writeable=False)

if not dataset_type:
dataset_type = ...
if not collections:
collections = ...

calibTypes = [
- datasetType for datasetType in butler.registry.queryDatasetTypes(...) if datasetType.isCalibration()
+ datasetType
+ for datasetType in butler.registry.queryDatasetTypes(dataset_type)
+ if datasetType.isCalibration()
]

collectionsToExport = []
@@ -127,7 +142,7 @@
if os.path.exists(directory):
raise RuntimeError(f"Export directory exists: {directory}")
os.makedirs(directory)
with butler.export(directory=directory, format="yaml", transfer="auto") as export:
with butler.export(directory=directory, format="yaml", transfer=transfer) as export:
collectionsToExport = list(set(collectionsToExport))
datasetsToExport = list(set(datasetsToExport))

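The expansion step is what lets file templates that reference record metadata work during the transfer; the same pattern outside the script looks roughly like this hedged sketch (repository path, dataset type, and collection name are illustrative):

from lsst.daf.butler import Butler, CollectionType

registry = Butler("/repo").registry  # hypothetical repository
for assoc in registry.queryDatasetAssociations(
    "bias", collections="calib", collectionTypes=[CollectionType.CALIBRATION]
):
    # Attach dimension records so "{visit.name}"-style template entries
    # can be resolved when the file is transferred.
    ref = assoc.ref.expanded(registry.expandDataId(assoc.ref.dataId))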
1 change: 1 addition & 0 deletions tests/config/basic/templates.yaml
@@ -7,6 +7,7 @@ test_metric_comp: "{run:/}/{datasetType}.{component:?}/{datasetType}_v{visit:08d
metric2: "{run:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{physical_filter:?}/{instrument:?}_{visit.name:?}"
metric3: "{run:/}/{datasetType}/{instrument}"
metric4: "{run:/}/{component:?}_{instrument}_{physical_filter}_{visit:08d}"
metric5: "{run:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{physical_filter:?}/{instrument:?}_{visit:?}"
Integer: "{id}"
physical_filter+: "{run:/}/{instrument}_{physical_filter}"
instrument<DummyCamComp>:
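For reference, the grammar these templates exercise: "{visit.name}" (as in metric2) reads the name field from the attached visit record, while a trailing ":?" (used throughout the new metric5) marks an entry optional. A minimal hedged sketch of formatting such a template, assuming the ref already carries dimension records (otherwise the new RuntimeError applies):

from lsst.daf.butler import DatasetRef, FileTemplate

def format_path(expanded_ref: DatasetRef) -> str:
    # expanded_ref is assumed to have dimension records attached; without
    # them, a non-optional "visit.name" entry now raises RuntimeError.
    template = FileTemplate("{run:/}/{datasetType}/{instrument}_{visit.name:?}")
    return template.format(expanded_ref)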
15 changes: 15 additions & 0 deletions tests/test_butler.py
@@ -78,6 +78,7 @@ def mock_s3(cls):
DatasetRef,
DatasetType,
FileDataset,
FileTemplate,
FileTemplateValidationError,
StorageClassFactory,
ValidationError,
@@ -921,6 +922,7 @@ def testGetDatasetTypes(self):
ignore=[
"test_metric_comp",
"metric3",
"metric5",
"calexp",
"DummySC",
"datasetType.component",
@@ -943,6 +945,7 @@
ignore=[
"test_metric_comp",
"metric3",
"metric5",
"calexp",
"DummySC",
"datasetType.component",
@@ -1177,6 +1180,18 @@ def testPutTemplates(self):
# Check the template based on dimensions
butler.datastore.templates.validateTemplates([ref])

# Use a template that has a typo in dimension record metadata.
# Easier to test with a butler that has a ref with records attached.
template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
path = template.format(ref)
self.assertEqual(path, f"a/v423/{ref.id}_fits")

template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
with self.assertRaises(KeyError):
with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
template.format(ref)

# Now use a file template that will not result in unique filenames
with self.assertRaises(FileTemplateValidationError):
butler.put(metric, "metric3", dataId1)
12 changes: 6 additions & 6 deletions tests/test_datastore.py
@@ -336,7 +336,7 @@ def testTrustGetRequest(self):
i = 0
for sc_name in ("StructuredData", "StructuredComposite"):
i += 1
datasetTypeName = f"metric{i}"
datasetTypeName = f"test_metric{i}" # Different dataset type name each time.

if sc_name == "StructuredComposite":
disassembled = True
@@ -1135,9 +1135,9 @@ def testConstraints(self):
testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
for datasetTypeName, sc, accepted in (
("metric", sc1, True),
("metric2", sc1, False),
("metric5", sc1, False),
("metric33", sc1, True),
("metric2", sc2, True),
("metric5", sc2, True),
):
# Choose different temp file depending on StorageClass
testfile = testfile_j if sc.name.endswith("Json") else testfile_y
@@ -1233,10 +1233,10 @@ def testConstraints(self):

for typeName, dataId, sc, accept, ingest in (
("metric", dataId1, sc1, (False, True, False), True),
("metric2", dataId1, sc1, (False, False, False), False),
("metric2", dataId2, sc1, (True, False, False), False),
("metric5", dataId1, sc1, (False, False, False), False),
("metric5", dataId2, sc1, (True, False, False), False),
("metric33", dataId2, sc2, (True, True, False), True),
("metric2", dataId1, sc2, (False, True, False), True),
("metric5", dataId1, sc2, (False, True, False), True),
):

# Choose different temp file depending on StorageClass
9 changes: 9 additions & 0 deletions tests/test_templates.py
@@ -148,6 +148,15 @@ def testRunOrCollectionNeeded(self):
with self.assertRaises(FileTemplateValidationError):
self.assertTemplate(tmplstr, "run2/calexp/00052/U", self.makeDatasetRef("calexp"))

def testNoRecord(self):
# Attaching records is not possible in this test code, but we can check
# that formatting fails when a metadata entry has been requested and no
# record is attached.
tmplstr = "{run}/{datasetType}/{visit.name}/{physical_filter}"
with self.assertRaises(RuntimeError) as cm:
self.assertTemplate(tmplstr, "", self.makeDatasetRef("calexp"))
self.assertIn("No metadata", str(cm.exception))

def testOptional(self):
"""Optional units in templates."""
ref = self.makeDatasetRef("calexp", conform=False)
