Merge pull request #703 from lsst/tickets/DM-32061
DM-32061: Fix some issues with export-calibs command
timj committed Jun 29, 2022
2 parents 3ec7b18 + c893ea6 commit aeba0b4
Showing 10 changed files with 92 additions and 13 deletions.
2 changes: 2 additions & 0 deletions doc/changes/DM-32061.bugfix.rst
@@ -0,0 +1,2 @@
``butler export-calibs`` can now copy files that require the use of a file template with metadata records (for example, if a direct URI was stored in the datastore).
File templates that use metadata records now complain if the record is not attached to the ``DatasetRef``.
2 changes: 2 additions & 0 deletions doc/changes/DM-32061.feature.rst
@@ -0,0 +1,2 @@
``butler export-calibs`` now takes a ``--transfer`` option to control how data are exported (use ``direct`` to do in-place export) and a ``--datasets`` option to limit the dataset types to be exported.
The collections argument also now has a default: all calibration collections.
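
A hedged sketch of the new options driven from Python via the script function behind the CLI command; the repository path, export directory, and collection and dataset type names below are made up for illustration:

# Sketch only: repo path, directory, and names are hypothetical.
# A roughly equivalent CLI call (argument order assumed):
#   butler export-calibs --transfer direct --datasets bias /repo /tmp/calib-export calib
from lsst.daf.butler.script.exportCalibs import exportCalibs

table = exportCalibs(
    repo="/repo",                   # existing butler repository
    directory="/tmp/calib-export",  # must not already exist
    collections=("calib",),         # empty selects all calibration collections
    dataset_type=("bias", "flat"),  # empty exports every calibration dataset type
    transfer="direct",              # in-place export; "move" is rejected
)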
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/cli/cmd/commands.py
@@ -825,8 +825,10 @@ def register_dataset_type(**kwargs):

@click.command(cls=ButlerCommand)
@repo_argument(required=True)
- @directory_argument(required=True)
+ @directory_argument(required=True, help="DIRECTORY is the folder to receive the exported calibrations.")
@collections_argument(help="COLLECTIONS are the collections to export calibrations from.")
@dataset_type_option(help="Specific DatasetType(s) to export.", multiple=True)
@transfer_option()
def export_calibs(*args, **kwargs):
"""Export calibrations from the butler for import elsewhere."""
table = script.exportCalibs(*args, **kwargs)
26 changes: 25 additions & 1 deletion python/lsst/daf/butler/core/fileTemplates.py
@@ -431,6 +431,9 @@ def format(self, ref: DatasetRef) -> str:
Raised if the requested field is not defined and the field is
not optional. Or, `component` is specified but "component" was
not part of the template.
RuntimeError
Raised if a template uses dimension record metadata but no
records are attached to the `DatasetRef`.
"""
# Extract defined non-None dimensions from the dataId.
# This guards against Nones being explicitly present in the data ID
@@ -492,9 +495,30 @@ def format(self, ref: DatasetRef) -> str:
if primary in extras:
record = extras[primary]
# Only fill in the fields if we have a value, the
- # KeyError will trigger below if the attribute is missing.
+ # KeyError will trigger below if the attribute is missing,
+ # but only if it is not optional. This is most likely
+ # a typo in the metadata field and so should be reported
+ # even if optional.
if hasattr(record, secondary):
fields[field_name] = getattr(record, secondary)
else:
# Is a log message sufficient?
log.info(
"Template field %s could not be resolved because metadata field %s"
" is not understood for dimension %s. Template entry will be ignored",
field_name,
secondary,
primary,
)
elif primary in fields:
# We do have an entry for the primary but do not have any
# secondary entries. This is likely a problem with the
# code failing to attach a record to the DatasetRef.
raise RuntimeError(
f"No metadata records attached to dataset {ref}"
f" when attempting to expand field {field_name}."
" Either expand the DatasetRef or change the template."
)

if field_name in fields:
value = fields[field_name]
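Condensed, the new lookup behaves like this hedged sketch; it is a standalone reduction with hypothetical names, while the real code walks DimensionRecord objects attached to the DatasetRef:

def resolve_metadata_field(field_name: str, extras: dict, fields: dict):
    # Illustrative reduction only, not the real implementation.
    primary, secondary = field_name.split(".")  # "visit.name" -> ("visit", "name")
    if primary in extras:
        record = extras[primary]  # the attached dimension record
        if hasattr(record, secondary):
            return getattr(record, secondary)
        # Unknown metadata field: most likely a template typo, so it is
        # logged and the template entry ignored (a KeyError follows later
        # if the field is not optional).
        return None
    if primary in fields:
        # The dimension is in the data ID but no record was attached.
        raise RuntimeError(f"No metadata records attached when expanding {field_name}.")
    return None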
9 changes: 9 additions & 0 deletions python/lsst/daf/butler/datastores/fileDatastore.py
@@ -2680,6 +2680,15 @@ def export(
if transfer is not None and directory is None:
raise RuntimeError(f"Cannot export using transfer mode {transfer} with no export directory given")

if transfer == "move":
raise RuntimeError("Can not export by moving files out of datastore.")
elif transfer == "direct":
# For an export, treat this as equivalent to None. We do not
# want an import to risk using absolute URIs to datasets owned
# by another datastore.
log.info("Treating 'direct' transfer mode as in-place export.")
transfer = None

# Force the directory to be a URI object
directoryUri: Optional[ResourcePath] = None
if directory is not None:
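In practice the guard means an export session can be driven like this hedged sketch (paths and dataset type name are hypothetical; "direct" now records existing URIs instead of copying):

from lsst.daf.butler import Butler

butler = Butler("/repo", writeable=False)  # hypothetical repository
# transfer="move" now raises RuntimeError; transfer="direct" is logged and
# treated as an in-place export, so another datastore's absolute URIs are
# not picked up on a later import.
with butler.export(directory="/tmp/export", format="yaml", transfer="direct") as export:
    export.saveDatasets(butler.registry.queryDatasets("bias", collections=...))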
25 changes: 20 additions & 5 deletions python/lsst/daf/butler/script/exportCalibs.py
@@ -65,12 +65,16 @@ def parseCalibrationCollection(registry, collection, datasetTypes):
calibType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
)
for result in associations:
- exportDatasets.append(result.ref)
- exportCollections.append(result.ref.run)
+ # Need an expanded dataId in case file templates will be used
+ # in the transfer.
+ dataId = registry.expandDataId(result.ref.dataId)
+ ref = result.ref.expanded(dataId)
+ exportDatasets.append(ref)
+ exportCollections.append(ref.run)
return exportCollections, exportDatasets


- def exportCalibs(repo, directory, collections):
+ def exportCalibs(repo, directory, collections, dataset_type, transfer):
"""Certify a set of calibrations with a validity range.
Parameters
@@ -85,6 +89,10 @@ def exportCalibs(repo, directory, collections):
Data collections to pull calibrations from. Must be an
existing `~CollectionType.CHAINED` or
`~CollectionType.CALIBRATION` collection.
dataset_type : `tuple` [`str`]
The dataset types to export. Default is to export all.
transfer : `str`
The transfer mode to use for exporting.
Returns
-------
@@ -99,8 +107,15 @@
"""
butler = Butler(repo, writeable=False)

if not dataset_type:
dataset_type = ...
if not collections:
collections = ...

calibTypes = [
- datasetType for datasetType in butler.registry.queryDatasetTypes(...) if datasetType.isCalibration()
+ datasetType
+ for datasetType in butler.registry.queryDatasetTypes(dataset_type)
+ if datasetType.isCalibration()
]

collectionsToExport = []
@@ -127,7 +142,7 @@
if os.path.exists(directory):
raise RuntimeError(f"Export directory exists: {directory}")
os.makedirs(directory)
with butler.export(directory=directory, format="yaml", transfer="auto") as export:
with butler.export(directory=directory, format="yaml", transfer=transfer) as export:
collectionsToExport = list(set(collectionsToExport))
datasetsToExport = list(set(datasetsToExport))

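The expansion step is what lets file templates that reference record metadata work during the transfer; the same pattern outside the script looks roughly like this hedged sketch (repository path, dataset type, and collection name are illustrative):

from lsst.daf.butler import Butler, CollectionType

registry = Butler("/repo").registry  # hypothetical repository
for assoc in registry.queryDatasetAssociations(
    "bias", collections="calib", collectionTypes=[CollectionType.CALIBRATION]
):
    # Attach dimension records so "{visit.name}"-style template entries
    # can be resolved when the file is transferred.
    ref = assoc.ref.expanded(registry.expandDataId(assoc.ref.dataId))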
1 change: 1 addition & 0 deletions tests/config/basic/templates.yaml
@@ -7,6 +7,7 @@ test_metric_comp: "{run:/}/{datasetType}.{component:?}/{datasetType}_v{visit:08d
metric2: "{run:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{physical_filter:?}/{instrument:?}_{visit.name:?}"
metric3: "{run:/}/{datasetType}/{instrument}"
metric4: "{run:/}/{component:?}_{instrument}_{physical_filter}_{visit:08d}"
metric5: "{run:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{physical_filter:?}/{instrument:?}_{visit:?}"
Integer: "{id}"
physical_filter+: "{run:/}/{instrument}_{physical_filter}"
instrument<DummyCamComp>:
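For reference, the grammar these templates exercise: "{visit.name}" (as in metric2) reads the name field from the attached visit record, while a trailing ":?" (used throughout the new metric5) marks an entry optional. A minimal hedged sketch of formatting such a template, assuming the ref already carries dimension records (otherwise the new RuntimeError applies):

from lsst.daf.butler import DatasetRef, FileTemplate

def format_path(expanded_ref: DatasetRef) -> str:
    # expanded_ref is assumed to have dimension records attached; without
    # them, a non-optional "visit.name" entry now raises RuntimeError.
    template = FileTemplate("{run:/}/{datasetType}/{instrument}_{visit.name:?}")
    return template.format(expanded_ref)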
15 changes: 15 additions & 0 deletions tests/test_butler.py
@@ -78,6 +78,7 @@ def mock_s3(cls):
DatasetRef,
DatasetType,
FileDataset,
FileTemplate,
FileTemplateValidationError,
StorageClassFactory,
ValidationError,
@@ -921,6 +922,7 @@ def testGetDatasetTypes(self):
ignore=[
"test_metric_comp",
"metric3",
"metric5",
"calexp",
"DummySC",
"datasetType.component",
@@ -943,6 +945,7 @@
ignore=[
"test_metric_comp",
"metric3",
"metric5",
"calexp",
"DummySC",
"datasetType.component",
@@ -1177,6 +1180,18 @@ def testPutTemplates(self):
# Check the template based on dimensions
butler.datastore.templates.validateTemplates([ref])

# Use a template that has a typo in dimension record metadata.
# Easier to test with a butler that has a ref with records attached.
template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
path = template.format(ref)
self.assertEqual(path, f"a/v423/{ref.id}_fits")

template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
with self.assertRaises(KeyError):
with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
template.format(ref)

# Now use a file template that will not result in unique filenames
with self.assertRaises(FileTemplateValidationError):
butler.put(metric, "metric3", dataId1)
12 changes: 6 additions & 6 deletions tests/test_datastore.py
@@ -336,7 +336,7 @@ def testTrustGetRequest(self):
i = 0
for sc_name in ("StructuredData", "StructuredComposite"):
i += 1
datasetTypeName = f"metric{i}"
datasetTypeName = f"test_metric{i}" # Different dataset type name each time.

if sc_name == "StructuredComposite":
disassembled = True
@@ -1135,9 +1135,9 @@ def testConstraints(self):
testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
for datasetTypeName, sc, accepted in (
("metric", sc1, True),
("metric2", sc1, False),
("metric5", sc1, False),
("metric33", sc1, True),
("metric2", sc2, True),
("metric5", sc2, True),
):
# Choose different temp file depending on StorageClass
testfile = testfile_j if sc.name.endswith("Json") else testfile_y
@@ -1233,10 +1233,10 @@ def testConstraints(self):

for typeName, dataId, sc, accept, ingest in (
("metric", dataId1, sc1, (False, True, False), True),
("metric2", dataId1, sc1, (False, False, False), False),
("metric2", dataId2, sc1, (True, False, False), False),
("metric5", dataId1, sc1, (False, False, False), False),
("metric5", dataId2, sc1, (True, False, False), False),
("metric33", dataId2, sc2, (True, True, False), True),
("metric2", dataId1, sc2, (False, True, False), True),
("metric5", dataId1, sc2, (False, True, False), True),
):

# Choose different temp file depending on StorageClass
9 changes: 9 additions & 0 deletions tests/test_templates.py
@@ -148,6 +148,15 @@ def testRunOrCollectionNeeded(self):
with self.assertRaises(FileTemplateValidationError):
self.assertTemplate(tmplstr, "run2/calexp/00052/U", self.makeDatasetRef("calexp"))

def testNoRecord(self):
# Attaching records is not possible in this test code, but we can check
# that formatting fails when a metadata entry has been requested and no
# record is attached.
tmplstr = "{run}/{datasetType}/{visit.name}/{physical_filter}"
with self.assertRaises(RuntimeError) as cm:
self.assertTemplate(tmplstr, "", self.makeDatasetRef("calexp"))
self.assertIn("No metadata", str(cm.exception))

def testOptional(self):
"""Optional units in templates."""
ref = self.makeDatasetRef("calexp", conform=False)
