Skip to content

Commit

Permalink
Allow id to be used in file template
Browse files Browse the repository at this point in the history
If id is used, it must be present in at least the file
name part of the template path.
  • Loading branch information
timj committed Jan 27, 2022
1 parent 1a2eac5 commit be21926
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 5 deletions.
30 changes: 25 additions & 5 deletions python/lsst/daf/butler/core/fileTemplates.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ class FileTemplate:
template.
"""

mandatoryFields = {"run"}
mandatoryFields = {"run", "id"}
"""A set of fields, one of which must be present in a template."""

datasetFields = {"datasetType", "component"}
Expand Down Expand Up @@ -462,6 +462,9 @@ def format(self, ref: DatasetRef) -> str:
usedRun = False
fields["run"] = ref.run

usedId = False
fields["id"] = ref.id

fmt = string.Formatter()
parts = fmt.parse(self.template)
output = ""
Expand All @@ -486,8 +489,11 @@ def format(self, ref: DatasetRef) -> str:
else:
optional = False

# We must use at least a run or id.
if field_name == "run":
usedRun = True
if field_name == "id":
usedId = True

if field_name == "collection":
raise KeyError(
Expand Down Expand Up @@ -552,9 +558,11 @@ def format(self, ref: DatasetRef) -> str:
"Component '{}' specified but template {} did not use it".format(component, self.template)
)

# Complain if there's no run
if not usedRun:
raise KeyError("Template does not include 'run'.")
# Complain if there's no run or id
if not usedRun and not usedId:
missing = ("run" if not usedRun else None, "id" if not usedId else None)
text = " or ".join(f"'{m}'" for m in missing if m is not None)
raise KeyError(f"Template does not include {text}.")

# Since this is known to be a path, normalize it in case some double
# slashes have crept in
Expand Down Expand Up @@ -596,12 +604,24 @@ def validateTemplate(self, entity: Union[DatasetRef, DatasetType, StorageClass,
)

# Check that there are some dimension fields in the template
# The id is allowed instead if present since that also uniquely
# identifies the file in the datastore.
allfields = self.fields(optionals=True)
if not allfields:
if not allfields and "id" not in withSpecials:
raise FileTemplateValidationError(
f"Template '{self}' does not seem to have any fields corresponding to dimensions."
)

# Require that if "id" is in the template then it must exist in the
# file part -- this avoids templates like "{id}/fixed" where the file
# name is fixed but the directory has the ID.
if "id" in withSpecials:
file_part = os.path.split(self.template)[-1]
if "{id}" not in file_part:
raise FileTemplateValidationError(
f"Template '{self}' includes the 'id' but that ID is not part of the file name."
)

# If we do not have dimensions available then all we can do is shrug
if not hasattr(entity, "dimensions"):
return
Expand Down
1 change: 1 addition & 0 deletions tests/config/basic/templates.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ test_metric_comp: "{run:/}/{datasetType}.{component:?}/{datasetType}_v{visit:08d
metric2: "{run:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{physical_filter:?}/{instrument:?}_{visit.name:?}"
metric3: "{run:/}/{datasetType}/{instrument}"
metric4: "{run:/}/{component:?}_{instrument}_{physical_filter}_{visit:08d}"
Integer: "{id}"
physical_filter+: "{run:/}/{instrument}_{physical_filter}"
instrument<DummyCamComp>:
metric33: "{run:/}/{instrument}/{datasetType}.{component:?}/{datasetType}_v{visit:08d}_f{physical_filter}_{component:?}"
26 changes: 26 additions & 0 deletions tests/test_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,23 @@ def testBasic(self):
self.makeDatasetRef("calexp", run="run/2", conform=False),
)

# Check that the id is sufficient without any other information.
self.assertTemplate("{id}", "1", self.makeDatasetRef("calexp", run="run2", conform=False))

self.assertTemplate("{run}/{id}", "run2/1", self.makeDatasetRef("calexp", run="run2", conform=False))

self.assertTemplate(
"fixed/{id}",
"fixed/1",
self.makeDatasetRef("calexp", run="run2", conform=False),
)

self.assertTemplate(
"fixed/{id}_{physical_filter}",
"fixed/1_Most_Amazing_U_Filter_Ever",
self.makeDatasetRef("calexp", run="run2", conform=False),
)

# Retain any "/" in run
tmplstr = "{run:/}/{datasetType}/{visit:05d}/{physical_filter}-trail-{run}"
self.assertTemplate(
Expand All @@ -123,6 +140,9 @@ def testBasic(self):
with self.assertRaises(FileTemplateValidationError):
FileTemplate("{run}_{datasetType}")

with self.assertRaises(FileTemplateValidationError):
FileTemplate("{id}/fixed")

def testRunOrCollectionNeeded(self):
tmplstr = "{datasetType}/{visit:05d}/{physical_filter}"
with self.assertRaises(FileTemplateValidationError):
Expand Down Expand Up @@ -346,6 +366,12 @@ def testValidation(self):
dataId={"physical_filter": "i", "instrument": "HSC"},
)

entities["metric6"] = self.makeDatasetRef(
"filter_inst",
storageClassName="Integer",
dataId={"physical_filter": "i", "instrument": "HSC"},
)

templates.validateTemplates(entities.values(), logFailures=True)

# Rerun but with a failure
Expand Down

0 comments on commit be21926

Please sign in to comment.