-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DM-13350: Use templates to work out file names from data units #17
Changes from all commits
8d7bf7c
dd18cdd
bc7f2a9
9eeeeeb
6537d0b
3012f5b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
"""Code relating to DataUnits.""" | ||
|
||
|
||
class DataUnits:
    """Represent DataUnits specification.

    Parameters
    ----------
    units : `dict`
        Dictionary of data units keys and values.  A shallow copy is
        stored, so later changes to the caller's dictionary are not seen
        by this object.
    """

    def __init__(self, units):
        # Copy so that the caller's dictionary is decoupled from ours.
        self.units = units.copy()

    def __repr__(self):
        return "{}({!r})".format(type(self).__name__, self.units)

    def definedUnits(self):
        """Return the DataUnits that have non-`None` values.

        Returns
        -------
        defined : `dict`
            Newly-created dictionary containing only those entries of
            ``units`` whose value is not `None`.
        """
        return {k: v for k, v in self.units.items() if v is not None}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
"""Support for file template string expansion.""" | ||
|
||
import os.path | ||
import string | ||
|
||
from .config import Config | ||
|
||
|
||
class FileTemplatesConfig(Config):
    """Configuration used to construct a `FileTemplates` collection.

    Adds nothing of its own to `Config`.
    """
|
||
|
||
class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.  Each item in the configuration maps a name
        (normally a dataset type, or the special name "default") to a
        template string.
    default : optional
        Accepted for interface compatibility; it is not currently used
        by this class.
    """

    def __init__(self, config, default=None):
        self.config = FileTemplatesConfig(config)
        self.templates = {name: FileTemplate(info)
                          for name, info in self.config.items()}

    def getTemplate(self, datasetType):
        """Retrieve the `FileTemplate` associated with the dataset type.

        The lookup order is: the exact dataset type name; the part of the
        name before the first "." (so a component such as "calexp.wcs"
        falls back to "calexp"); and finally the "default" template.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.  May be `None`, in which case only the
            "default" template is considered.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        TypeError
            No template matches and no "default" template is defined.
        """
        templates = self.templates

        if datasetType is not None:
            if datasetType in templates:
                return templates[datasetType]
            if "." in datasetType:
                # A component of a composite uses the composite's template
                baseType = datasetType.partition(".")[0]
                if baseType in templates:
                    return templates[baseType]

        if "default" in templates:
            return templates["default"]

        # No specific template and no default: give up for now.
        raise TypeError("Unable to determine file template from supplied type [{}]".format(datasetType))
|
||
|
||
class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used. The field names
    are taken from the DataUnits along with two additional fields:
    "datasetType" will be replaced with the DatasetType and "component"
    will be replaced with the component name of a composite.

    The mini-language is extended to understand a "?" in the format
    specification. This indicates that a field is optional. If that
    DataUnit is missing the field, along with the text before the field,
    unless it is a path separator, will be removed from the output path.
    """

    def __init__(self, template):
        self.template = template

    def format(self, dataUnits, datasetType=None, component=None):
        """Format a template string into a full path.

        Parameters
        ----------
        dataUnits : `DataUnits`
            DataUnits and the corresponding values.
        datasetType : `str`, optional.
            DatasetType name to use if needed. If it contains a "." separator
            the type name will be split up into the main DatasetType and a
            component.
        component : `str`, optional
            Component of a composite. If `datasetType` defines a component
            this parameter will be ignored.

        Returns
        -------
        path : `str`
            Expanded path.  It is normalized and always relative.

        Raises
        ------
        KeyError
            Requested field is not defined and the field is not optional.
            Or, `component` is specified but "component" was not part of
            the template.
        """
        # Work on a private copy so that the extra fields added below can
        # never leak back into the object that supplied the units.
        fields = dict(dataUnits.definedUnits())

        if datasetType is not None:
            # calexp.wcs means wcs component of a calexp
            if "." in datasetType:
                datasetType, component = datasetType.split(".", maxsplit=1)
            fields["datasetType"] = datasetType

        if component is not None:
            fields["component"] = component

        usedComponent = False
        fmt = string.Formatter()
        segments = []

        for literal, field_name, format_spec, conversion in fmt.parse(self.template):

            if field_name is None:
                # Pure literal text (for example a trailing ".fits" or an
                # escaped brace): there is no field, and format_spec is
                # None, so there is nothing to look up or format.
                segments.append(literal)
                continue

            if field_name == "component":
                usedComponent = True

            optional = "?" in format_spec
            if optional:
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("?", "")

            if field_name in fields:
                # Honour any !r / !s / !a conversion in the template
                value = fmt.convert_field(fields[field_name], conversion)
            elif optional:
                # If this is optional ignore the format spec
                # and do not include the literal text prior to the optional
                # field unless it contains a "/" path separator
                format_spec = ""
                value = ""
                if "/" not in literal:
                    literal = ""
            else:
                raise KeyError("{} requested in template but not defined and not optional".format(field_name))

            # Now use standard formatting
            segments.append(literal)
            segments.append(format(value, format_spec))

        output = "".join(segments)

        # Complain if we were meant to use a component.  This field is
        # treated differently from the others: an explicitly requested
        # component that the template ignores means the template cannot
        # distinguish components.  Other DataUnits may legitimately be more
        # detailed than the template needs, so unused units are not checked.
        if component is not None and not usedComponent:
            raise KeyError("Component {} specified but template {} did not use it".format(component,
                                                                                          self.template))

        # Since this is known to be a path, normalize it in case some double
        # slashes have crept in
        path = os.path.normpath(output)

        # It should not be an absolute path (may happen with optionals)
        if os.path.isabs(path):
            path = os.path.relpath(path, start="/")

        return path
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure why this is needed. Should be absorbed into `DataUnitTypeSet`. But we will have to do that later anyway, so fine to keep it around for now, I suppose.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did not know we had a class for that. I asked on Slack but got no hints. Sorry.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't see a `DataUnitTypeSet` in the codebase.