
Commit

Merge pull request #346 from lsst/tickets/DM-24247
DM-24247: Fix skypix dimension aliasing in file template validation
timj committed Aug 11, 2020
2 parents 27159a1 + 3a946fa commit a3e9947
Showing 3 changed files with 79 additions and 27 deletions.
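
The fix lets file templates refer to a generic "skypix" field that matches whatever concrete sky pixelization (htm7, healpix9, ...) a dataset type actually carries. A toy illustration of the aliasing idea; the template string and field values below are illustrative, not taken from any butler configuration:

    # Toy illustration: a template field named "skypix" is satisfied by
    # whichever concrete pixelization is present in the data ID fields.
    template = "{run}/{datasetType}/{skypix}"
    fields = {"run": "ingest", "datasetType": "deepCoadd", "htm7": 12345}
    fields["skypix"] = fields["htm7"]  # alias the single skypix dimension
    print(template.format(**fields))   # ingest/deepCoadd/12345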
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/cli/cmd/commands.py
@@ -92,7 +92,9 @@ def config_dump(*args, **kwargs):
help="DatasetType(s) to ignore for validation.")
def config_validate(*args, **kwargs):
"""Validate the configuration files for a Gen3 Butler repository."""
cli_handle_exception(configValidate, *args, **kwargs)
is_good = cli_handle_exception(configValidate, *args, **kwargs)
if not is_good:
raise click.exceptions.Exit(1)


@click.command(short_help="Search for collections.")
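
A note on this first change: the command previously relied on configValidate raising on failure; it now turns the boolean result into a process exit status, so shell scripts can branch on the return code. A minimal, self-contained sketch of that pattern, assuming only the click package; the validate() stand-in below is hypothetical and not part of the butler API:

    # Minimal sketch: convert a boolean validation result into an exit status.
    # validate() is a hypothetical stand-in for configValidate.
    import click

    def validate() -> bool:
        return False  # pretend validation found a problem

    @click.command()
    def config_validate() -> None:
        """Exit 0 on success, 1 on validation failure."""
        if not validate():
            raise click.exceptions.Exit(1)

    if __name__ == "__main__":
        config_validate()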
81 changes: 62 additions & 19 deletions python/lsst/daf/butler/core/fileTemplates.py
@@ -45,11 +45,12 @@
 from .configSupport import processLookupConfigs, LookupKey
 from .exceptions import ValidationError
 from .dimensions import SkyPixDimension, DataCoordinate
+from .datasets import DatasetRef
+from .storageClass import StorageClass

 if TYPE_CHECKING:
     from .dimensions import DimensionUniverse
-    from .datasets import DatasetType, DatasetRef
-    from .storageClass import StorageClass
+    from .datasets import DatasetType

 log = logging.getLogger(__name__)

@@ -443,20 +444,11 @@ def format(self, ref: DatasetRef) -> str:
         if isinstance(ref.dataId, DataCoordinate):
             if ref.dataId.hasRecords():
                 extras = ref.dataId.records.byName()
-        # If there is exactly one SkyPixDimension in the data ID, alias its
-        # value with the key "skypix", so we can use that to match any
-        # skypix dimension.
-        # We restrict this behavior to the (real-world) case where the
-        # data ID is a DataCoordinate, not just a dict. That should only
-        # not be true in some test code, but that test code is a pain to
-        # update to be more like the real world while still providing our
-        # only tests of important behavior.
-        skypix = [dimension for dimension in ref.datasetType.dimensions
-                  if isinstance(dimension, SkyPixDimension)]
-        if len(skypix) == 1:
-            fields["skypix"] = fields[skypix[0].name]
+        skypix_alias = self._determine_skypix_alias(ref)
+        if skypix_alias is not None:
+            fields["skypix"] = fields[skypix_alias]
             if extras:
-                extras["skypix"] = extras[skypix[0].name]
+                extras["skypix"] = extras[skypix_alias]

         datasetType = ref.datasetType
         fields["datasetType"], component = datasetType.nameAndComponent()
@@ -602,6 +594,10 @@ def validateTemplate(self, entity: Union[DatasetRef, DatasetType, StorageClass,
         if not hasattr(entity, "dimensions"):
             return

+        # Mypy does not know about hasattr so help it out
+        if entity is None:
+            return
+
         # if this entity represents a component then insist that component
         # is present in the template. If the entity is not a component
         # make sure that component is not mandatory.
@@ -620,21 +616,32 @@
         except AttributeError:
             pass

+        # From here on we need at least a DatasetType
+        # Mypy doesn't understand the AttributeError clause below
+        if isinstance(entity, StorageClass):
+            return
+
         # Get the dimension links to get the full set of available field names
         # Fall back to dataId keys if we have them but no links.
         # dataId keys must still be present in the template
-        # Ignore warnings from mypy concerning StorageClass and DatasetType
-        # not supporting the full API.
         try:
-            minimal = set(entity.dimensions.required.names)  # type: ignore
-            maximal = set(entity.dimensions.names)  # type: ignore
+            minimal = set(entity.dimensions.required.names)
+            maximal = set(entity.dimensions.names)
         except AttributeError:
             try:
                 minimal = set(entity.dataId.keys())  # type: ignore
                 maximal = minimal
             except AttributeError:
                 return

+        # Replace specific skypix dimensions with generic one
+        skypix_alias = self._determine_skypix_alias(entity)
+        if skypix_alias is not None:
+            minimal.add("skypix")
+            maximal.add("skypix")
+            minimal.remove(skypix_alias)
+            maximal.remove(skypix_alias)
+
         required = self.fields(optionals=False)

         # Calculate any field usage that does not match a dimension
@@ -647,3 +654,39 @@ def validateTemplate(self, entity: Union[DatasetRef, DatasetType, StorageClass,
f" {allfields} is not a superset of {minimal}.")

return

def _determine_skypix_alias(self, entity: Union[DatasetRef, DatasetType]) -> Optional[str]:
"""Given a `DatasetRef` return the dimension name that refers to a sky
pixel.
Parameters
----------
ref : `DatasetRef` or `DatasetType`
The entity to examine.
Returns
-------
alias : `str`
If there is a sky pixelization in the supplied dataId, return
its name, else returns `None`. Will return `None` also if there
is more than one sky pix dimension in the data ID or if the
dataID is not a `DataCoordinate`
"""
alias = None

if isinstance(entity, DatasetRef):
entity = entity.datasetType

# If there is exactly one SkyPixDimension in the data ID, alias its
# value with the key "skypix", so we can use that to match any
# skypix dimension.
# We restrict this behavior to the (real-world) case where the
# data ID is a DataCoordinate, not just a dict. That should only
# not be true in some test code, but that test code is a pain to
# update to be more like the real world while still providing our
# only tests of important behavior.
skypix = [dimension for dimension in entity.dimensions
if isinstance(dimension, SkyPixDimension)]
if len(skypix) == 1:
alias = skypix[0].name
return alias
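
Why the alias matters: with it, a file template can use a generic "{skypix}" placeholder and still validate against a dataset type whose dimensions include any one concrete pixelization. A minimal, self-contained sketch of the aliasing rule itself; Dimension and SkyPixDimension here are hypothetical stand-ins for the real daf_butler classes:

    # Sketch of the single-skypix aliasing rule, under stand-in classes.
    from typing import List, Optional

    class Dimension:
        def __init__(self, name: str) -> None:
            self.name = name

    class SkyPixDimension(Dimension):
        pass

    def determine_skypix_alias(dimensions: List[Dimension]) -> Optional[str]:
        # Alias only when exactly one skypix dimension is present;
        # with two or more the substitution would be ambiguous.
        skypix = [d for d in dimensions if isinstance(d, SkyPixDimension)]
        return skypix[0].name if len(skypix) == 1 else None

    assert determine_skypix_alias([Dimension("instrument"),
                                   SkyPixDimension("htm7")]) == "htm7"
    assert determine_skypix_alias([SkyPixDimension("htm7"),
                                   SkyPixDimension("healpix9")]) is None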
21 changes: 14 additions & 7 deletions python/lsst/daf/butler/script/configValidate.py
@@ -19,7 +19,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

-from .. import Butler
+from .. import Butler, ValidationError


def configValidate(repo, quiet, dataset_type, ignore):
@@ -36,12 +36,19 @@ def configValidate(repo, quiet, dataset_type, ignore):
     ignore : [`str`]
         "DatasetTypes to ignore for validation."

-    Raises
-    ------
-    ValidationError
-        If a configuration fails validation.
+    Returns
+    -------
+    is_good : `bool`
+        `True` if validation was okay. `False` if there was a validation
+        error.
     """
     logFailures = not quiet
     butler = Butler(config=repo)
-    butler.validateConfiguration(logFailures=logFailures, datasetTypeNames=dataset_type, ignore=ignore)
-    print("No problems encountered with configuration.")
+    is_good = True
+    try:
+        butler.validateConfiguration(logFailures=logFailures, datasetTypeNames=dataset_type, ignore=ignore)
+    except ValidationError:
+        is_good = False
+    else:
+        print("No problems encountered with configuration.")
+    return is_good
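
Taken together, the three files form a pipeline: configValidate converts a ValidationError into a boolean, and the CLI command converts that boolean into an exit status. A self-contained sketch of the raise-to-boolean half, with ValidationError and validate() as stand-ins rather than the butler API:

    # Sketch of the raise-to-boolean conversion used in configValidate;
    # ValidationError and validate() are stand-ins, not the butler API.
    class ValidationError(Exception):
        pass

    def validate(ok: bool) -> None:
        if not ok:
            raise ValidationError("configuration failed validation")

    def config_validate(ok: bool) -> bool:
        is_good = True
        try:
            validate(ok)
        except ValidationError:
            is_good = False
        else:
            print("No problems encountered with configuration.")
        return is_good

    assert config_validate(True) is True
    assert config_validate(False) is False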

