Skip to content

Commit

Permalink
Refactor name lookup for templates/formatters/composites
Browse files Browse the repository at this point in the history
Now wherever a config file has keys that can be StorageClass
names, DatasetType names or "instrument<xxx>" overrides,
the parsing of this is done in one place and a new LookupKey
object is created and used as the key in the mappings.

This provides the groundwork for supporting Dimensions
in config files.
  • Loading branch information
timj committed Feb 21, 2019
1 parent 990c80d commit 3513512
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 54 deletions.
14 changes: 10 additions & 4 deletions python/lsst/daf/butler/core/composites.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import logging

from .configSupport import processLookupConfigs
from .config import ConfigSubset

log = logging.getLogger(__name__)
Expand All @@ -41,6 +42,7 @@ class CompositesConfig(ConfigSubset):
def validate(self):
    """Validate entries have the correct type."""
    super().validate()
    # For now assume flat config with keys mapping to booleans; report
    # the first entry whose value is not a bool.
    bad = [key for key, value in self[DISASSEMBLY_KEY].items()
           if not isinstance(value, bool)]
    if bad:
        k = bad[0]
        raise ValueError(f"CompositesConfig: Key {k} is not a Boolean")
Expand All @@ -62,6 +64,10 @@ def __init__(self, config):
assert isinstance(config, CompositesConfig)
self.config = config

# Calculate the disassembly lookup table -- no need to process
# the values
self._lut = processLookupConfigs(self.config[DISASSEMBLY_KEY])

def shouldBeDisassembled(self, entity):
"""Given some choices, indicate whether the entity should be
disassembled.
Expand Down Expand Up @@ -97,10 +103,10 @@ def shouldBeDisassembled(self, entity):
matchName = "{} (via default)".format(entity)
disassemble = self.config["default"]

for name in (entity._lookupNames()):
if name is not None and name in self.config[DISASSEMBLY_KEY]:
disassemble = self.config[DISASSEMBLY_KEY, name]
matchName = name
for key in (entity._lookupNames()):
if key is not None and key in self._lut:
disassemble = self._lut[key]
matchName = key
break

log.debug("%s will%s be disassembled", matchName, "" if disassemble else " not")
Expand Down
193 changes: 193 additions & 0 deletions python/lsst/daf/butler/core/configSupport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for configuration snippets"""

__all__ = ("LookupKey", "processLookupConfigs")

import logging
import re
from collections.abc import Mapping
from .dimensions import DimensionNameSet

log = logging.getLogger(__name__)


class LookupKey:
    """Representation of a key that can be used to look up information
    based on dataset type name, storage class name, or dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionNameSet` and stored in the
        ``dimensions`` property.
    dimensions : `DimensionNameSet`, optional
        Dimensions that are relevant for lookup.  Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.

    Raises
    ------
    ValueError
        Raised if neither, or both, of ``name`` and ``dimensions`` are
        given.
    """

    def __init__(self, name=None, dimensions=None, dataId=None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        if name is not None:
            if "+" in name:
                # A "+"-separated name denotes a set of dimension names.
                self._dimensions = DimensionNameSet(name.split("+"))
            else:
                self._name = name
        else:
            self._dimensions = dimensions

        # The dataId is converted to a frozenset of key/value tuples so
        # that the key is immutable and hashable.
        if dataId is not None:
            self._dataId = frozenset((k, v) for k, v in dataId.items())
        else:
            self._dataId = None

    def __str__(self):
        return "({}, {})".format(self._name if self._name else self._dimensions,
                                 ",".join(str(t) for t in self._dataId) if self._dataId else "")

    def __repr__(self):
        params = ""
        if self.name:
            params += f"name={self.name!r},"
        if self.dimensions:
            params += f"dimensions={self.dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other):
        # Comparing against an arbitrary object (e.g. a plain string key
        # in a mixed container) must return NotImplemented rather than
        # raising AttributeError on the missing private attributes.
        if not isinstance(other, LookupKey):
            return NotImplemented
        return (self._name == other._name
                and self._dimensions == other._dimensions
                and self._dataId == other._dataId)

    @property
    def name(self):
        """Primary name string to use as lookup (`str` or `None`)."""
        return self._name

    @property
    def dimensions(self):
        """Dimensions associated with lookup (`DimensionNameSet` or `None`)."""
        return self._dimensions

    @property
    def dataId(self):
        """Set of key/value tuples that are important for dataId lookup
        (`frozenset` or `None`)."""
        return self._dataId

    def __hash__(self):
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name=None, dimensions=None, dataId=None):
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionNameSet`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are given.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # If neither name nor dimensions are specified we copy from the
        # current object.  Otherwise we use the supplied values.
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # To copy the dataId we need to convert it back to a dict since
        # the constructor expects a mapping, not a frozenset of tuples.
        if dataId is None and self._dataId is not None:
            dataId = {k: v for k, v in self._dataId}

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)


def processLookupConfigs(config):
    """Process sections of configuration relating to lookups by dataset type
    name, storage class name, dataId components or dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str`
        A `dict` with keys constructed from the configuration keys and values
        being simple strings.  It is assumed the caller will convert the
        values to the required form.
    """
    contents = {}
    for name, value in config.items():
        if isinstance(value, Mapping):
            # A mapping value indicates a dataId component -- the key must
            # be of the form "key<value>" (e.g. "instrument<HSC>").
            kv = re.match(r"([a-z_]+)<(.*)>$", name)
            if kv:
                dataIdKey = kv.group(1)
                dataIdValue = kv.group(2)
                for subKey, subStr in value.items():
                    lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue})
                    contents[lookup] = subStr
            else:
                # Malformed hierarchical keys are skipped, not fatal.
                log.warning("Hierarchical key '%s' not in form 'key<value>'", name)
        else:
            lookup = LookupKey(name=name)
            contents[lookup] = value

    # NOTE: removed leftover debug print loop over the contents dict --
    # library code must not write to stdout.
    return contents
23 changes: 17 additions & 6 deletions python/lsst/daf/butler/core/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from .utils import slotValuesAreEqual
from .storageClass import StorageClass, StorageClassFactory
from .dimensions import DimensionGraph, DimensionNameSet
from .configSupport import LookupKey

__all__ = ("DatasetType", "DatasetRef")

Expand Down Expand Up @@ -236,16 +237,25 @@ def isComposite(self):
return self.storageClass.isComposite()

def _lookupNames(self):
    """Name keys to use when looking up this datasetType in a
    configuration.

    The names are returned in order of priority.

    Returns
    -------
    names : `tuple` of `LookupKey`
        Tuple of the `DatasetType` name and the `StorageClass` name.
        If the name includes a component the name with the component
        is first, then the name without the component and finally
        the storage class name.
    """
    # Span previously interleaved pre- and post-commit diff lines; this is
    # the reconstructed post-commit implementation.
    rootName, componentName = self.nameAndComponent()
    lookups = (LookupKey(name=self.name),)
    if componentName is not None:
        # Fall back to the composite (root) name when a component-specific
        # entry is absent.
        lookups = lookups + (LookupKey(name=rootName),)

    return lookups + self.storageClass._lookupNames()

def __reduce__(self):
"""Support pickling.
Expand Down Expand Up @@ -449,13 +459,13 @@ def isComposite(self):
return self.datasetType.isComposite()

def _lookupNames(self):
    """Name keys to use when looking up this DatasetRef in a configuration.

    The names are returned in order of priority.

    Returns
    -------
    names : `tuple` of `LookupKey`
        Tuple of the `DatasetType` name and the `StorageClass` name.
        If ``instrument`` is defined in the dataId, each of those names
        is cloned with the instrument dataId attached and the clones are
        added to the start of the tuple, so instrument-specific entries
        take priority over generic ones.
    """
    # Span previously interleaved pre- and post-commit diff lines; this is
    # the reconstructed post-commit implementation.
    names = self.datasetType._lookupNames()

    if "instrument" in self.dataId:
        names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
                      for n in names) + names

    return names
27 changes: 8 additions & 19 deletions python/lsst/daf/butler/core/fileTemplates.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@

import os.path
import string
from collections.abc import Mapping

from .config import Config
from .datasets import DatasetType
from .configSupport import processLookupConfigs, LookupKey


class FileTemplatesConfig(Config):
Expand Down Expand Up @@ -65,22 +64,18 @@ def __init__(self, config, default=None):
self.config = FileTemplatesConfig(config)
self.templates = {}
self.default = FileTemplate(default) if default is not None else None
for name, templateStr in self.config.items():
# We can disable defaulting with an empty string in a config
# or by using a boolean
if name == "default":
contents = processLookupConfigs(self.config)

# Convert all the values to FileTemplate, handling defaults
defaultKey = LookupKey(name="default")
for key, templateStr in contents.items():
if key == defaultKey:
if not templateStr:
self.default = None
else:
self.default = FileTemplate(templateStr)
else:
# Possible to have a second level hierarchy but store as
# full names without separator
if isinstance(templateStr, Mapping):
for subKey, subStr in templateStr.items():
self.templates[f"{name}{subKey}"] = FileTemplate(subStr)
else:
self.templates[name] = FileTemplate(templateStr)
self.templates[key] = FileTemplate(templateStr)

def getTemplate(self, entity):
"""Retrieve the `FileTemplate` associated with the dataset type.
Expand Down Expand Up @@ -120,12 +115,6 @@ def getTemplate(self, entity):
template = self.templates[name]
break

baseType, component = DatasetType.splitDatasetTypeName(name)
if component is not None and baseType in self.templates:
template = self.templates[baseType]
break

# if still not template give up for now.
if template is None:
raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

Expand Down
11 changes: 4 additions & 7 deletions python/lsst/daf/butler/core/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from abc import ABCMeta, abstractmethod
from collections.abc import Mapping

from .configSupport import processLookupConfigs
from .mappingFactory import MappingFactory
from .utils import getFullTypeName

Expand Down Expand Up @@ -158,12 +158,9 @@ def registerFormatters(self, config):
that will be returned if a `DatasetRef` contains a matching instrument
name in the data ID.
"""
for name, f in config.items():
if isinstance(f, Mapping):
for subName, subF in f.items():
self.registerFormatter(f"{name}{subName}", subF)
else:
self.registerFormatter(name, f)
contents = processLookupConfigs(config)
for key, f in contents.items():
self.registerFormatter(key, f)

def getFormatter(self, entity):
"""Get a new formatter instance.
Expand Down

0 comments on commit 3513512

Please sign in to comment.