Skip to content

Commit

Permalink
Refactor name lookup for templates/formatters/composites
Browse files Browse the repository at this point in the history
Now wherever a config file has keys that can be StorageClass
names, DatasetType names or "instrument<xxx>" overrides,
the parsing of this is done in one place and a new LookupKey
object is created and used as the key in the mappings.

This provides the groundwork for supporting Dimensions
in config files.
  • Loading branch information
timj committed Feb 21, 2019
1 parent 990c80d commit 3513512
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 54 deletions.
14 changes: 10 additions & 4 deletions python/lsst/daf/butler/core/composites.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import logging

from .configSupport import processLookupConfigs
from .config import ConfigSubset

log = logging.getLogger(__name__)
Expand All @@ -41,6 +42,7 @@ class CompositesConfig(ConfigSubset):
def validate(self):
    """Validate entries have the correct type."""
    super().validate()
    # For now assume flat config with keys mapping to booleans; report
    # the first entry whose value is not a bool.
    bad = [key for key, value in self[DISASSEMBLY_KEY].items()
           if not isinstance(value, bool)]
    if bad:
        k = bad[0]
        raise ValueError(f"CompositesConfig: Key {k} is not a Boolean")
Expand All @@ -62,6 +64,10 @@ def __init__(self, config):
assert isinstance(config, CompositesConfig)
self.config = config

# Calculate the disassembly lookup table -- no need to process
# the values
self._lut = processLookupConfigs(self.config[DISASSEMBLY_KEY])

def shouldBeDisassembled(self, entity):
"""Given some choices, indicate whether the entity should be
disassembled.
Expand Down Expand Up @@ -97,10 +103,10 @@ def shouldBeDisassembled(self, entity):
matchName = "{} (via default)".format(entity)
disassemble = self.config["default"]

for name in (entity._lookupNames()):
if name is not None and name in self.config[DISASSEMBLY_KEY]:
disassemble = self.config[DISASSEMBLY_KEY, name]
matchName = name
for key in (entity._lookupNames()):
if key is not None and key in self._lut:
disassemble = self._lut[key]
matchName = key
break

log.debug("%s will%s be disassembled", matchName, "" if disassemble else " not")
Expand Down
193 changes: 193 additions & 0 deletions python/lsst/daf/butler/core/configSupport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for configuration snippets"""

__all__ = ("LookupKey", "processLookupConfigs")

import logging
import re
from collections.abc import Mapping
from .dimensions import DimensionNameSet

log = logging.getLogger(__name__)


class LookupKey:
    """Representation of a key that can be used to look up information
    based on dataset type name, storage class name, or dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionNameSet` and stored in the
        ``dimensions`` property.
    dimensions : `DimensionNameSet`, optional
        Dimensions that are relevant for lookup.  Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.

    Raises
    ------
    ValueError
        Raised if neither, or both, of ``name`` and ``dimensions`` are
        given.
    """

    def __init__(self, name=None, dimensions=None, dataId=None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        if name is not None:
            if "+" in name:
                # A "+"-separated name denotes a set of dimension names.
                self._dimensions = DimensionNameSet(name.split("+"))
            else:
                self._name = name
        else:
            self._dimensions = dimensions

        # The dataId is converted to a frozenset of key/value tuples so
        # that the key is immutable and hashable.
        if dataId is not None:
            self._dataId = frozenset((k, v) for k, v in dataId.items())
        else:
            self._dataId = None

    def __str__(self):
        return "({}, {})".format(self._name if self._name else self._dimensions,
                                 ",".join(str(t) for t in self._dataId) if self._dataId else "")

    def __repr__(self):
        params = ""
        if self.name:
            params += f"name={self.name!r},"
        if self.dimensions:
            params += f"dimensions={self.dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other):
        # Comparing against an arbitrary object (e.g. a plain string key
        # in a mixed container) must return NotImplemented rather than
        # raising AttributeError on the missing private attributes.
        if not isinstance(other, LookupKey):
            return NotImplemented
        return (self._name == other._name
                and self._dimensions == other._dimensions
                and self._dataId == other._dataId)

    @property
    def name(self):
        """Primary name string to use as lookup (`str` or `None`)."""
        return self._name

    @property
    def dimensions(self):
        """Dimensions associated with lookup (`DimensionNameSet` or `None`)."""
        return self._dimensions

    @property
    def dataId(self):
        """Set of key/value tuples that are important for dataId lookup
        (`frozenset` or `None`)."""
        return self._dataId

    def __hash__(self):
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name=None, dimensions=None, dataId=None):
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionNameSet`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are given.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # If neither name nor dimensions are specified we copy from the
        # current object.  Otherwise we use the supplied values.
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # To copy the dataId we need to convert it back to a dict since
        # the constructor expects a mapping, not a frozenset of tuples.
        if dataId is None and self._dataId is not None:
            dataId = {k: v for k, v in self._dataId}

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)


def processLookupConfigs(config):
    """Process sections of configuration relating to lookups by dataset type
    name, storage class name, dataId components or dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str`
        A `dict` with keys constructed from the configuration keys and values
        being simple strings.  It is assumed the caller will convert the
        values to the required form.
    """
    contents = {}
    for name, value in config.items():
        if isinstance(value, Mapping):
            # A mapping value indicates a dataId component -- the key must
            # be of the form "key<value>" (e.g. "instrument<HSC>").
            kv = re.match(r"([a-z_]+)<(.*)>$", name)
            if kv:
                dataIdKey = kv.group(1)
                dataIdValue = kv.group(2)
                for subKey, subStr in value.items():
                    lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue})
                    contents[lookup] = subStr
            else:
                # Malformed hierarchical keys are skipped, not fatal.
                log.warning("Hierarchical key '%s' not in form 'key<value>'", name)
        else:
            lookup = LookupKey(name=name)
            contents[lookup] = value

    # NOTE: removed leftover debug print loop over the contents dict --
    # library code must not write to stdout.
    return contents
23 changes: 17 additions & 6 deletions python/lsst/daf/butler/core/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from .utils import slotValuesAreEqual
from .storageClass import StorageClass, StorageClassFactory
from .dimensions import DimensionGraph, DimensionNameSet
from .configSupport import LookupKey

__all__ = ("DatasetType", "DatasetRef")

Expand Down Expand Up @@ -236,16 +237,25 @@ def isComposite(self):
return self.storageClass.isComposite()

def _lookupNames(self):
    """Name keys to use when looking up this datasetType in a
    configuration.

    The names are returned in order of priority.

    Returns
    -------
    names : `tuple` of `LookupKey`
        Tuple of the `DatasetType` name and the `StorageClass` name.
        If the name includes a component the name with the component
        is first, then the name without the component and finally
        the storage class name.
    """
    # Span previously interleaved pre- and post-commit diff lines; this is
    # the reconstructed post-commit implementation.
    rootName, componentName = self.nameAndComponent()
    lookups = (LookupKey(name=self.name),)
    if componentName is not None:
        # Fall back to the composite (root) name when a component-specific
        # entry is absent.
        lookups = lookups + (LookupKey(name=rootName),)

    return lookups + self.storageClass._lookupNames()

def __reduce__(self):
"""Support pickling.
Expand Down Expand Up @@ -449,13 +459,13 @@ def isComposite(self):
return self.datasetType.isComposite()

def _lookupNames(self):
    """Name keys to use when looking up this DatasetRef in a configuration.

    The names are returned in order of priority.

    Returns
    -------
    names : `tuple` of `LookupKey`
        Tuple of the `DatasetType` name and the `StorageClass` name.
        If ``instrument`` is defined in the dataId, each of those names
        is cloned with the instrument dataId attached and the clones are
        added to the start of the tuple, so instrument-specific entries
        take priority over generic ones.
    """
    # Span previously interleaved pre- and post-commit diff lines; this is
    # the reconstructed post-commit implementation.
    names = self.datasetType._lookupNames()

    if "instrument" in self.dataId:
        names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
                      for n in names) + names

    return names
27 changes: 8 additions & 19 deletions python/lsst/daf/butler/core/fileTemplates.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@

import os.path
import string
from collections.abc import Mapping

from .config import Config
from .datasets import DatasetType
from .configSupport import processLookupConfigs, LookupKey


class FileTemplatesConfig(Config):
Expand Down Expand Up @@ -65,22 +64,18 @@ def __init__(self, config, default=None):
self.config = FileTemplatesConfig(config)
self.templates = {}
self.default = FileTemplate(default) if default is not None else None
for name, templateStr in self.config.items():
# We can disable defaulting with an empty string in a config
# or by using a boolean
if name == "default":
contents = processLookupConfigs(self.config)

# Convert all the values to FileTemplate, handling defaults
defaultKey = LookupKey(name="default")
for key, templateStr in contents.items():
if key == defaultKey:
if not templateStr:
self.default = None
else:
self.default = FileTemplate(templateStr)
else:
# Possible to have a second level hierarchy but store as
# full names without separator
if isinstance(templateStr, Mapping):
for subKey, subStr in templateStr.items():
self.templates[f"{name}{subKey}"] = FileTemplate(subStr)
else:
self.templates[name] = FileTemplate(templateStr)
self.templates[key] = FileTemplate(templateStr)

def getTemplate(self, entity):
"""Retrieve the `FileTemplate` associated with the dataset type.
Expand Down Expand Up @@ -120,12 +115,6 @@ def getTemplate(self, entity):
template = self.templates[name]
break

baseType, component = DatasetType.splitDatasetTypeName(name)
if component is not None and baseType in self.templates:
template = self.templates[baseType]
break

# if still not template give up for now.
if template is None:
raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

Expand Down
11 changes: 4 additions & 7 deletions python/lsst/daf/butler/core/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from abc import ABCMeta, abstractmethod
from collections.abc import Mapping

from .configSupport import processLookupConfigs
from .mappingFactory import MappingFactory
from .utils import getFullTypeName

Expand Down Expand Up @@ -158,12 +158,9 @@ def registerFormatters(self, config):
that will be returned if a `DatasetRef` contains a matching instrument
name in the data ID.
"""
for name, f in config.items():
if isinstance(f, Mapping):
for subName, subF in f.items():
self.registerFormatter(f"{name}{subName}", subF)
else:
self.registerFormatter(name, f)
contents = processLookupConfigs(config)
for key, f in contents.items():
self.registerFormatter(key, f)

def getFormatter(self, entity):
"""Get a new formatter instance.
Expand Down

0 comments on commit 3513512

Please sign in to comment.