Skip to content

Commit

Permalink
Merge pull request #595 from lsst/tickets/DM-32305
Browse files Browse the repository at this point in the history
DM-32305: Some speed ups in Config
  • Loading branch information
timj committed Nov 9, 2021
2 parents a8150d9 + 04094b8 commit bf61746
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 35 deletions.
File renamed without changes.
1 change: 1 addition & 0 deletions doc/changes/DM-32305.perf.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Minor efficiency improvements when accessing `lsst.daf.butler.Config` hierarchies.
93 changes: 58 additions & 35 deletions python/lsst/daf/butler/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,36 @@ def _doUpdate(d, u):
return d


def _checkNextItem(k, d, create, must_be_dict):
"""See if k is in d and if it is return the new child."""
nextVal = None
isThere = False
if d is None:
# We have gone past the end of the hierarchy
pass
elif not must_be_dict and isinstance(d, collections.abc.Sequence):
# Check for Sequence first because for lists
# __contains__ checks whether value is found in list
# not whether the index exists in list. When we traverse
# the hierarchy we are interested in the index.
try:
nextVal = d[int(k)]
isThere = True
except IndexError:
pass
except ValueError:
isThere = k in d
elif k in d:
nextVal = d[k]
isThere = True
elif create:
d[k] = {}
nextVal = d[k]
isThere = True

return nextVal, isThere


class Loader(yamlLoader):
"""YAML Loader that supports file include directives.
Expand Down Expand Up @@ -219,7 +249,9 @@ def __init__(self, other=None):
if isinstance(other, Config):
self._data = copy.deepcopy(other._data)
self.configFile = other.configFile
elif isinstance(other, collections.abc.Mapping):
elif isinstance(other, (dict, collections.abc.Mapping)):
# In most cases we have a dict, and it's more efficient
# to check for a dict instance before checking the generic mapping.
self.update(other)
elif isinstance(other, (str, ButlerURI, Path)):
# if other is a string, assume it is a file path/URI
Expand Down Expand Up @@ -544,43 +576,21 @@ def _findInHierarchy(self, keys, create=False):
"""
d = self._data

def checkNextItem(k, d, create):
"""See if k is in d and if it is return the new child."""
nextVal = None
isThere = False
if d is None:
# We have gone past the end of the hierarchy
pass
elif isinstance(d, collections.abc.Sequence):
# Check sequence first because for lists
# __contains__ checks whether value is found in list
# not whether the index exists in list. When we traverse
# the hierarchy we are interested in the index.
try:
nextVal = d[int(k)]
isThere = True
except IndexError:
pass
except ValueError:
isThere = k in d
elif k in d:
nextVal = d[k]
isThere = True
elif create:
d[k] = {}
nextVal = d[k]
isThere = True
return nextVal, isThere
# For the first key, d must be a dict so it is a waste
# of time to check for a sequence.
must_be_dict = True

hierarchy = []
complete = True
for k in keys:
d, isThere = checkNextItem(k, d, create)
d, isThere = _checkNextItem(k, d, create, must_be_dict)
if isThere:
hierarchy.append(d)
else:
complete = False
break
# Second time round it might be a sequence.
must_be_dict = False

return hierarchy, complete

Expand All @@ -589,14 +599,27 @@ def __getitem__(self, name):
# match. This allows `Config.items()` to work via a simple
# __iter__ implementation that returns top level keys of
# self._data.
keys = self._getKeyHierarchy(name)

hierarchy, complete = self._findInHierarchy(keys)
if not complete:
raise KeyError(f"{name} not found")
data = hierarchy[-1]
# If the name matches a key in the top-level hierarchy, bypass
# all further cleverness.
found_directly = False
try:
data = self._data[name]
found_directly = True
except KeyError:
pass

if not found_directly:
keys = self._getKeyHierarchy(name)

hierarchy, complete = self._findInHierarchy(keys)
if not complete:
raise KeyError(f"{name} not found")
data = hierarchy[-1]

if isinstance(data, collections.abc.Mapping):
# In most cases we have a dict, and it's more efficient
# to check for a dict instance before checking the generic mapping.
if isinstance(data, (dict, collections.abc.Mapping)):
data = Config(data)
# Ensure that child configs inherit the parent internal delimiter
if self._D != Config._D:
Expand Down

0 comments on commit bf61746

Please sign in to comment.