Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-32305: Some speed ups in Config #595

Merged
merged 6 commits into from
Nov 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
File renamed without changes.
1 change: 1 addition & 0 deletions doc/changes/DM-32305.perf.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Minor efficiency improvements when accessing `lsst.daf.butler.Config` hierarchies.
93 changes: 58 additions & 35 deletions python/lsst/daf/butler/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,36 @@ def _doUpdate(d, u):
return d


def _checkNextItem(k, d, create, must_be_dict):
"""See if k is in d and if it is return the new child."""
nextVal = None
isThere = False
if d is None:
# We have gone past the end of the hierarchy
pass
elif not must_be_dict and isinstance(d, collections.abc.Sequence):
# Check for Sequence first because for lists
# __contains__ checks whether value is found in list
# not whether the index exists in list. When we traverse
# the hierarchy we are interested in the index.
timj marked this conversation as resolved.
Show resolved Hide resolved
try:
nextVal = d[int(k)]
isThere = True
except IndexError:
pass
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with all of the Config code. With that in mind, can d be a list of numbers (e.g. d = [10, 20, 30])? If so (a list is an instance of collections.abc.Sequence) and k is 30, then d[int(k)] will raise an IndexError and isThere will be False, but based on the ValueError branch below I think we want isThere to be True.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. It's something like config[".a.2.b"], where a refers to a sequence, 2 is index 2 in that sequence, and b is a key in the dict stored at that index. Features like this are great for one-off lookups in a config but a disaster when doing many of them, because you pay for the isinstance check for a sequence every time (except that I've now bypassed it for a non-hierarchical lookup, since it's impossible for the top level of a Config to be a sequence).

except ValueError:
isThere = k in d
elif k in d:
nextVal = d[k]
isThere = True
elif create:
d[k] = {}
nextVal = d[k]
isThere = True

return nextVal, isThere


class Loader(yamlLoader):
"""YAML Loader that supports file include directives.

Expand Down Expand Up @@ -219,7 +249,9 @@ def __init__(self, other=None):
if isinstance(other, Config):
self._data = copy.deepcopy(other._data)
self.configFile = other.configFile
elif isinstance(other, collections.abc.Mapping):
elif isinstance(other, (dict, collections.abc.Mapping)):
timj marked this conversation as resolved.
Show resolved Hide resolved
# In most cases we have a dict, and it's more efficient
# to check for a dict instance before checking the generic mapping.
self.update(other)
elif isinstance(other, (str, ButlerURI, Path)):
# if other is a string, assume it is a file path/URI
Expand Down Expand Up @@ -544,43 +576,21 @@ def _findInHierarchy(self, keys, create=False):
"""
d = self._data

def checkNextItem(k, d, create):
"""See if k is in d and if it is return the new child."""
nextVal = None
isThere = False
if d is None:
# We have gone past the end of the hierarchy
pass
elif isinstance(d, collections.abc.Sequence):
# Check sequence first because for lists
# __contains__ checks whether value is found in list
# not whether the index exists in list. When we traverse
# the hierarchy we are interested in the index.
try:
nextVal = d[int(k)]
isThere = True
except IndexError:
pass
except ValueError:
isThere = k in d
elif k in d:
nextVal = d[k]
isThere = True
elif create:
d[k] = {}
nextVal = d[k]
isThere = True
return nextVal, isThere
# For the first key, d must be a dict so it is a waste
# of time to check for a sequence.
must_be_dict = True

hierarchy = []
complete = True
for k in keys:
d, isThere = checkNextItem(k, d, create)
d, isThere = _checkNextItem(k, d, create, must_be_dict)
if isThere:
hierarchy.append(d)
else:
complete = False
break
# Second time round it might be a sequence.
must_be_dict = False

return hierarchy, complete

Expand All @@ -589,14 +599,27 @@ def __getitem__(self, name):
# match. This allows `Config.items()` to work via a simple
# __iter__ implementation that returns top level keys of
# self._data.
keys = self._getKeyHierarchy(name)

hierarchy, complete = self._findInHierarchy(keys)
if not complete:
raise KeyError(f"{name} not found")
data = hierarchy[-1]
# If the name matches a key in the top-level hierarchy, bypass
# all further cleverness.
found_directly = False
try:
data = self._data[name]
found_directly = True
except KeyError:
pass

if not found_directly:
keys = self._getKeyHierarchy(name)

hierarchy, complete = self._findInHierarchy(keys)
if not complete:
raise KeyError(f"{name} not found")
data = hierarchy[-1]

if isinstance(data, collections.abc.Mapping):
# In most cases we have a dict, and it's more efficient
# to check for a dict instance before checking the generic mapping.
if isinstance(data, (dict, collections.abc.Mapping)):
data = Config(data)
# Ensure that child configs inherit the parent internal delimiter
if self._D != Config._D:
Expand Down