Skip to content

Commit

Permalink
Fix DimensionGraph's primary key traversal order.
Browse files Browse the repository at this point in the history
This fixes the logic to correctly handle the case where one
dimension's required dependency is another dimension's implied
dependency.  It also moves it from a private attribute computed at
construction to a construct-on-first-use public property.  It probably
should have always been public (it's used exclusively by Registry),
and it now needs to be construct-on-first-use to avoid cycles during
DimensionUniverse construction.
  • Loading branch information
TallJimbo authored and isullivan committed Mar 27, 2020
1 parent 2814ad9 commit d2ec93b
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 25 deletions.
60 changes: 37 additions & 23 deletions python/lsst/daf/butler/core/dimensions/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

__all__ = ["DimensionGraph"]

from typing import Optional, Iterable, Iterator, KeysView, Union, Any, TYPE_CHECKING
from typing import Optional, Iterable, Iterator, KeysView, Union, Any, Tuple, TYPE_CHECKING

from ..utils import NamedValueSet, NamedKeyDict, immutable

Expand Down Expand Up @@ -192,28 +192,6 @@ def _finish(self):
self._dimensionIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.dimensions)})
self._elementIndices = NamedKeyDict({element: i for i, element in enumerate(self.elements)})

# Compute an element traversal order that allows element records to be
# found given their primary keys, starting from only the primary keys
# of required dimensions. Unlike the table definition/topological
# order (which is what DimensionUniverse.sorted gives you), when
# dimension A implies dimension B, dimension A appears first.
# This is really for DimensionDatabase/ExpandedDataCoordinate, but
# is stored here so we don't have to recompute it for every coordinate.
todo = set(self.elements)
self._primaryKeyTraversalOrder = []

def addToPrimaryKeyTraversalOrder(element):
if element in todo:
self._primaryKeyTraversalOrder.append(element)
todo.remove(element)
for other in element.implied:
addToPrimaryKeyTraversalOrder(other)

for dimension in self.required:
addToPrimaryKeyTraversalOrder(dimension)

self._primaryKeyTraversalOrder.extend(todo)

def __getnewargs__(self) -> tuple:
return (self.universe, None, tuple(self.dimensions.names), False)

Expand Down Expand Up @@ -415,6 +393,42 @@ def getTemporal(self, *, independent: bool = True,
return _filterDependentElements(self._allTemporal,
prefer=NamedValueSet(self.elements[p] for p in prefer))

@property
def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement]:
"""Return a tuple of all elements in an order allows records to be
found given their primary keys, starting from only the primary keys of
required dimensions (`tuple` [ `DimensionRecord` ]).
Unlike the table definition/topological order (which is what
DimensionUniverse.sorted gives you), when dimension A implies
dimension B, dimension A appears first.
"""
order = getattr(self, "_primaryKeyTraversalOrder", None)
if order is None:
done = set()
order = []

def addToOrder(element) -> bool:
if element.name in done:
return
predecessors = set(element.graph.required.names)
predecessors.discard(element.name)
if not done.issuperset(predecessors):
return
order.append(element)
done.add(element)
for other in element.implied:
addToOrder(other)

while not done.issuperset(self.required):
for dimension in self.required:
addToOrder(dimension)

order.extend(element for element in self.elements if element.name not in done)
order = tuple(order)
self._primaryKeyTraversalOrder = order
return order

# Class attributes below are shadowed by instance attributes, and are
# present just to hold the docstrings for those instance attributes.

Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/registry/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[Dimen
else:
records = dict(records) if records is not None else {}
keys = dict(standardized)
for element in standardized.graph._primaryKeyTraversalOrder:
for element in standardized.graph.primaryKeyTraversalOrder:
record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
if record is ...:
storage = self._dimensions[element]
Expand Down
13 changes: 12 additions & 1 deletion tests/test_dimensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pickle
import itertools

from lsst.daf.butler.core.utils import NamedKeyDict
from lsst.daf.butler.core.utils import NamedKeyDict, NamedValueSet
from lsst.daf.butler.core.dimensions import DimensionUniverse, DimensionGraph, Dimension
from lsst.daf.butler.core.dimensions.schema import makeElementTableSpec

Expand Down Expand Up @@ -63,6 +63,17 @@ def checkGraphInvariants(self, graph):
[element for element in graph.elements
if isinstance(element, Dimension)])
self.assertCountEqual(graph.dimensions, itertools.chain(graph.required, graph.implied))
# Check primary key traversal order: each element should follow any it
# requires, and element that is implied by any other in the graph
# follow at least one of those.
seen = NamedValueSet()
for element in graph.primaryKeyTraversalOrder:
with self.subTest(required=graph.required, implied=graph.implied, element=element):
seen.add(element)
self.assertLessEqual(element.graph.required, seen)
if element in graph.implied:
self.assertTrue(any(element in s.implied for s in seen))
self.assertCountEqual(seen, graph.elements)

def testConfigRead(self):
self.assertEqual(self.universe.dimensions.names,
Expand Down

0 comments on commit d2ec93b

Please sign in to comment.