Skip to content

Commit

Permalink
Remove ill-conceived data ID consistency checking.
Browse files Browse the repository at this point in the history
The only code that used this was in QuantumGraph generation in
pipe_base, and that's already been replaced to improve performance.

More importantly, the parts of this that involved spatial and temporal
consistency never made sense - we have clear use cases where the naive
intersection relationships the butler uses by default for these aren't
appropriate (e.g. jointcal), and I should have realized earlier that
this code didn't leave room for them.
  • Loading branch information
TallJimbo committed Jun 12, 2020
1 parent 5c2a822 commit 1d38cab
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 228 deletions.
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from .core import *
# Import the registry subpackage directly for other symbols.
from .registry import Registry, CollectionType, CollectionSearch, ConsistentDataIds, DatasetTypeRestriction
from .registry import Registry, CollectionType, CollectionSearch, DatasetTypeRestriction
from ._butlerConfig import *
from ._deferredDatasetHandle import *
from ._butler import *
Expand Down
3 changes: 0 additions & 3 deletions python/lsst/daf/butler/registry/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@
class InconsistentDataIdError(ValueError):
"""Exception raised when a data ID contains contradictory key-value pairs,
according to dimension relationships.
This can include the case where the data ID identifies multiple spatial
regions or timespans that are disjoint.
"""


Expand Down
131 changes: 0 additions & 131 deletions python/lsst/daf/butler/registry/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,11 @@
from __future__ import annotations

__all__ = (
"ConsistentDataIds",
"Registry",
)

from collections import defaultdict
import contextlib
from dataclasses import dataclass
import sys
from typing import (
Any,
Expand All @@ -44,10 +42,8 @@
Union,
)

import astropy.time
import sqlalchemy

import lsst.sphgeom
from ..core import (
Config,
DataCoordinate,
Expand All @@ -62,7 +58,6 @@
DimensionUniverse,
ExpandedDataCoordinate,
NamedKeyDict,
Timespan,
StorageClassFactory,
)
from ..core.utils import doImport, iterable, transactional
Expand All @@ -89,67 +84,6 @@
)


@dataclass
class ConsistentDataIds:
"""A struct used to report relationships between data IDs by
`Registry.relateDataIds`.
If an instance of this class is returned (instead of `None`), the data IDs
are "not inconsistent" - any keys they have in common have the same value,
and any spatial or temporal relationships they have at least might involve
an overlap. To capture this, any instance of `ConsistentDataIds` coerces
to `True` in boolean contexts.
"""

overlaps: bool
"""If `True`, the data IDs have at least one key in common, associated with
the same value.
Note that data IDs are not inconsistent even if overlaps is `False` - they
may simply have no keys in common, which means they cannot have
inconsistent values for any keys. They may even be equal, in the case that
both data IDs are empty.
This field does _not_ indicate whether a spatial or temporal overlap
relationship exists.
"""

contains: bool
"""If `True`, all keys in the first data ID are in the second, and are
associated with the same values.
This includes case where the first data ID is empty.
"""

within: bool
"""If `True`, all keys in the second data ID are in the first, and are
associated with the same values.
This includes case where the second data ID is empty.
"""

@property
def equal(self) -> bool:
"""If `True`, the two data IDs are the same.
Data IDs are equal if they have both a `contains` and a `within`
relationship.
"""
return self.contains and self.within

@property
def disjoint(self) -> bool:
"""If `True`, the two data IDs have no keys in common.
This is simply the oppose of `overlaps`. Disjoint datasets are by
definition not inconsistent.
"""
return not self.overlaps

def __bool__(self) -> bool:
return True


class Registry:
"""Registry interface.
Expand Down Expand Up @@ -861,8 +795,6 @@ def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[Dimen
else:
records = NamedKeyDict(records) if records is not None else NamedKeyDict()
keys = dict(standardized.byName())
regions: List[lsst.sphgeom.ConvexPolygon] = []
timespans: List[Timespan[astropy.time.Time]] = []
for element in standardized.graph.primaryKeyTraversalOrder:
record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
if record is ...:
Expand All @@ -877,16 +809,6 @@ def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[Dimen
f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
f"but {element.name} implies {d.name}={value!r}."
)
if element in standardized.graph.spatial and record.region is not None:
if any(record.region.relate(r) & lsst.sphgeom.DISJOINT for r in regions):
raise InconsistentDataIdError(f"Data ID {standardized}'s region for {element.name} "
f"is disjoint with those for other elements.")
regions.append(record.region)
if element in standardized.graph.temporal:
if any(not record.timespan.overlaps(t) for t in timespans):
raise InconsistentDataIdError(f"Data ID {standardized}'s timespan for {element.name}"
f" is disjoint with those for other elements.")
timespans.append(record.timespan)
else:
if element in standardized.graph.required:
raise LookupError(
Expand All @@ -901,59 +823,6 @@ def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[Dimen
records.update((d, None) for d in element.implied)
return ExpandedDataCoordinate(standardized.graph, standardized.values(), records=records)

def relateDataIds(self, a: DataId, b: DataId) -> Optional[ConsistentDataIds]:
"""Compare the keys and values of a pair of data IDs for consistency.
See `ConsistentDataIds` for more information.
Parameters
----------
a : `dict` or `DataCoordinate`
First data ID to be compared.
b : `dict` or `DataCoordinate`
Second data ID to be compared.
Returns
-------
relationship : `ConsistentDataIds` or `None`
Relationship information. This is not `None` and coerces to
`True` in boolean contexts if and only if the data IDs are
consistent in terms of all common key-value pairs, all many-to-many
join tables, and all spatial andtemporal relationships.
"""
a = DataCoordinate.standardize(a, universe=self.dimensions)
b = DataCoordinate.standardize(b, universe=self.dimensions)
aFull = getattr(a, "full", None)
bFull = getattr(b, "full", None)
aBest = aFull if aFull is not None else a
bBest = bFull if bFull is not None else b
jointKeys = aBest.keys() & bBest.keys()
# If any common values are not equal, we know they are inconsistent.
if any(aBest[k] != bBest[k] for k in jointKeys):
return None
# If the graphs are equal, we know the data IDs are.
if a.graph == b.graph:
return ConsistentDataIds(contains=True, within=True, overlaps=bool(jointKeys))
# Result is still inconclusive. Try to expand a data ID containing
# keys from both; that will fail if they are inconsistent.
# First, if either input was already an ExpandedDataCoordinate, extract
# its records so we don't have to query for them.
records: NamedKeyDict[DimensionElement, Optional[DimensionRecord]] = NamedKeyDict()
if isinstance(a, ExpandedDataCoordinate):
records.update(a.records)
if isinstance(b, ExpandedDataCoordinate):
records.update(b.records)
try:
self.expandDataId({**a.byName(), **b.byName()}, graph=(a.graph | b.graph), records=records)
except InconsistentDataIdError:
return None
# We know the answer is not `None`; time to figure out what it is.
return ConsistentDataIds(
contains=(a.graph >= b.graph),
within=(a.graph <= b.graph),
overlaps=bool(a.graph & b.graph),
)

def insertDimensionData(self, element: Union[DimensionElement, str],
*data: Union[Mapping[str, Any], DimensionRecord],
conform: bool = True) -> None:
Expand Down
99 changes: 6 additions & 93 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from .._registry import (
CollectionType,
ConflictingDefinitionError,
ConsistentDataIds,
InconsistentDataIdError,
Registry,
RegistryConfig,
)
Expand Down Expand Up @@ -242,89 +242,11 @@ def testDimensions(self):
)

def testDataIdRelationships(self):
"""Test `Registry.relateDataId`.
"""Test that `Registry.expandDataId` raises an exception when the given
keys are inconsistent.
"""
registry = self.makeRegistry()
self.loadData(registry, "base.yaml")
# Simple cases where the dimension key-value pairs tell us everything.
self.assertEqual(
registry.relateDataIds(
{"instrument": "Cam1"},
{"instrument": "Cam1"},
),
ConsistentDataIds(contains=True, within=True, overlaps=True)
)
self.assertEqual(
registry.relateDataIds({}, {}),
ConsistentDataIds(contains=True, within=True, overlaps=False)
)
self.assertEqual(
registry.relateDataIds({"instrument": "Cam1"}, {}),
ConsistentDataIds(contains=True, within=False, overlaps=False)
)
self.assertEqual(
registry.relateDataIds({}, {"instrument": "Cam1"}),
ConsistentDataIds(contains=False, within=True, overlaps=False)
)
self.assertEqual(
registry.relateDataIds(
{"instrument": "Cam1", "physical_filter": "Cam1-G"},
{"instrument": "Cam1"},
),
ConsistentDataIds(contains=True, within=False, overlaps=True)
)
self.assertEqual(
registry.relateDataIds(
{"instrument": "Cam1"},
{"instrument": "Cam1", "physical_filter": "Cam1-G"},
),
ConsistentDataIds(contains=False, within=True, overlaps=True)
)
self.assertIsNone(
registry.relateDataIds(
{"instrument": "Cam1", "physical_filter": "Cam1-G"},
{"instrument": "Cam1", "physical_filter": "Cam1-R1"},
)
)
# Trickier cases where we need to expand data IDs, but it's still just
# required and implied dimension relationships.
self.assertEqual(
registry.relateDataIds(
{"instrument": "Cam1", "physical_filter": "Cam1-G"},
{"instrument": "Cam1", "abstract_filter": "g"},
),
ConsistentDataIds(contains=True, within=False, overlaps=True)
)
self.assertEqual(
registry.relateDataIds(
{"instrument": "Cam1", "abstract_filter": "g"},
{"instrument": "Cam1", "physical_filter": "Cam1-G"},
),
ConsistentDataIds(contains=False, within=True, overlaps=True)
)
self.assertEqual(
registry.relateDataIds(
{"instrument": "Cam1"},
{"htm7": 131073},
),
ConsistentDataIds(contains=False, within=False, overlaps=False)
)
# Trickiest cases involve spatial or temporal overlaps or non-dimension
# elements that relate things (of which visit_definition is our only
# current example).
#
# These two HTM IDs at different levels have a "contains" relationship
# spatially, but there is no overlap of dimension keys. The exact
# result of relateDataIds is unspecified for this case, but it's
# guaranteed to be truthy (see relateDataIds docs.).
self.assertTrue(
registry.relateDataIds({"htm7": 131073}, {"htm9": 2097169})
)
# These two HTM IDs at different levels are disjoint spatially, which
# means the data IDs are inconsistent.
self.assertIsNone(
registry.relateDataIds({"htm7": 131073}, {"htm9": 2097391})
)
# Insert a few more dimension records for the next test.
registry.insertDimensionData(
"exposure",
Expand All @@ -346,19 +268,10 @@ def testDataIdRelationships(self):
"visit_definition",
{"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
)
self.assertEqual(
registry.relateDataIds(
{"instrument": "Cam1", "visit": 1},
{"instrument": "Cam1", "exposure": 1},
),
ConsistentDataIds(contains=False, within=False, overlaps=True)
)
self.assertIsNone(
registry.relateDataIds(
{"instrument": "Cam1", "visit": 1},
{"instrument": "Cam1", "exposure": 2},
with self.assertRaises(InconsistentDataIdError):
registry.expandDataId(
{"instrument": "Cam1", "visit": 1, "exposure": 2},
)
)

def testDataset(self):
"""Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
Expand Down

0 comments on commit 1d38cab

Please sign in to comment.