Remove ill-conceived data ID consistency checking.

The only code that used this was in QuantumGraph generation in pipe_base, and that's already been replaced to improve performance. More importantly, the parts of this that involved spatial and temporal consistency never made sense - we have clear use cases where the naive intersection relationships the butler uses by default for these aren't appropriate (e.g. jointcal), and I should have realized earlier that this code didn't leave room for them.
lsst · Jun 12, 2020 · 1d38cab · 1d38cab
1 parent 5c2a822
commit 1d38cab
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 228 deletions.
diff --git a/python/lsst/daf/butler/__init__.py b/python/lsst/daf/butler/__init__.py
@@ -7,7 +7,7 @@
 
 from .core import *
 # Import the registry subpackage directly for other symbols.
-from .registry import Registry, CollectionType, CollectionSearch, ConsistentDataIds, DatasetTypeRestriction
+from .registry import Registry, CollectionType, CollectionSearch, DatasetTypeRestriction
 from ._butlerConfig import *
 from ._deferredDatasetHandle import *
 from ._butler import *

diff --git a/python/lsst/daf/butler/registry/_exceptions.py b/python/lsst/daf/butler/registry/_exceptions.py
@@ -26,9 +26,6 @@
 class InconsistentDataIdError(ValueError):
     """Exception raised when a data ID contains contradictory key-value pairs,
     according to dimension relationships.
-
-    This can include the case where the data ID identifies multiple spatial
-    regions or timespans that are disjoint.
     """
 
 

diff --git a/python/lsst/daf/butler/registry/_registry.py b/python/lsst/daf/butler/registry/_registry.py
@@ -22,13 +22,11 @@
 from __future__ import annotations
 
 __all__ = (
-    "ConsistentDataIds",
     "Registry",
 )
 
 from collections import defaultdict
 import contextlib
-from dataclasses import dataclass
 import sys
 from typing import (
     Any,
@@ -44,10 +42,8 @@
     Union,
 )
 
-import astropy.time
 import sqlalchemy
 
-import lsst.sphgeom
 from ..core import (
     Config,
     DataCoordinate,
@@ -62,7 +58,6 @@
     DimensionUniverse,
     ExpandedDataCoordinate,
     NamedKeyDict,
-    Timespan,
     StorageClassFactory,
 )
 from ..core.utils import doImport, iterable, transactional
@@ -89,67 +84,6 @@
     )
 
 
-@dataclass
-class ConsistentDataIds:
-    """A struct used to report relationships between data IDs by
-    `Registry.relateDataIds`.
-
-    If an instance of this class is returned (instead of `None`), the data IDs
-    are "not inconsistent" - any keys they have in common have the same value,
-    and any spatial or temporal relationships they have at least might involve
-    an overlap.  To capture this, any instance of `ConsistentDataIds` coerces
-    to `True` in boolean contexts.
-    """
-
-    overlaps: bool
-    """If `True`, the data IDs have at least one key in common, associated with
-    the same value.
-
-    Note that data IDs are not inconsistent even if overlaps is `False` - they
-    may simply have no keys in common, which means they cannot have
-    inconsistent values for any keys.  They may even be equal, in the case that
-    both data IDs are empty.
-
-    This field does _not_ indicate whether a spatial or temporal overlap
-    relationship exists.
-    """
-
-    contains: bool
-    """If `True`, all keys in the first data ID are in the second, and are
-    associated with the same values.
-
-    This includes case where the first data ID is empty.
-    """
-
-    within: bool
-    """If `True`, all keys in the second data ID are in the first, and are
-    associated with the same values.
-
-    This includes case where the second data ID is empty.
-    """
-
-    @property
-    def equal(self) -> bool:
-        """If `True`, the two data IDs are the same.
-
-        Data IDs are equal if they have both a `contains` and a `within`
-        relationship.
-        """
-        return self.contains and self.within
-
-    @property
-    def disjoint(self) -> bool:
-        """If `True`, the two data IDs have no keys in common.
-
-        This is simply the oppose of `overlaps`.  Disjoint datasets are by
-        definition not inconsistent.
-        """
-        return not self.overlaps
-
-    def __bool__(self) -> bool:
-        return True
-
-
 class Registry:
     """Registry interface.
 
@@ -861,8 +795,6 @@ def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[Dimen
         else:
             records = NamedKeyDict(records) if records is not None else NamedKeyDict()
         keys = dict(standardized.byName())
-        regions: List[lsst.sphgeom.ConvexPolygon] = []
-        timespans: List[Timespan[astropy.time.Time]] = []
         for element in standardized.graph.primaryKeyTraversalOrder:
             record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
             if record is ...:
@@ -877,16 +809,6 @@ def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[Dimen
                             f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                             f"but {element.name} implies {d.name}={value!r}."
                         )
-                if element in standardized.graph.spatial and record.region is not None:
-                    if any(record.region.relate(r) & lsst.sphgeom.DISJOINT for r in regions):
-                        raise InconsistentDataIdError(f"Data ID {standardized}'s region for {element.name} "
-                                                      f"is disjoint with those for other elements.")
-                    regions.append(record.region)
-                if element in standardized.graph.temporal:
-                    if any(not record.timespan.overlaps(t) for t in timespans):
-                        raise InconsistentDataIdError(f"Data ID {standardized}'s timespan for {element.name}"
-                                                      f" is disjoint with those for other elements.")
-                    timespans.append(record.timespan)
             else:
                 if element in standardized.graph.required:
                     raise LookupError(
@@ -901,59 +823,6 @@ def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[Dimen
                 records.update((d, None) for d in element.implied)
         return ExpandedDataCoordinate(standardized.graph, standardized.values(), records=records)
 
-    def relateDataIds(self, a: DataId, b: DataId) -> Optional[ConsistentDataIds]:
-        """Compare the keys and values of a pair of data IDs for consistency.
-
-        See `ConsistentDataIds` for more information.
-
-        Parameters
-        ----------
-        a : `dict` or `DataCoordinate`
-            First data ID to be compared.
-        b : `dict` or `DataCoordinate`
-            Second data ID to be compared.
-
-        Returns
-        -------
-        relationship : `ConsistentDataIds` or `None`
-            Relationship information.  This is not `None` and coerces to
-            `True` in boolean contexts if and only if the data IDs are
-            consistent in terms of all common key-value pairs, all many-to-many
-            join tables, and all spatial andtemporal relationships.
-        """
-        a = DataCoordinate.standardize(a, universe=self.dimensions)
-        b = DataCoordinate.standardize(b, universe=self.dimensions)
-        aFull = getattr(a, "full", None)
-        bFull = getattr(b, "full", None)
-        aBest = aFull if aFull is not None else a
-        bBest = bFull if bFull is not None else b
-        jointKeys = aBest.keys() & bBest.keys()
-        # If any common values are not equal, we know they are inconsistent.
-        if any(aBest[k] != bBest[k] for k in jointKeys):
-            return None
-        # If the graphs are equal, we know the data IDs are.
-        if a.graph == b.graph:
-            return ConsistentDataIds(contains=True, within=True, overlaps=bool(jointKeys))
-        # Result is still inconclusive.  Try to expand a data ID containing
-        # keys from both; that will fail if they are inconsistent.
-        # First, if either input was already an ExpandedDataCoordinate, extract
-        # its records so we don't have to query for them.
-        records: NamedKeyDict[DimensionElement, Optional[DimensionRecord]] = NamedKeyDict()
-        if isinstance(a, ExpandedDataCoordinate):
-            records.update(a.records)
-        if isinstance(b, ExpandedDataCoordinate):
-            records.update(b.records)
-        try:
-            self.expandDataId({**a.byName(), **b.byName()}, graph=(a.graph | b.graph), records=records)
-        except InconsistentDataIdError:
-            return None
-        # We know the answer is not `None`; time to figure out what it is.
-        return ConsistentDataIds(
-            contains=(a.graph >= b.graph),
-            within=(a.graph <= b.graph),
-            overlaps=bool(a.graph & b.graph),
-        )
-
     def insertDimensionData(self, element: Union[DimensionElement, str],
                             *data: Union[Mapping[str, Any], DimensionRecord],
                             conform: bool = True) -> None:

diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py
@@ -43,7 +43,7 @@
 from .._registry import (
     CollectionType,
     ConflictingDefinitionError,
-    ConsistentDataIds,
+    InconsistentDataIdError,
     Registry,
     RegistryConfig,
 )
@@ -242,89 +242,11 @@ def testDimensions(self):
             )
 
     def testDataIdRelationships(self):
-        """Test `Registry.relateDataId`.
+        """Test that `Registry.expandDataId` raises an exception when the given
+        keys are inconsistent.
         """
         registry = self.makeRegistry()
         self.loadData(registry, "base.yaml")
-        # Simple cases where the dimension key-value pairs tell us everything.
-        self.assertEqual(
-            registry.relateDataIds(
-                {"instrument": "Cam1"},
-                {"instrument": "Cam1"},
-            ),
-            ConsistentDataIds(contains=True, within=True, overlaps=True)
-        )
-        self.assertEqual(
-            registry.relateDataIds({}, {}),
-            ConsistentDataIds(contains=True, within=True, overlaps=False)
-        )
-        self.assertEqual(
-            registry.relateDataIds({"instrument": "Cam1"}, {}),
-            ConsistentDataIds(contains=True, within=False, overlaps=False)
-        )
-        self.assertEqual(
-            registry.relateDataIds({}, {"instrument": "Cam1"}),
-            ConsistentDataIds(contains=False, within=True, overlaps=False)
-        )
-        self.assertEqual(
-            registry.relateDataIds(
-                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
-                {"instrument": "Cam1"},
-            ),
-            ConsistentDataIds(contains=True, within=False, overlaps=True)
-        )
-        self.assertEqual(
-            registry.relateDataIds(
-                {"instrument": "Cam1"},
-                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
-            ),
-            ConsistentDataIds(contains=False, within=True, overlaps=True)
-        )
-        self.assertIsNone(
-            registry.relateDataIds(
-                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
-                {"instrument": "Cam1", "physical_filter": "Cam1-R1"},
-            )
-        )
-        # Trickier cases where we need to expand data IDs, but it's still just
-        # required and implied dimension relationships.
-        self.assertEqual(
-            registry.relateDataIds(
-                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
-                {"instrument": "Cam1", "abstract_filter": "g"},
-            ),
-            ConsistentDataIds(contains=True, within=False, overlaps=True)
-        )
-        self.assertEqual(
-            registry.relateDataIds(
-                {"instrument": "Cam1", "abstract_filter": "g"},
-                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
-            ),
-            ConsistentDataIds(contains=False, within=True, overlaps=True)
-        )
-        self.assertEqual(
-            registry.relateDataIds(
-                {"instrument": "Cam1"},
-                {"htm7": 131073},
-            ),
-            ConsistentDataIds(contains=False, within=False, overlaps=False)
-        )
-        # Trickiest cases involve spatial or temporal overlaps or non-dimension
-        # elements that relate things (of which visit_definition is our only
-        # current example).
-        #
-        # These two HTM IDs at different levels have a "contains" relationship
-        # spatially, but there is no overlap of dimension keys.  The exact
-        # result of relateDataIds is unspecified for this case, but it's
-        # guaranteed to be truthy (see relateDataIds docs.).
-        self.assertTrue(
-            registry.relateDataIds({"htm7": 131073}, {"htm9": 2097169})
-        )
-        # These two HTM IDs at different levels are disjoint spatially, which
-        # means the data IDs are inconsistent.
-        self.assertIsNone(
-            registry.relateDataIds({"htm7": 131073}, {"htm9": 2097391})
-        )
         # Insert a few more dimension records for the next test.
         registry.insertDimensionData(
             "exposure",
@@ -346,19 +268,10 @@ def testDataIdRelationships(self):
             "visit_definition",
             {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
         )
-        self.assertEqual(
-            registry.relateDataIds(
-                {"instrument": "Cam1", "visit": 1},
-                {"instrument": "Cam1", "exposure": 1},
-            ),
-            ConsistentDataIds(contains=False, within=False, overlaps=True)
-        )
-        self.assertIsNone(
-            registry.relateDataIds(
-                {"instrument": "Cam1", "visit": 1},
-                {"instrument": "Cam1", "exposure": 2},
+        with self.assertRaises(InconsistentDataIdError):
+            registry.expandDataId(
+                {"instrument": "Cam1", "visit": 1, "exposure": 2},
             )
-        )
 
     def testDataset(self):
         """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,