Merge pull request #330 from lsst/tickets/DM-25919

DM-25919: custom classes and new functionality for query results

TallJimbo committed Aug 7, 2020
2 parents cf1f528 + ad5a949 commit 27159a1
Showing 30 changed files with 2,401 additions and 889 deletions.
2 changes: 1 addition & 1 deletion doc/lsst.daf.butler/dimensions.rst
@@ -70,7 +70,7 @@ Spatial and Temporal Dimensions

 Dimensions can be *spatial* or *temporal* (or both, or neither), meaning that each record is associated with a region on the sky or a timespan (respectively).
 The overlaps between regions and timespans define many-to-many relationships between dimensions that --- along with the one-to-many ID-based dependencies --- generally provide a way to fully relate any set of dimensions.
-This produces a natural, concise query system; dimension relationships can be used to construct the full ``JOIN`` clause of a SQL ``SELECT`` with no input from the user, allowing them to specify just the ``WHERE`` clause (see `Registry.queryDimensions` and `Registry.queryDatasets`).
+This produces a natural, concise query system; dimension relationships can be used to construct the full ``JOIN`` clause of a SQL ``SELECT`` with no input from the user, allowing them to specify just the ``WHERE`` clause (see `Registry.queryDataIds` and `Registry.queryDatasets`).
 It is also possible to associate a region or timespan with a combination of dimensions (such as the region for a visit and a detector), by defining a `DimensionElement` for that combination.

 One kind of spatial dimension is special: a `SkyPixDimension` represents a complete pixelization of the sky, as defined by an `lsst.sphgeom.Pixelization` object.
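
A minimal sketch of the query pattern this passage describes, assuming a repository at a hypothetical path (the data ID values are also hypothetical)::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry  # hypothetical repository

    # The registry derives the JOIN clause from dimension relationships;
    # the caller supplies only the WHERE clause.
    for dataId in registry.queryDataIds(
        ["visit", "detector"],
        where="instrument = 'HSC' AND visit > 900000",  # hypothetical values
    ):
        print(dataId)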
3 changes: 0 additions & 3 deletions doc/lsst.daf.butler/index.rst
@@ -132,9 +132,6 @@ Database backends
 .. automodapi:: lsst.daf.butler.registry.databases.postgresql
    :no-main-docstr:
    :headings: ^"
-.. automodapi:: lsst.daf.butler.registry.databases.oracle
-   :no-main-docstr:
-   :headings: ^"

 Support API
 -----------
4 changes: 2 additions & 2 deletions doc/lsst.daf.butler/queries.rst
@@ -8,7 +8,7 @@ Querying datasets
 =================

 Datasets in a butler-managed data repository are identified by the combination of their *dataset type* and *data ID* within a *collection*.
-The `Registry` class's query methods (`~Registry.queryDatasetTypes`, `~Registry.queryCollections`, `~Registry.queryDimensions`, and `~Registry.queryDatasets`) allow these to be specified either fully or partially in various ways.
+The `Registry` class's query methods (`~Registry.queryDatasetTypes`, `~Registry.queryCollections`, `~Registry.queryDataIds`, and `~Registry.queryDatasets`) allow these to be specified either fully or partially in various ways.
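
For orientation, a hedged sketch of how these methods combine; the repository path, dataset type, collection, and data ID values below are all hypothetical::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry  # hypothetical path

    # Fully specified: one dataset type, one collection, an exact data ID.
    refs = registry.queryDatasets(
        "calexp",                          # hypothetical dataset type
        collections=["HSC/runs/example"],  # hypothetical collection
        dataId={"instrument": "HSC", "visit": 903334, "detector": 16},
    )

    # Partially specified: a wildcard expression over dataset types.
    types = registry.queryDatasetTypes("calexp*")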

.. _daf_butler_dataset_type_expressions:

@@ -80,7 +80,7 @@ The grammar is based on standard SQL; it is a subset of SQL expression language
 Expression structure
 ^^^^^^^^^^^^^^^^^^^^

-The expression is passed as a string via the ``where`` arguments of `~Registry.queryDimensions` and `~Registry.queryDatasets`.
+The expression is passed as a string via the ``where`` arguments of `~Registry.queryDataIds` and `~Registry.queryDatasets`.
 The string contains a single boolean expression which evaluates to true or
 false (if it is a valid expression). The expression can contain a number of
 standard logical operators, comparisons, literals, and identifiers which are
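
To make the grammar concrete, one example of such a ``where`` string (all dimension values hypothetical)::

    where = (
        "instrument = 'HSC' AND skymap = 'rings' "
        "AND (visit > 900000 OR detector IN (10, 11, 12))"
    )
    dataIds = registry.queryDataIds(["visit", "detector"], where=where)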
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/_butler.py
@@ -1236,7 +1236,7 @@ def export(self, *, directory: Optional[str] = None,
 Examples
 --------
-Typically the `Registry.queryDimensions` and `Registry.queryDatasets`
+Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
 methods are used to provide the iterables over data IDs and/or datasets
 to be exported::
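
(The docstring's own example is collapsed in this diff view; the pattern it refers to might look like the following sketch, with hypothetical file and collection names.)

::

    with butler.export(filename="exports.yaml") as export:  # hypothetical file
        export.saveDataIds(
            butler.registry.queryDataIds(["exposure"], instrument="HSC")
        )
        export.saveDatasets(
            butler.registry.queryDatasets("raw", collections=["HSC/raw/all"])
        )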
@@ -1373,7 +1373,7 @@ def validateConfiguration(self, logFailures: bool = False,

         # Find all the registered instruments
         instruments = set(
-            dataId["instrument"] for dataId in self.registry.queryDimensions(["instrument"])
+            record.name for record in self.registry.queryDimensionRecords("instrument")
         )

         # For each datasetType that has an instrument dimension, create
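
`Registry.queryDimensionRecords`, used in the new line above, returns full dimension records rather than data IDs, so fields such as ``name`` are available directly. A sketch of the difference::

    # Each record carries every column of the dimension table, not just
    # the primary key, so no separate data ID lookup is needed.
    for record in self.registry.queryDimensionRecords("instrument"):
        print(record.name)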
1 change: 0 additions & 1 deletion python/lsst/daf/butler/configs/registry.yaml
@@ -4,7 +4,6 @@ registry:
   engines:
     sqlite: lsst.daf.butler.registry.databases.sqlite.SqliteDatabase
     postgresql: lsst.daf.butler.registry.databases.postgresql.PostgresqlDatabase
-    oracle: lsst.daf.butler.registry.databases.oracle.OracleDatabase
   managers:
     attributes: lsst.daf.butler.registry.attributes.DefaultButlerAttributeManager
     opaque: lsst.daf.butler.registry.opaque.ByNameOpaqueTableStorageManager
@@ -672,8 +672,7 @@ class DataCoordinateSequence(_DataCoordinateCollectionBase, Sequence[DataCoordin
     ----------
     dataIds : `collections.abc.Sequence` [ `DataCoordinate` ]
         A sequence of `DataCoordinate` instances, with dimensions equal to
-        ``graph``. If this is a mutable object, the caller must be able to
-        guarantee that it will not be modified by any other holders.
+        ``graph``.
     graph : `DimensionGraph`
         Dimensions identified by all data IDs in the set.
     hasFull : `bool`, optional
@@ -698,7 +697,7 @@ class DataCoordinateSequence(_DataCoordinateCollectionBase, Sequence[DataCoordin
     def __init__(self, dataIds: Sequence[DataCoordinate], graph: DimensionGraph, *,
                  hasFull: Optional[bool] = None, hasRecords: Optional[bool] = None,
                  check: bool = True):
-        super().__init__(dataIds, graph, hasFull=hasFull, hasRecords=hasRecords, check=check)
+        super().__init__(tuple(dataIds), graph, hasFull=hasFull, hasRecords=hasRecords, check=check)

     _dataIds: Sequence[DataCoordinate]
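
The switch to ``tuple(dataIds)`` above is what makes the deleted docstring caveat unnecessary: the constructor now takes its own immutable snapshot. A sketch of the guarantee, with hypothetical inputs::

    ids = list(registry.queryDataIds(["visit"]))  # hypothetical query
    seq = DataCoordinateSequence(ids, graph=ids[0].graph)
    ids.clear()           # mutating the caller's list...
    assert len(seq) > 0   # ...no longer affects the sequence (given any matches)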

15 changes: 14 additions & 1 deletion python/lsst/daf/butler/core/dimensions/records.py
@@ -172,6 +172,14 @@ def fromDict(cls, mapping: Mapping[str, Any]) -> DimensionRecord:
         values = tuple(d.get(k) for k in cls.__slots__)
         return cls(*values)

+    def __eq__(self, other: Any) -> bool:
+        if type(other) != type(self):
+            return False
+        return self.dataId == other.dataId
+
+    def __hash__(self) -> int:
+        return hash(self.dataId)
+
     def __str__(self) -> str:
         lines = [f"{self.definition.name}:"]
         lines.extend(f"  {field}: {getattr(self, field)!r}" for field in self.fields)
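
With ``__eq__`` and ``__hash__`` defined in terms of ``dataId``, records now compare by primary key and can live in sets; a sketch against a hypothetical repository::

    records = list(registry.queryDimensionRecords("detector", instrument="HSC"))

    # Duplicate records (same primary-key data ID) collapse in a set.
    unique = set(records)
    assert all(r == r for r in records)  # equality via dataId, not identity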
@@ -197,9 +205,14 @@ def toDict(self) -> Dict[str, Any]:

     dataId: DataCoordinate
     """A dict-like identifier for this record's primary keys
-    (`MinimalDataCoordinate`).
+    (`DataCoordinate`).
     """

+    definition: ClassVar[DimensionElement]
+    """The `DimensionElement` whose records this class represents
+    (`DimensionElement`).
+    """
+
     fields: ClassVar[Tuple[str, ...]]
     """The names of all fields in this class (`tuple` [ `str` ]).
     """
10 changes: 7 additions & 3 deletions python/lsst/daf/butler/core/dimensions/schema.py
@@ -73,7 +73,8 @@ def _makeForeignKeySpec(dimension: Dimension) -> ddl.ForeignKeySpec:


 def addDimensionForeignKey(tableSpec: ddl.TableSpec, dimension: Dimension, *,
-                           primaryKey: bool, nullable: bool = False) -> ddl.FieldSpec:
+                           primaryKey: bool, nullable: bool = False, constraint: bool = True
+                           ) -> ddl.FieldSpec:
     """Add a field and possibly a foreign key to a table specification that
     reference the table for the given `Dimension`.
@@ -93,6 +94,9 @@ def addDimensionForeignKey(tableSpec: ddl.TableSpec, dimension: Dimension, *,
     nullable : `bool`, optional
         If `False` (default) the new field will be added with a NOT NULL
         constraint.
+    constraint : `bool`
+        If `False` (`True` is the default), add only the field, not the
+        foreign key constraint.

     Returns
     -------
@@ -107,8 +111,8 @@ def addDimensionForeignKey(tableSpec: ddl.TableSpec, dimension: Dimension, *,
     fieldSpec.nullable = nullable
     tableSpec.fields.add(fieldSpec)
     # Also add a foreign key constraint on the dependency table, but only if
-    # there actually is one.
-    if dimension.hasTable() and dimension.viewOf is None:
+    # there actually is one and we weren't told not to.
+    if dimension.hasTable() and dimension.viewOf is None and constraint:
         tableSpec.foreignKeys.append(_makeForeignKeySpec(dimension))
     return fieldSpec
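
A sketch of how the new ``constraint`` flag might be used when assembling a table specification; the spec and dimension objects here are assumed to come from the caller::

    # Hypothetical usage: add a visit column to a table spec without also
    # emitting a foreign key constraint back to the visit dimension table.
    visit = universe["visit"]  # a Dimension from some DimensionUniverse
    fieldSpec = addDimensionForeignKey(
        tableSpec, visit, primaryKey=False, constraint=False
    )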

10 changes: 9 additions & 1 deletion python/lsst/daf/butler/core/simpleQuery.py
@@ -97,8 +97,16 @@ def join(self, table: sqlalchemy.sql.FromClause, *,
"""
if self._from is None:
self._from = table
else:
elif onclause is not None:
self._from = self._from.join(table, onclause=onclause, isouter=isouter, full=full)
else:
# New table is completely unrelated to all already-included
# tables. We need a cross join here but SQLAlchemy does not
# have a specific method for that. Using join() without
# `onclause` will try to join on FK and will raise an exception
# for unrelated tables, so we have to use `onclause` which is
# always true.
self._from = self._from.join(table, sqlalchemy.sql.literal(True))
for name, arg in kwargs.items():
if arg is self.Select:
self.columns.append(table.columns[name].label(name))
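
A standalone illustration of the cross-join workaround used in the new ``else`` branch (table and column names hypothetical)::

    import sqlalchemy

    metadata = sqlalchemy.MetaData()
    a = sqlalchemy.Table("a", metadata, sqlalchemy.Column("x", sqlalchemy.Integer))
    b = sqlalchemy.Table("b", metadata, sqlalchemy.Column("y", sqlalchemy.Integer))

    # a.join(b) would raise NoForeignKeysError for unrelated tables;
    # joining on a literal TRUE produces the rows of a cross join.
    cross = a.join(b, sqlalchemy.sql.literal(True))
    query = sqlalchemy.select([a.c.x, b.c.y]).select_from(cross)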