Skip to content

Commit

Permalink
Merge pull request #731 from lsst/tickets/DM-36111
Browse files Browse the repository at this point in the history
DM-36111: miscellaneous improvements to registry support classes.
  • Loading branch information
TallJimbo committed Sep 21, 2022
2 parents 4ca570b + fd68878 commit 1a6169c
Show file tree
Hide file tree
Showing 38 changed files with 1,020 additions and 1,166 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-36111.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix bug in `elements` argument to various export methods that prevented it from doing anything.
1 change: 1 addition & 0 deletions doc/changes/DM-36111.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Define regions for data IDs with multiple spatial dimensions to be the intersection of those dimensions' regions.
7 changes: 2 additions & 5 deletions doc/lsst.daf.butler/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,6 @@ Python API reference
.. automodapi:: lsst.daf.butler.registry.queries
:no-main-docstr:

.. automodapi:: lsst.daf.butler.registry.summaries
:no-main-docstr:

.. automodapi:: lsst.daf.butler.registry.wildcards
:no-main-docstr:

Expand Down Expand Up @@ -186,8 +183,8 @@ Test utilities
:no-main-docstr:
:no-inheritance-diagram:

Command Line Inteface API
-------------------------
Command Line Interface API
--------------------------

.. warning::
The command line interface API (everything in ``lsst.daf.butler.cli``) is only for developer use to write command line interfaces, and is not intended for general use.
Expand Down
328 changes: 1 addition & 327 deletions python/lsst/daf/butler/core/_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,17 @@
from __future__ import annotations

__all__ = (
"SpatialRegionDatabaseRepresentation",
"TopologicalSpace",
"TopologicalFamily",
"TopologicalRelationshipEndpoint",
"TopologicalExtentDatabaseRepresentation",
)

import enum
from abc import ABC, abstractmethod
from typing import Any, ClassVar, Dict, Generic, Iterator, Mapping, Optional, Tuple, Type, TypeVar
from typing import Any, Mapping, Optional

import lsst.sphgeom
import sqlalchemy
from lsst.utils.classes import immutable

from . import ddl
from .named import NamedValueAbstractSet


Expand Down Expand Up @@ -169,324 +164,3 @@ def spatial(self) -> Optional[TopologicalFamily]:
def temporal(self) -> Optional[TopologicalFamily]:
"""Return this endpoint's `~TopologicalSpace.TEMPORAL` family."""
return self.topology.get(TopologicalSpace.TEMPORAL)


# Type variable bound to the representation class itself, so that
# `fromSelectable` is typed as returning the subclass it was called on.
_S = TypeVar("_S", bound="TopologicalExtentDatabaseRepresentation")
# Type variable for the in-memory region type a representation maps to the
# database (the ``extent`` accepted by `update` and returned by `extract`).
_R = TypeVar("_R")


class TopologicalExtentDatabaseRepresentation(ABC, Generic[_R]):
    """Mapping of in-memory representation of a region to DB representation.

    An abstract base class whose subclasses provide a mapping from the
    in-memory representation of a `TopologicalSpace` region to a
    database-storage representation, and whose instances act like a
    SQLAlchemy-based column expression.

    Notes
    -----
    This class derives from `abc.ABC` so that the `abc.abstractmethod`
    declarations below are actually enforced: a subclass can only be
    instantiated once it has overridden every abstract member.
    """

    NAME: ClassVar[str]
    """Name to use for this logical column in tables (`str`).

    If the representation actually uses multiple columns, this will just be
    part of the names of those columns.  Queries (and tables that represent
    materialized queries) may use a different name (via the ``name`` parameters
    to various methods) in order to disambiguate between the regions associated
    with different tables.
    """

    SPACE: ClassVar[TopologicalSpace]
    """Topological space where regions represented by this class exist.
    """

    @classmethod
    @abstractmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        """Make objects that reflect the fields that must be added to table.

        Makes one or more `ddl.FieldSpec` objects that reflect the fields
        that must be added to a table for this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the region is permitted to be logically ``NULL``
            (mapped to `None` in Python), though the corresponding value(s) in
            the database are implementation-defined.  Nullable region fields
            default to NULL, while others default to (-∞, ∞).
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments are forwarded to the `ddl.FieldSpec` constructor
            for all fields; implementations only provide the ``name``,
            ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects; length of the tuple is
            subclass-dependent, but is guaranteed to match the length of the
            return values of `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        """Return the actual field names used by this representation.

        Parameters
        ----------
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s).  Guaranteed to be the same as the names of the field
            specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(
        cls, extent: Optional[_R], name: Optional[str] = None, result: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Add a region to a dictionary.

        This region represents a database row in this representation.

        Parameters
        ----------
        extent
            An instance of the region type this class provides a database
            representation for, or `None` for ``NULL``.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            A dictionary representing a database row that fields should be
            added to, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            A dictionary containing this representation of a region.  Exactly
            the `dict` passed as ``result`` if that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[_R]:
        """Extract a region from a dictionary.

        This region represents a database row in this representation.

        Parameters
        ----------
        mapping : `Mapping` [ `str`, `Any` ]
            A dictionary representing a database row containing a region
            in this representation.  Should have key(s) equal to the return
            value of `getFieldNames`.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        region
            Python representation of the region.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Return `True` if this representation supports exclusion constraints.

        Returns
        -------
        supported : `bool`
            If `True`, defining a constraint via `ddl.TableSpec.exclusion` that
            includes the fields of this representation is allowed.
        """
        # Not abstract: representations opt in by overriding this.
        return False

    @classmethod
    @abstractmethod
    def fromSelectable(
        cls: Type[_S], selectable: sqlalchemy.sql.FromClause, name: Optional[str] = None
    ) -> _S:
        """Construct representation of a column in the table or subquery.

        Constructs an instance that represents a logical column (which may
        actually be backed by multiple columns) in the given table or subquery.

        Parameters
        ----------
        selectable : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing a table or subquery.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        representation : `TopologicalExtentDatabaseRepresentation`
            Object representing a logical column.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def name(self) -> str:
        """Return base logical name for the topological extent (`str`).

        If the representation uses only one actual column, this should be the
        full name of the column.  In other cases it is an unspecified subset of
        the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return expression that tests whether region is ``NULL``.

        Returns a SQLAlchemy expression that tests whether this region is
        logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(self, name: Optional[str]) -> Iterator[sqlalchemy.sql.ColumnElement]:
        """Return the actual column(s) that comprise this logical column.

        Parameters
        ----------
        name : `str`, optional
            If provided, a name for the logical column that should be used to
            label the columns.  If not provided, the columns' native names will
            be used.

        Returns
        -------
        columns : `Iterator` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()


class SpatialRegionDatabaseRepresentation(TopologicalExtentDatabaseRepresentation[lsst.sphgeom.Region]):
    """Database representation of spatial regions on the sky.

    Each instance wraps the single column in which a region is stored.
    Instances should be obtained from `fromSelectable` rather than by calling
    the constructor directly.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        Column containing the opaque byte-string, with automatic conversion to
        `lsst.sphgeom.Region` implemented via SQLAlchemy hooks.
    name : `str`
        Name of the column.

    Notes
    -----
    Unlike `TimespanDatabaseRepresentation`, this is a concrete class, because
    no database-native spatial region type is currently supported; regions
    are stored as byte-string columns that are opaque to the database, and
    overlaps are precomputed.  Consequently no in-database topological
    predicates are available here.

    Should database-native regions be supported in the future, this class may
    turn into an ABC with several concrete implementations.
    """

    NAME: ClassVar[str] = "region"
    SPACE: ClassVar[TopologicalSpace] = TopologicalSpace.SPATIAL

    def __init__(self, column: sqlalchemy.sql.ColumnElement, name: str):
        self._name = name
        self.column = column

    @classmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        field_name = cls.NAME if name is None else name
        # Sizing rationale: typical regions are small quadrilaterals, but a
        # visit region has a complicated boundary and can be large -- about
        # ~1400 bytes for HSC -- so the size is rounded up to the next power
        # of two.  A variable-length bytes type would probably be preferable
        # (variable-length TEXT carries no performance penalty in PostgreSQL
        # or SQLite compared to fixed-length strings), but switching would be
        # a schema change and is not planned anytime soon.
        #
        # NOTE(review): ``nullable`` and ``**kwargs`` are accepted but not
        # passed on to `ddl.FieldSpec` here -- confirm whether that is
        # intentional before changing it, since it would affect the schema.
        region_spec = ddl.FieldSpec(field_name, nbytes=2048, dtype=ddl.Base64Region)
        return (region_spec,)

    @classmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        # Docstring inherited.
        return (cls.NAME if name is None else name,)

    @classmethod
    def update(
        cls,
        extent: Optional[lsst.sphgeom.Region],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        # Docstring inherited.
        key = cls.NAME if name is None else name
        # Reuse the caller's row dictionary when one is given so that it is
        # modified in place and returned as-is.
        row = {} if result is None else result
        row[key] = extent
        return row

    @classmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[lsst.sphgeom.Region]:
        # Docstring inherited.
        key = cls.NAME if name is None else name
        return mapping[key]

    @classmethod
    def fromSelectable(
        cls: Type[SpatialRegionDatabaseRepresentation],
        selectable: sqlalchemy.sql.FromClause,
        name: Optional[str] = None,
    ) -> SpatialRegionDatabaseRepresentation:
        # Docstring inherited
        column_name = cls.NAME if name is None else name
        backing_column = selectable.columns[column_name]
        return cls(backing_column, column_name)

    @property
    def name(self) -> str:
        # Docstring inherited
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited
        backing_column = self.column
        return backing_column.is_(None)

    def flatten(self, name: Optional[str]) -> Iterator[sqlalchemy.sql.ColumnElement]:
        # Docstring inherited
        # NOTE(review): ``name`` is not used to relabel the column; the
        # backing column is yielded unchanged -- confirm callers expect this.
        yield from (self.column,)
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/datasets/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .ref import DatasetRef


@dataclass
@dataclass(frozen=True, eq=True)
class DatasetAssociation:
"""Class representing the membership of a dataset in a single collection.
Expand Down

0 comments on commit 1a6169c

Please sign in to comment.