DM-32403: Add support for order_by/limit to query results #601

Merged 5 commits on Nov 16, 2021
1 change: 1 addition & 0 deletions doc/changes/registry/DM-32403.feature.md
@@ -0,0 +1 @@
Iterables returned from registry methods `queryDataIds` and `queryDimensionRecords` have two new methods - `order_by` and `limit`.
25 changes: 25 additions & 0 deletions doc/lsst.daf.butler/queries.rst
@@ -385,3 +385,28 @@ Few examples of valid expressions using some of the constructs:
visit.datetime_end < T'mjd/58938.515/tai'

ingest_date < T'2020-11-06 21:10:00'


.. _daf_butler_query_ordering:

Query result ordering
---------------------

A few query methods (`~Registry.queryDataIds` and `~Registry.queryDimensionRecords`) support special constructs for ordering and limiting the number of returned records. These methods return iterable objects that have ``order_by()`` and ``limit()`` methods. Both methods modify the iterable object and must be called before iterating over the resulting records; for convenience they can be chained, as in the example below.

The ``order_by()`` method accepts a variable number of positional arguments specifying the columns/fields used for ordering. Each argument can have one of the supported formats:

- A dimension name, corresponding to the value of the dimension primary key, e.g. ``"visit"``.
- A dimension name and a field name separated by a dot. The field name can refer to any of the dimension's metadata fields or keys, e.g. ``"visit.name"``, ``"detector.raft"``. The special field names ``"timespan.begin"`` and ``"timespan.end"`` can be used with temporal dimensions (visit and exposure).
- A field name without a dimension name; in this case the field is searched for in all dimensions used by the query, and it must be unique. E.g. ``"cell_x"`` means the same as ``"patch.cell_x"``.
- Any of the above prefixed with a minus sign to reverse the ordering for that field, e.g. ``"-visit.timespan.begin"``.

The ``limit()`` method accepts two positional integer arguments: the maximum number of records to return and an optional offset (the number of records to skip). If the offset is not provided, it defaults to 0.

An example using these two methods:

.. code-block:: Python

# Print ten latest visit records in reverse time order
for record in registry.queryDimensionRecords("visit").order_by("-timespan.begin").limit(10):
print(record)
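
A further example combining the other ``order_by`` formats with an offset (the ``instrument="HSC"`` constraint and the dimension set here are illustrative assumptions, not part of the method contract):

.. code-block:: Python

    # Data IDs for visit/detector combinations, ordered by raft name and then
    # by reverse visit start time; return at most 50 records after skipping
    # the first 10.
    results = (
        registry.queryDataIds(["visit", "detector"], instrument="HSC")
        .order_by("detector.raft", "-visit.timespan.begin")
        .limit(50, 10)
    )
    for dataId in results:
        print(dataId)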
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/_butler.py
@@ -583,7 +583,7 @@ def _rewrite_data_id(self, dataId: Optional[DataId], datasetType: DatasetType,
-------
dataId : `dict` or `DataCoordinate`
The, possibly rewritten, dataId. If given a `DataCoordinate` and
- no keyword arguments, the orginal dataId will be returned
+ no keyword arguments, the original dataId will be returned
unchanged.
**kwargs : `dict`
Any unused keyword arguments.
9 changes: 9 additions & 0 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -37,7 +37,10 @@
directory_argument,
element_argument,
glob_argument,
limit_option,
offset_option,
options_file_option,
order_by_option,
query_datasets_options,
register_dataset_types_option,
repo_argument,
@@ -432,6 +435,9 @@ def certify_calibrations(*args, **kwargs):
"physical_filter" values to only those for which at least one "raw" dataset
exists in "collections". Requires --collections."""))
@where_option(help=where_help)
@order_by_option()
@limit_option()
@offset_option()
@options_file_option()
def query_data_ids(**kwargs):
"""List the data IDs in a repository.
@@ -454,6 +460,9 @@ def query_data_ids(**kwargs):
--collections."""))
@collections_option(help=collections_option.help + " May only be used with --datasets.")
@where_option(help=where_help)
@order_by_option()
@limit_option()
@offset_option()
@click.option("--no-check", is_flag=True,
help=unwrap("""Don't check the query before execution. By default the query is checked before it
executed, this may reject some valid queries that resemble common mistakes."""))
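The new decorators surface the same controls on the command line. A hedged example invocation (the repository path and query values are illustrative; the option names and comma-separated ``--order-by`` syntax follow the definitions in ``options.py`` below):

.. code-block:: sh

    butler query-data-ids REPO visit detector \
        --where "instrument = 'HSC'" \
        --order-by "detector.raft,-visit.timespan.begin" \
        --limit 50 --offset 10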
26 changes: 26 additions & 0 deletions python/lsst/daf/butler/cli/opt/options.py
@@ -183,3 +183,29 @@ def makeCollectionTypes(context, param, value):

where_option = MWOptionDecorator("--where",
help="A string expression similar to a SQL WHERE clause.")


order_by_option = MWOptionDecorator(
"--order-by",
help=unwrap("""One or more comma-separated names used to order records. Names can be dimension names,
metadata names optionally prefixed by a dimension name and dot, or
timestamp_begin/timestamp_end (with optional dimension name). To reverse ordering for a name
prefix it with a minus sign."""),
multiple=True,
callback=split_commas
)


limit_option = MWOptionDecorator(
"--limit",
help=unwrap("Limit the number of records, by default all records are shown."),
type=int,
default=0
)

offset_option = MWOptionDecorator(
"--offset",
help=unwrap("Skip initial number of records, only used when --limit is specified."),
type=int,
default=0
)
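
Taken together, these three options map directly onto the new result-object methods. A minimal sketch of that glue (the helper name and exact wiring are assumptions; the commands above delegate to script functions not shown in this diff):

.. code-block:: Python

    def apply_order_and_limit(results, order_by=(), limit=0, offset=0):
        # ``multiple=True`` plus the ``split_commas`` callback turn repeated
        # or comma-separated --order-by values into a flat tuple of names.
        if order_by:
            results = results.order_by(*order_by)
        # A limit of 0 (the option default) means "show all records"; the
        # offset is only meaningful together with --limit.
        if limit:
            results = results.limit(limit, offset)
        return results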
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/_topology.py
@@ -212,7 +212,7 @@ class TopologicalExtentDatabaseRepresentation(Generic[_R]):
@abstractmethod
def makeFieldSpecs(cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
) -> Tuple[ddl.FieldSpec, ...]:
"""Make objects that relfect the fields that must be added to table.
"""Make objects that reflect the fields that must be added to table.

Makes one or more `ddl.FieldSpec` objects that reflect the fields
that must be added to a table for this representation.
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/core/config.py
@@ -1070,7 +1070,7 @@ class ConfigSubset(Config):
consistency.
mergeDefaults : `bool`, optional
If `True` defaults will be read and the supplied config will
- be combined with the defaults, with the supplied valiues taking
+ be combined with the defaults, with the supplied values taking
precedence.
searchPaths : `list` or `tuple`, optional
Explicit additional paths to search for defaults. They should
@@ -1128,7 +1128,7 @@ def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=No
if searchPaths:
fullSearchPath.extend(searchPaths)

- # Read default paths from enviroment
+ # Read default paths from environment
fullSearchPath.extend(self.defaultSearchPaths())

# There are two places to find defaults for this particular config
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/core/datasets/type.py
@@ -243,7 +243,7 @@ def __lt__(self, other: Any) -> bool:
def name(self) -> str:
"""Return a string name for the Dataset.

- Mmust correspond to the same `DatasetType` across all Registries.
+ Must correspond to the same `DatasetType` across all Registries.
"""
return self._name

@@ -514,7 +514,7 @@ def from_simple(cls, simple: SerializedDatasetType,
registry: Optional[Registry] = None) -> DatasetType:
"""Construct a new object from the simplified form.

- This is usally data returned from the `to_simple` method.
+ This is usually data returned from the `to_simple` method.

Parameters
----------
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/dimensions/_coordinate.py
@@ -102,7 +102,7 @@ class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]):
An immutable data ID dictionary that guarantees that its key-value pairs
identify at least all required dimensions in a `DimensionGraph`.

- `DataCoordinateSet` itself is an ABC, but provides `staticmethod` factory
+ `DataCoordinate` itself is an ABC, but provides `staticmethod` factory
functions for private concrete implementations that should be sufficient
for most purposes. `standardize` is the most flexible and safe of these;
the others (`makeEmpty`, `fromRequiredValues`, and `fromFullValues`) are
@@ -195,7 +195,7 @@ def subset(self, graph: DimensionGraph) -> DataCoordinateIterable:
May be ``self`` if ``graph == self.graph``. Elements are
equivalent to those that would be created by calling
`DataCoordinate.subset` on all elements in ``self``, possibly
- with deduplication and/or reordeding (depending on the subclass,
+ with deduplication and/or reordering (depending on the subclass,
which may make more specific guarantees).
"""
raise NotImplementedError()
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/dimensions/_database.py
@@ -197,7 +197,7 @@ def __init__(

@property
def name(self) -> str:
- # Docstring inherited from TopoogicalRelationshipEndpoint.
+ # Docstring inherited from TopologicalRelationshipEndpoint.
return self._name

@property
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/dimensions/_elements.py
@@ -65,7 +65,7 @@ class DimensionElement(TopologicalRelationshipEndpoint):

Notes
-----
- `DimensionElement` instances should always be constructed by and retreived
+ `DimensionElement` instances should always be constructed by and retrieved
from a `DimensionUniverse`. They are immutable after they are fully
constructed, and should never be copied.

2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/dimensions/_governor.py
@@ -120,7 +120,7 @@ def __init__(

@property
def name(self) -> str:
- # Docstring inherited from TopoogicalRelationshipEndpoint.
+ # Docstring inherited from TopologicalRelationshipEndpoint.
return self._name

@property
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/core/dimensions/_graph.py
@@ -83,7 +83,7 @@ class DimensionGraph:
be included. At most one of ``dimensions`` and ``names`` must be
provided.
names : iterable of `str`, optional
- An iterable of the names of dimensiosn that must be included in the
+ An iterable of the names of dimensions that must be included in the
graph. All (recursive) dependencies of these dimensions will also
be included. At most one of ``dimensions`` and ``names`` must be
provided.
@@ -454,8 +454,8 @@ def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
"""

required: NamedValueAbstractSet[Dimension]
"""The subset of `dimensions` whose elments must be directly identified via
their primary keys in a data ID in order to identify the rest of the
"""The subset of `dimensions` whose elements must be directly identified
via their primary keys in a data ID in order to identify the rest of the
elements in the graph (`NamedValueAbstractSet` of `Dimension`).
"""

2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/dimensions/_universe.py
@@ -392,7 +392,7 @@ def makePacker(self, name: str, dataId: DataCoordinate) -> DimensionPacker:
Name of the packer, matching a key in the "packers" section of the
dimension configuration.
dataId : `DataCoordinate`
- Fully-expanded data ID that identfies the at least the "fixed"
+ Fully-expanded data ID that identifies the at least the "fixed"
dimensions of the packer (i.e. those that are assumed/given,
setting the space over which packed integer IDs are unique).
``dataId.hasRecords()`` must return `True`.
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/dimensions/construction.py
@@ -106,7 +106,7 @@ class DimensionConstructionBuilder:
`DimensionConstructionVisitor` objects can be added to a
`DimensionConstructionBuilder` object in any order, and are invoked
in a deterministic order consistent with their dependency relationships
- by a single call (by the `DimensionUnvierse`) to the `finish` method.
+ by a single call (by the `DimensionUniverse`) to the `finish` method.

Parameters
----------
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/formatter.py
@@ -391,7 +391,7 @@ def validateExtension(cls, location: Location) -> None:
raise NotImplementedError("No file extension registered with this formatter") from None

# If extension is implemented as an instance property it won't return
- # a string when called as a class propertt. Assume that
+ # a string when called as a class property. Assume that
# the supported extensions class property is complete.
if default is not None and isinstance(default, str):
supported.add(default)
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/core/named.py
@@ -254,7 +254,7 @@ def freeze(self) -> NamedKeyMapping[K, V]:
-------
self : `NamedKeyMapping`
While ``self`` is modified in-place, it is also returned with a
- type anotation that reflects its new, frozen state; assigning it
+ type annotation that reflects its new, frozen state; assigning it
to a new variable (and considering any previous references
invalidated) should allow for more accurate static type checking.
"""
@@ -558,7 +558,7 @@ def freeze(self) -> NamedValueAbstractSet[K]:
-------
self : `NamedValueAbstractSet`
While ``self`` is modified in-place, it is also returned with a
type anotation that reflects its new, frozen state; assigning it
type annotation that reflects its new, frozen state; assigning it
to a new variable (and considering any previous references
invalidated) should allow for more accurate static type checking.
"""
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/storageClassDelegate.py
@@ -374,7 +374,7 @@ def handleParameters(self, inMemoryDataset: Any, parameters: Optional[Mapping[st

@classmethod
def selectResponsibleComponent(cls, derivedComponent: str, fromComponents: Set[Optional[str]]) -> str:
"""Select the best component for calcluating a derived component.
"""Select the best component for calculating a derived component.

Given a possible set of components to choose from, return the
component that should be used to calculate the requested derived
48 changes: 47 additions & 1 deletion python/lsst/daf/butler/core/timespan.py
@@ -614,7 +614,7 @@ class TimespanDatabaseRepresentation(TopologicalExtentDatabaseRepresentation[Tim

Compound: ClassVar[Type[TimespanDatabaseRepresentation]]
"""A concrete subclass of `TimespanDatabaseRepresentation` that simply
- uses two separate fields for the begin (inclusive) and end (excusive)
+ uses two separate fields for the begin (inclusive) and end (exclusive)
endpoints.

This implementation should be compatible with any SQL database, and should
@@ -749,6 +749,44 @@ def contains(self: _S, other: Union[_S, sqlalchemy.sql.ColumnElement]) -> sqlalc
"""
raise NotImplementedError()

@abstractmethod
def lower(self: _S) -> sqlalchemy.sql.ColumnElement:
"""Return a SQLAlchemy expression representing a lower bound of a
timespan.

Returns
-------
lower : `sqlalchemy.sql.ColumnElement`
A SQLAlchemy expression for a lower bound.

Notes
-----
If the database holds ``NULL`` for a timespan then the returned expression
should evaluate to 0. The main purpose of this method and `upper` is for
use when generating SQL, in particular an ORDER BY clause, to guarantee
predictable ordering. They could potentially be used for transforming
boolean user expressions into SQL, but that will likely require extra
attention to ordering issues.
"""
raise NotImplementedError()

@abstractmethod
def upper(self: _S) -> sqlalchemy.sql.ColumnElement:
"""Return a SQLAlchemy expression representing an upper bound of a
timespan.

Returns
-------
upper : `sqlalchemy.sql.ColumnElement`
A SQLAlchemy expression for an upper bound.

Notes
-----
If the database holds ``NULL`` for a timespan then the returned expression
should evaluate to 0. See also the notes for the `lower` method.
"""
raise NotImplementedError()


class _CompoundTimespanDatabaseRepresentation(TimespanDatabaseRepresentation):
"""Representation of a time span as two separate fields.
@@ -919,6 +957,14 @@ def contains(
else:
return sqlalchemy.sql.and_(self._nsec[0] <= other._nsec[0], self._nsec[1] >= other._nsec[1])

def lower(self) -> sqlalchemy.sql.ColumnElement:
# Docstring inherited.
return sqlalchemy.sql.functions.coalesce(self._nsec[0], sqlalchemy.sql.literal(0))

def upper(self) -> sqlalchemy.sql.ColumnElement:
# Docstring inherited.
return sqlalchemy.sql.functions.coalesce(self._nsec[1], sqlalchemy.sql.literal(0))

def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
# Docstring inherited.
if name is None:
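The ``lower()``/``upper()`` implementations above reduce to ``COALESCE`` over the nanosecond bound columns. A standalone sketch of the SQL this yields (the table and column names are illustrative, not the actual butler schema):

.. code-block:: Python

    import sqlalchemy

    metadata = sqlalchemy.MetaData()
    visit = sqlalchemy.Table(
        "visit",
        metadata,
        sqlalchemy.Column("id", sqlalchemy.BigInteger, primary_key=True),
        sqlalchemy.Column("timespan_begin_nsec", sqlalchemy.BigInteger, nullable=True),
    )

    # NULL timespans evaluate to 0, giving the predictable ordering that the
    # lower()/upper() notes call for.
    lower = sqlalchemy.sql.functions.coalesce(
        visit.c.timespan_begin_nsec, sqlalchemy.sql.literal(0)
    )
    print(sqlalchemy.select(visit.c.id).order_by(lower.desc()))
    # Renders roughly:
    #   SELECT visit.id FROM visit
    #   ORDER BY coalesce(visit.timespan_begin_nsec, :coalesce_1) DESC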
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/datastores/chainedDatastore.py
@@ -581,7 +581,7 @@ def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
primary, components = self.getURIs(ref, predict)
if primary is None or components:
raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. "
"Use Dataastore.getURIs() instead.")
"Use Datastore.getURIs() instead.")
return primary

def retrieveArtifacts(self, refs: Iterable[DatasetRef],
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/datastores/fileDatastore.py
@@ -843,7 +843,7 @@ def _extractIngestInfo(self, path: Union[str, ButlerURI], ref: DatasetRef, *,
-------
info : `StoredFileInfo`
Internal datastore record for this file. This will be inserted by
- the caller; the `_extractIngestInfo` is only resposible for
+ the caller; the `_extractIngestInfo` is only responsible for
creating and populating the struct.

Raises
@@ -1016,7 +1016,7 @@ def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) ->
Returns
-------
info : `StoredFileInfo`
- Information describin the artifact written to the datastore.
+ Information describing the artifact written to the datastore.
"""
location, formatter = self._prepare_for_put(inMemoryDataset, ref)
uri = location.uri
@@ -1588,7 +1588,7 @@ def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
primary, components = self.getURIs(ref, predict)
if primary is None or components:
raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. "
"Use Dataastore.getURIs() instead.")
"Use Datastore.getURIs() instead.")
return primary

def retrieveArtifacts(self, refs: Iterable[DatasetRef],