DM-43526: Implement registry collection methods for RemoteButler #983

Merged (6 commits) on Mar 28, 2024
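This PR implements several Registry collection methods for RemoteButler: getCollectionType, getCollectionChain, getCollectionParentChains, getCollectionDocumentation, getCollectionSummary, and queryCollections (glob patterns only; regex is not supported). A minimal usage sketch follows; the repository label and collection names are hypothetical.

from lsst.daf.butler import Butler, CollectionType

butler = Butler("dp02-remote")  # hypothetical label assumed to resolve to a RemoteButler server
registry = butler.registry

# Glob-based collection search (RemoteButler does not accept regex patterns).
print(list(registry.queryCollections("HSC/runs/*", collectionTypes=CollectionType.RUN)))

# Per-collection metadata now available through the client.
print(registry.getCollectionType("HSC/defaults"))         # e.g. CollectionType.CHAINED
print(registry.getCollectionChain("HSC/defaults"))        # ordered child collections
print(registry.getCollectionParentChains("HSC/raw/all"))  # chains containing this collection
print(registry.getCollectionDocumentation("HSC/defaults"))
print(registry.getCollectionSummary("HSC/raw/all"))       # dataset types + governor values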
12 changes: 11 additions & 1 deletion python/lsst/daf/butler/_exceptions.py
@@ -34,10 +34,11 @@
"DatasetTypeNotSupportedError",
"EmptyQueryResultError",
"MissingDatasetTypeError",
"MissingCollectionError",
"ValidationError",
)

from ._exceptions_legacy import DataIdError, DatasetTypeError
from ._exceptions_legacy import CollectionError, DataIdError, DatasetTypeError


class ButlerUserError(Exception):
@@ -92,6 +93,14 @@ class DimensionNameError(KeyError, DataIdError, ButlerUserError):
error_type = "dimension_name"


class MissingCollectionError(CollectionError, ButlerUserError):
"""Exception raised when an operation attempts to use a collection that
does not exist.
"""

error_type = "missing_collection"


class MissingDatasetTypeError(DatasetTypeError, KeyError, ButlerUserError):
"""Exception raised when a dataset type does not exist."""

@@ -145,6 +154,7 @@ class UnknownButlerUserError(ButlerUserError):
CalibrationLookupError,
DimensionNameError,
DatasetNotFoundError,
MissingCollectionError,
MissingDatasetTypeError,
UnknownButlerUserError,
)
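MissingCollectionError now lives with the other ButlerUserError subclasses so the server can relay it to remote clients, while its CollectionError base stays behind in _exceptions_legacy; both remain importable from lsst.daf.butler.registry. A small compatibility sketch, assuming an existing repository at a made-up path:

from lsst.daf.butler import Butler
from lsst.daf.butler.registry import CollectionError, MissingCollectionError

butler = Butler("/path/to/repo")  # hypothetical existing repository
try:
    butler.registry.getCollectionType("no/such/collection")
except CollectionError as err:
    # Handlers written against the old registry exception keep working,
    # because the relocated class still derives from CollectionError.
    assert isinstance(err, MissingCollectionError)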
4 changes: 4 additions & 0 deletions python/lsst/daf/butler/_exceptions_legacy.py
@@ -55,3 +55,7 @@ class DataIdError(RegistryError):

class DatasetTypeError(RegistryError):
"""Exception raised for problems with dataset types."""


class CollectionError(RegistryError):
"""Exception raised for collection-related errors."""
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/registry/__init__.py
@@ -27,8 +27,8 @@

# Re-export some top-level exception types for backwards compatibility -- these
# used to be part of registry.
from .._exceptions import DimensionNameError, MissingDatasetTypeError
from .._exceptions_legacy import DataIdError, DatasetTypeError, RegistryError
from .._exceptions import DimensionNameError, MissingCollectionError, MissingDatasetTypeError
from .._exceptions_legacy import CollectionError, DataIdError, DatasetTypeError, RegistryError

# Registry imports.
from . import interfaces, managers, queries, wildcards
38 changes: 33 additions & 5 deletions python/lsst/daf/butler/registry/_collection_summary.py
@@ -26,16 +26,18 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("CollectionSummary",)
__all__ = ("CollectionSummary", "SerializedCollectionSummary")

import dataclasses
from collections.abc import Generator, Iterable, Mapping, Set
from typing import cast

import pydantic

from .._dataset_ref import DatasetRef
from .._dataset_type import DatasetType
from .._dataset_type import DatasetType, SerializedDatasetType
from .._named import NamedValueSet
from ..dimensions import DataCoordinate
from ..dimensions import DataCoordinate, DimensionUniverse


@dataclasses.dataclass
@@ -52,8 +54,7 @@ def copy(self) -> CollectionSummary:
at all.
"""
return CollectionSummary(
dataset_types=self.dataset_types.copy(),
governors={k: v.copy() for k, v in self.governors.items()},
dataset_types=self.dataset_types.copy(), governors=_copy_governors(self.governors)
)

def add_datasets_generator(self, refs: Iterable[DatasetRef]) -> Generator[DatasetRef, None, None]:
@@ -223,6 +224,21 @@ def is_compatible_with(
return False
return True

def to_simple(self) -> SerializedCollectionSummary:
return SerializedCollectionSummary(
dataset_types=[x.to_simple() for x in self.dataset_types],
governors=_copy_governors(self.governors),
)

@staticmethod
def from_simple(simple: SerializedCollectionSummary, universe: DimensionUniverse) -> CollectionSummary:
summary = CollectionSummary()
summary.dataset_types = NamedValueSet(
[DatasetType.from_simple(x, universe) for x in simple.dataset_types]
)
summary.governors = _copy_governors(simple.governors)
return summary

dataset_types: NamedValueSet[DatasetType] = dataclasses.field(default_factory=NamedValueSet)
"""Dataset types that may be present in the collection
(`NamedValueSet` [ `DatasetType` ]).
@@ -241,3 +257,15 @@ def is_compatible_with(
IDs, and hence the values of those data IDs are unconstrained by this
collection in the query.
"""


def _copy_governors(governors: dict[str, set[str]]) -> dict[str, set[str]]:
"""Make an independent copy of the 'governors' data structure."""
return {k: v.copy() for k, v in governors.items()}


class SerializedCollectionSummary(pydantic.BaseModel):
"""Serialized version of CollectionSummary."""

dataset_types: list[SerializedDatasetType]
governors: dict[str, set[str]]
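The new to_simple()/from_simple() hooks and the SerializedCollectionSummary model let a collection summary travel over the Butler server API. A minimal round-trip sketch, where the "bias" dataset type and the governor value are placeholders invented for illustration:

from lsst.daf.butler import DatasetType, DimensionUniverse
from lsst.daf.butler.registry import CollectionSummary

universe = DimensionUniverse()
bias = DatasetType(
    "bias",
    dimensions=["instrument", "detector"],
    storageClass="ExposureF",
    universe=universe,
)

summary = CollectionSummary()
summary.dataset_types.add(bias)            # NamedValueSet of DatasetType
summary.governors["instrument"] = {"HSC"}  # governor dimension values seen

serialized = summary.to_simple()           # pydantic SerializedCollectionSummary
restored = CollectionSummary.from_simple(serialized, universe)

assert restored.dataset_types.names == summary.dataset_types.names
assert restored.governors == summary.governors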
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/registry/_defaults.py
@@ -35,9 +35,9 @@

from lsst.utils.classes import immutable

from .._exceptions import MissingCollectionError
from ..dimensions import DataCoordinate
from ._collection_summary import CollectionSummary
from ._exceptions import MissingCollectionError
from .wildcards import CollectionWildcard

if TYPE_CHECKING:
14 changes: 1 addition & 13 deletions python/lsst/daf/butler/registry/_exceptions.py
@@ -28,14 +28,12 @@

__all__ = (
"ArgumentError",
"CollectionError",
"CollectionExpressionError",
"CollectionTypeError",
"ConflictingDefinitionError",
"DataIdValueError",
"DatasetTypeExpressionError",
"InconsistentDataIdError",
"MissingCollectionError",
"MissingSpatialOverlapError",
"NoDefaultCollectionError",
"OrphanedRecordError",
@@ -45,7 +43,7 @@
"UserExpressionSyntaxError",
)

from .._exceptions_legacy import DataIdError, RegistryError
from .._exceptions_legacy import CollectionError, DataIdError, RegistryError


class ArgumentError(RegistryError):
@@ -66,10 +64,6 @@ class InconsistentDataIdError(DataIdError):
"""


class CollectionError(RegistryError):
"""Exception raised for collection-related errors."""


class CollectionTypeError(CollectionError):
"""Exception raised when type of a collection is incorrect."""

@@ -78,12 +72,6 @@ class CollectionExpressionError(CollectionError):
"""Exception raised for an incorrect collection expression."""


class MissingCollectionError(CollectionError):
"""Exception raised when an operation attempts to use a collection that
does not exist.
"""


class NoDefaultCollectionError(CollectionError):
"""Exception raised when a collection is needed, but collection argument
is not provided and default collection is not defined in registry.
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/registry/collections/_base.py
@@ -38,9 +38,9 @@

import sqlalchemy

from ..._exceptions import MissingCollectionError
from ...timespan_database_representation import TimespanDatabaseRepresentation
from .._collection_type import CollectionType
from .._exceptions import MissingCollectionError
from ..interfaces import ChainedCollectionRecord, CollectionManager, CollectionRecord, RunRecord, VersionTuple
from ..wildcards import CollectionWildcard

2 changes: 1 addition & 1 deletion python/lsst/daf/butler/registry/sql_registry.py
@@ -494,7 +494,7 @@ def getCollectionType(self, name: str) -> CollectionType:
"""
return self._managers.collections.find(name).type

def _get_collection_record(self, name: str) -> CollectionRecord:
def get_collection_record(self, name: str) -> CollectionRecord:
"""Return the record for this collection.

Parameters
27 changes: 22 additions & 5 deletions python/lsst/daf/butler/registry/tests/_registry.py
@@ -56,7 +56,7 @@
from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import MissingDatasetTypeError
from ..._exceptions import MissingCollectionError, MissingDatasetTypeError
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
@@ -72,7 +72,6 @@
DataIdValueError,
DatasetTypeExpressionError,
InconsistentDataIdError,
MissingCollectionError,
NoDefaultCollectionError,
OrphanedRecordError,
)
@@ -100,6 +99,10 @@ class RegistryTests(ABC):
in default configuration (`str` or `dict`).
"""

supportsCollectionRegex: bool = True
"""True if the registry class being tested supports regex searches for
collections."""

@classmethod
@abstractmethod
def getDataDir(cls) -> str:
@@ -770,16 +773,30 @@ def testCollections(self):
registry.setCollectionChain(chain2, [run2, chain1])
self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
# Query for collections matching a regex.

if self.supportsCollectionRegex:
# Query for collections matching a regex.
self.assertCountEqual(
list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
["imported_r", "imported_g"],
)
# Query for collections matching a regex or an explicit str.
self.assertCountEqual(
list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
["imported_r", "imported_g", "chain1"],
)
# Same queries as the regex ones above, but using globs instead of
# regex.
self.assertCountEqual(
list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
list(registry.queryCollections("imported_*", flattenChains=False)),
["imported_r", "imported_g"],
)
# Query for collections matching a regex or an explicit str.
self.assertCountEqual(
list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
["imported_r", "imported_g", "chain1"],
)

# Search for bias with dataId1 should find it via tag1 in chain2,
# recursing, because it is not in run1.
self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
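The supportsCollectionRegex flag added above lets concrete test cases opt out of the regex assertions in testCollections(). A hypothetical subclass for a registry whose server API only accepts glob patterns might look like this (the class name and omitted setup are made up):

import unittest

from lsst.daf.butler.registry.tests import RegistryTests


class RemoteRegistryTestCase(RegistryTests, unittest.TestCase):
    """Hypothetical test case for a glob-only registry backend."""

    supportsCollectionRegex = False

    # The RegistryTests abstract hooks (such as getDataDir) would still need
    # real implementations here; they are omitted in this sketch.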
30 changes: 24 additions & 6 deletions python/lsst/daf/butler/remote_butler/_registry.py
@@ -31,6 +31,8 @@
from collections.abc import Iterable, Iterator, Mapping, Sequence
from typing import Any

from lsst.utils.iteration import ensure_iterable

from .._dataset_association import DatasetAssociation
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
from .._dataset_type import DatasetType
@@ -51,13 +53,15 @@
CollectionArgType,
CollectionSummary,
CollectionType,
CollectionTypeError,
DatasetTypeError,
Registry,
RegistryDefaults,
)
from ..registry.queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from ..remote_butler import RemoteButler
from ._collection_args import convert_collection_arg_to_glob_string_list
from .server_models import QueryCollectionsRequestModel


class RemoteButlerRegistry(Registry):
@@ -106,7 +110,7 @@
raise NotImplementedError()

def getCollectionType(self, name: str) -> CollectionType:
raise NotImplementedError()
return self._butler._get_collection_info(name).type

def registerRun(self, name: str, doc: str | None = None) -> bool:
raise NotImplementedError()
@@ -115,22 +119,28 @@
raise NotImplementedError()

def getCollectionChain(self, parent: str) -> Sequence[str]:
raise NotImplementedError()
info = self._butler._get_collection_info(parent)
if info.type is not CollectionType.CHAINED:
raise CollectionTypeError(f"Collection '{parent}' has type {info.type.name}, not CHAINED.")

return info.children
(Codecov warning: added line 124 was not covered by tests.)

def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
raise NotImplementedError()

def getCollectionParentChains(self, collection: str) -> set[str]:
raise NotImplementedError()
info = self._butler._get_collection_info(collection, include_parents=True)
assert info.parents is not None, "Requested list of parents from server, but it did not send them."
return info.parents

def getCollectionDocumentation(self, collection: str) -> str | None:
raise NotImplementedError()
info = self._butler._get_collection_info(collection, include_doc=True)
return info.doc

def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
raise NotImplementedError()

def getCollectionSummary(self, collection: str) -> CollectionSummary:
raise NotImplementedError()
return self._butler._get_collection_summary(collection)

def registerDatasetType(self, datasetType: DatasetType) -> bool:
raise NotImplementedError()
@@ -271,7 +281,15 @@
flattenChains: bool = False,
includeChains: bool | None = None,
) -> Sequence[str]:
raise NotImplementedError()
if includeChains is None:
includeChains = not flattenChains
query = QueryCollectionsRequestModel(
search=convert_collection_arg_to_glob_string_list(expression),
collection_types=list(ensure_iterable(collectionTypes)),
flatten_chains=flattenChains,
include_chains=includeChains,
)
return self._butler._query_collections(query).collections

Review discussion on queryCollections:

Member: @TallJimbo what's the plan for the datasetType parameter? It's not in this implementation and is ignored in sql registry, but should the client code forward it to the server on the assumption that at some point the direct butler might use it? Or are we okay acting like it doesn't exist? (The documentation doesn't seem to say that it's ignored, and sql_registry mentions DM-24939, which is Done but hasn't fixed the problem.)

Contributor (author): If it gets implemented in DirectButler someday it can get added here at the same time. It sounds like we're leaning towards re-designing this interface entirely, so we can drop it from the new interface or make it actually work there. I don't like having unused variables/features hanging around, because their existence implies to future readers that they are used for something, which can end up wasting a lot of time.

Member: I think we drop it here, and RFC dropping it from DirectButler (and the CLI) when we RFC dropping regex support there.
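Following the implementation above, a short usage sketch of the includeChains default; the repository label and collection name are hypothetical:

from lsst.daf.butler import Butler

registry = Butler("dp02-remote").registry  # hypothetical server-backed repository

# flattenChains=False (the default) -> includeChains defaults to True, so the
# CHAINED collection itself appears in the results.
print(list(registry.queryCollections("HSC/defaults")))

# flattenChains=True -> includeChains defaults to False, so the chain is
# expanded into its child collections instead.
print(list(registry.queryCollections("HSC/defaults", flattenChains=True)))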

def queryDatasets(
self,