DM-39605: Use butler.dimensions rather than butler.registry.dimensions #850

Merged
merged 28 commits on Jun 13, 2023
Changes from 18 commits
Commits
28 commits
daa5a6b
Use butler.dimensions rather than butler.registry.dimensions
timj Jun 8, 2023
3e892e6
Declare that the serialized type must be a pydantic model
timj Jun 12, 2023
eb921e0
Use modern type annotations
timj Jun 12, 2023
2867a08
Use modern set syntax
timj Jun 12, 2023
b115f04
Use f-strings rather than str.format
timj Jun 12, 2023
d3d69dd
Remove unnecessary parentheses
timj Jun 12, 2023
b3efd76
Drop explicit use of a value that matches default
timj Jun 12, 2023
8b0f35f
Use modern set syntax in tests
timj Jun 12, 2023
b5b4afa
Use modern type annotations in tests
timj Jun 12, 2023
0e126f0
Use f-string rather than format
timj Jun 12, 2023
787e61c
Remove unneeded parens
timj Jun 12, 2023
8038f3e
Remove explicit defaults from open calls
timj Jun 12, 2023
7bcfe68
Fix problems reported by pydocstyle
timj Jun 12, 2023
b8a30c7
Prefer collections.abc.Set over typing.AbstractSet
timj Jun 12, 2023
35eac70
Add docstring for ColumnCategorization
timj Jun 12, 2023
b3100f1
Fix docstring references to collections.abc classes
timj Jun 12, 2023
027117f
DatasetRef no longer raises for no ID and run is required
timj Jun 12, 2023
f75b6e7
Remove mention of incrementing integer in docs for dataset ID
timj Jun 12, 2023
6e9a2a2
Replace a Union for a compound type
timj Jun 13, 2023
2041e8e
Fix incorrect type in docstring
timj Jun 13, 2023
a2401aa
Some minor fixes to annotations
timj Jun 13, 2023
98a613c
Update doc string to match type annotation
timj Jun 13, 2023
25c1ecf
Fix some ResourcePathExpression problems from earlier commit
timj Jun 13, 2023
4353b59
Use Iterable rather than list for annotation
timj Jun 13, 2023
821034a
Add news fragment
timj Jun 13, 2023
af75926
Add RepoExportContext to public API
timj Jun 13, 2023
7a04c33
Replace IOError with OSError (they are aliases)
timj Jun 13, 2023
dc06cb3
.format to f-string
timj Jun 13, 2023
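
The commits above combine the headline API change (reading the dimension universe directly from the butler) with a series of style modernizations that recur throughout the diffs below. A minimal sketch of the headline change, not taken from the diff itself; the repository path is hypothetical:

```python
from lsst.daf.butler import Butler

butler = Butler("/path/to/repo")  # hypothetical repository

# Before this ticket: the dimension universe was reached via the registry.
# universe = butler.registry.dimensions

# After: read it directly from the butler, as these commits do.
universe = butler.dimensions

# The style commits follow the same patterns visible in the diffs below,
# e.g. `dict[str, Any] | None` instead of `Optional[Dict[str, Any]]`,
# set literals such as {"lsst"} instead of set(["lsst"]), and f-strings
# instead of str.format.
```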
1 change: 0 additions & 1 deletion doc/lsst.daf.butler/organizing.rst
@@ -14,7 +14,6 @@ We call a `DatasetRef` whose `~DatasetRef.id` attribute is not `None` a *resolve

In most data repositories, dataset IDs are 128-bit UUIDs that are guaranteed to be unique across all data repositories, not just within one; if two datasets share the same UUID in different data repositories, they must be identical (this is possible because of the extraordinarily low probability of a collision between two random 128-bit numbers, and our reservation of deterministic UUIDs for very special datasets).
As a result, we also frequently refer to the dataset ID as the UUID, especially in contexts where UUIDs are actually needed or can be safely assumed.
But 64-bit autoincrement integers are also supported (albeit mostly for legacy reasons), and we continue to use "dataset ID" in most code and documentation to refer to either form.

Most of the time, however, users identify a dataset using a combination of three other attributes:

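As a side note on the text above, both UUID flavours it mentions are plain `uuid` module constructs; a small illustration follows, where the namespace and name are hypothetical and not what daf_butler actually uses:

```python
import uuid

# Ordinary dataset IDs: random 128-bit UUIDs, so cross-repository collisions
# are astronomically unlikely.
random_dataset_id = uuid.uuid4()

# "Very special" datasets use deterministic UUIDs, e.g. derived from a fixed
# namespace and a name (both hypothetical here).
namespace = uuid.UUID("00000000-0000-0000-0000-000000000000")
deterministic_id = uuid.uuid5(namespace, "special/calibration_dataset")

print(random_dataset_id, deterministic_id)
```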
196 changes: 90 additions & 106 deletions python/lsst/daf/butler/_butler.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions python/lsst/daf/butler/_butlerConfig.py
@@ -28,7 +28,7 @@

import copy
import os
from typing import Optional, Sequence, Union
from collections.abc import Sequence

from lsst.resources import ResourcePath, ResourcePathExpression

@@ -66,10 +66,10 @@ class ButlerConfig(Config):

def __init__(
self,
other: Optional[Union[ResourcePathExpression, Config]] = None,
other: ResourcePathExpression | Config | None = None,
searchPaths: Sequence[ResourcePathExpression] | None = None,
):
self.configDir: Optional[ResourcePath] = None
self.configDir: ResourcePath | None = None

# If this is already a ButlerConfig we assume that defaults
# have already been loaded.
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/_butlerRepoIndex.py
@@ -24,7 +24,7 @@
__all__ = ("ButlerRepoIndex",)

import os
from typing import ClassVar, Dict, Set
from typing import ClassVar

from lsst.resources import ResourcePath

@@ -51,7 +51,7 @@ class ButlerRepoIndex:
index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
"""The name of the environment variable to read to locate the index."""

_cache: ClassVar[Dict[ResourcePath, Config]] = {}
_cache: ClassVar[dict[ResourcePath, Config]] = {}
"""Cache of indexes. In most scenarios only one index will be found
and the environment will not change. In tests this may not be true."""

@@ -140,7 +140,7 @@ def _read_repository_index_from_environment(cls) -> Config:
return repo_index

@classmethod
def get_known_repos(cls) -> Set[str]:
def get_known_repos(cls) -> set[str]:
"""Retrieve the list of known repository labels.

Returns
10 changes: 5 additions & 5 deletions python/lsst/daf/butler/_deferredDatasetHandle.py
@@ -27,7 +27,7 @@
__all__ = ("DeferredDatasetHandle",)

import dataclasses
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from ._limited_butler import LimitedButler
@@ -41,8 +41,8 @@ class DeferredDatasetHandle:
def get(
self,
*,
component: Optional[str] = None,
parameters: Optional[dict] = None,
component: str | None = None,
parameters: dict | None = None,
storageClass: str | StorageClass | None = None,
) -> Any:
"""Retrieves the dataset pointed to by this handle
@@ -104,10 +104,10 @@ def dataId(self) -> DataCoordinate:
"""Reference to the dataset (`DatasetRef`).
"""

parameters: Optional[dict]
parameters: dict | None
"""Optional parameters that may be used to specify a subset of the dataset
to be loaded (`dict` or `None`).
"""

storageClass: Optional[Union[str, StorageClass]] = None
storageClass: str | StorageClass | None = None
"""Optional storage class override that can be applied on ``get()``."""
9 changes: 5 additions & 4 deletions python/lsst/daf/butler/_limited_butler.py
@@ -25,7 +25,8 @@

import logging
from abc import ABC, abstractmethod
from typing import Any, ClassVar, Dict, Iterable, Optional, Union
from collections.abc import Iterable
from typing import Any, ClassVar

from deprecated.sphinx import deprecated

@@ -172,7 +173,7 @@ def getDirect(
self,
ref: DatasetRef,
*,
parameters: Optional[Dict[str, Any]] = None,
parameters: dict[str, Any] | None = None,
storageClass: str | StorageClass | None = None,
) -> Any:
"""Retrieve a stored dataset.
@@ -208,7 +209,7 @@ def getDirectDeferred(
self,
ref: DatasetRef,
*,
parameters: Union[dict, None] = None,
parameters: dict | None = None,
storageClass: str | StorageClass | None = None,
) -> DeferredDatasetHandle:
"""Create a `DeferredDatasetHandle` which can later retrieve a dataset,
@@ -378,7 +379,7 @@ def pruneDatasets(
datastores known to this butler. Note that this will make it
impossible to retrieve these datasets even via other collections.
Datasets that are already not stored are ignored by this option.
tags : `Iterable` [ `str` ], optional
tags : `~collections.abc.Iterable` [ `str` ], optional
`~CollectionType.TAGGED` collections to disassociate the datasets
from. Ignored if ``disassociate`` is `False` or ``purge`` is
`True`.
81 changes: 42 additions & 39 deletions python/lsst/daf/butler/_quantum_backed.py
@@ -27,7 +27,8 @@
import logging
import uuid
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Set, Type, Union
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any

from deprecated.sphinx import deprecated
from lsst.resources import ResourcePathExpression
@@ -86,7 +87,7 @@ def addDatasetForeignKey(
*,
name: str = "dataset",
constraint: bool = True,
onDelete: Optional[str] = None,
onDelete: str | None = None,
**kwargs: Any,
) -> ddl.FieldSpec:
# Docstring inherited.
@@ -161,10 +162,10 @@ def __init__(
self._dimensions = dimensions
self._predicted_inputs = set(predicted_inputs)
self._predicted_outputs = set(predicted_outputs)
self._available_inputs: Set[DatasetId] = set()
self._unavailable_inputs: Set[DatasetId] = set()
self._actual_inputs: Set[DatasetId] = set()
self._actual_output_refs: Set[DatasetRef] = set()
self._available_inputs: set[DatasetId] = set()
self._unavailable_inputs: set[DatasetId] = set()
self._actual_inputs: set[DatasetId] = set()
self._actual_output_refs: set[DatasetRef] = set()
self.datastore = datastore
self.storageClasses = storageClasses
self._dataset_types: Mapping[str, DatasetType] = {}
@@ -175,13 +176,13 @@ def __init__(
@classmethod
def initialize(
cls,
config: Union[Config, ResourcePathExpression],
config: Config | ResourcePathExpression,
quantum: Quantum,
dimensions: DimensionUniverse,
filename: str = ":memory:",
OpaqueManagerClass: Type[OpaqueTableStorageManager] = ByNameOpaqueTableStorageManager,
BridgeManagerClass: Type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
search_paths: Optional[List[str]] = None,
OpaqueManagerClass: type[OpaqueTableStorageManager] = ByNameOpaqueTableStorageManager,
BridgeManagerClass: type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
search_paths: list[str] | None = None,
dataset_types: Mapping[str, DatasetType] | None = None,
) -> QuantumBackedButler:
"""Construct a new `QuantumBackedButler` from repository configuration
@@ -209,7 +210,8 @@ def initialize(
location records. Default is a SQL-backed implementation.
search_paths : `list` of `str`, optional
Additional search paths for butler configuration.
dataset_types: `Mapping` [`str`, `DatasetType`], optional
dataset_types: `~collections.abc.Mapping` [`str`, `DatasetType`], \
optional
Mapping of the dataset type name to its registry definition.
"""
predicted_inputs = [ref.id for ref in itertools.chain.from_iterable(quantum.inputs.values())]
@@ -231,15 +233,15 @@
@classmethod
def from_predicted(
cls,
config: Union[Config, ResourcePathExpression],
config: Config | ResourcePathExpression,
predicted_inputs: Iterable[DatasetId],
predicted_outputs: Iterable[DatasetId],
dimensions: DimensionUniverse,
datastore_records: Mapping[str, DatastoreRecordData],
filename: str = ":memory:",
OpaqueManagerClass: Type[OpaqueTableStorageManager] = ByNameOpaqueTableStorageManager,
BridgeManagerClass: Type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
search_paths: Optional[List[str]] = None,
OpaqueManagerClass: type[OpaqueTableStorageManager] = ByNameOpaqueTableStorageManager,
BridgeManagerClass: type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
search_paths: list[str] | None = None,
dataset_types: Mapping[str, DatasetType] | None = None,
) -> QuantumBackedButler:
"""Construct a new `QuantumBackedButler` from sets of input and output
@@ -270,7 +272,8 @@ def from_predicted(
location records. Default is a SQL-backed implementation.
search_paths : `list` of `str`, optional
Additional search paths for butler configuration.
dataset_types: `Mapping` [`str`, `DatasetType`], optional
dataset_types: `~collections.abc.Mapping` [`str`, `DatasetType`], \
optional
Mapping of the dataset type name to its registry definition.
"""
return cls._initialize(
@@ -290,15 +293,15 @@ def from_predicted(
def _initialize(
cls,
*,
config: Union[Config, ResourcePathExpression],
config: Config | ResourcePathExpression,
predicted_inputs: Iterable[DatasetId],
predicted_outputs: Iterable[DatasetId],
dimensions: DimensionUniverse,
filename: str = ":memory:",
datastore_records: Mapping[str, DatastoreRecordData] | None = None,
OpaqueManagerClass: Type[OpaqueTableStorageManager] = ByNameOpaqueTableStorageManager,
BridgeManagerClass: Type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
search_paths: Optional[List[str]] = None,
OpaqueManagerClass: type[OpaqueTableStorageManager] = ByNameOpaqueTableStorageManager,
BridgeManagerClass: type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
search_paths: list[str] | None = None,
dataset_types: Mapping[str, DatasetType] | None = None,
) -> QuantumBackedButler:
"""Internal method with common implementation used by `initialize` and
@@ -328,7 +331,7 @@ def _initialize(
location records. Default is a SQL-backed implementation.
search_paths : `list` of `str`, optional
Additional search paths for butler configuration.
dataset_types: `Mapping` [`str`, `DatasetType`]
dataset_types: `~collections.abc.Mapping` [`str`, `DatasetType`]
Mapping of the dataset type name to its registry definition.
"""
butler_config = ButlerConfig(config, searchPaths=search_paths)
@@ -384,7 +387,7 @@ def getDirect(
self,
ref: DatasetRef,
*,
parameters: Optional[Dict[str, Any]] = None,
parameters: dict[str, Any] | None = None,
storageClass: str | StorageClass | None = None,
) -> Any:
# Docstring inherited.
@@ -423,7 +426,7 @@ def getDirectDeferred(
self,
ref: DatasetRef,
*,
parameters: Union[dict, None] = None,
parameters: dict | None = None,
storageClass: str | StorageClass | None = None,
) -> DeferredDatasetHandle:
# Docstring inherited.
@@ -610,12 +613,12 @@ class QuantumProvenanceData(BaseModel):
# `~CollectionType.RUN` level, such as the compute node ID). but adding it
# now is out of scope for this prototype.

predicted_inputs: Set[uuid.UUID]
predicted_inputs: set[uuid.UUID]
"""Unique IDs of datasets that were predicted as inputs to this quantum
when the QuantumGraph was built.
"""

available_inputs: Set[uuid.UUID]
available_inputs: set[uuid.UUID]
"""Unique IDs of input datasets that were actually present in the datastore
when this quantum was executed.

@@ -624,7 +627,7 @@
task.
"""

actual_inputs: Set[uuid.UUID]
actual_inputs: set[uuid.UUID]
"""Unique IDs of datasets that were actually used as inputs by this task.

This is a subset of `available_inputs`.
@@ -638,17 +641,17 @@
that input as actually used.
"""

predicted_outputs: Set[uuid.UUID]
predicted_outputs: set[uuid.UUID]
"""Unique IDs of datasets that were predicted as outputs of this quantum
when the QuantumGraph was built.
"""

actual_outputs: Set[uuid.UUID]
actual_outputs: set[uuid.UUID]
"""Unique IDs of datasets that were actually written when this quantum
was executed.
"""

datastore_records: Dict[str, SerializedDatastoreRecordData]
datastore_records: dict[str, SerializedDatastoreRecordData]
"""Datastore records indexed by datastore name."""

@staticmethod
@@ -663,10 +666,10 @@ def collect_and_transfer(
butler : `Butler`
Full butler representing the data repository to transfer datasets
to.
quanta : `Iterable` [ `Quantum` ]
quanta : `~collections.abc.Iterable` [ `Quantum` ]
Iterable of `Quantum` objects that carry information about
predicted outputs. May be a single-pass iterator.
provenance : `Iterable` [ `QuantumProvenanceData` ]
provenance : `~collections.abc.Iterable` [ `QuantumProvenanceData` ]
Provenance and datastore data for each of the given quanta, in the
same order. May be a single-pass iterator.

@@ -687,7 +690,7 @@
ignored.
"""
grouped_refs = defaultdict(list)
summary_records: Dict[str, DatastoreRecordData] = {}
summary_records: dict[str, DatastoreRecordData] = {}
for quantum, provenance_for_quantum in zip(quanta, provenance):
quantum_refs_by_id = {
ref.id: ref
Expand Down Expand Up @@ -717,11 +720,11 @@ def parse_raw(cls, *args: Any, **kwargs: Any) -> QuantumProvenanceData:
def direct(
cls,
*,
predicted_inputs: Iterable[Union[str, uuid.UUID]],
available_inputs: Iterable[Union[str, uuid.UUID]],
actual_inputs: Iterable[Union[str, uuid.UUID]],
predicted_outputs: Iterable[Union[str, uuid.UUID]],
actual_outputs: Iterable[Union[str, uuid.UUID]],
predicted_inputs: Iterable[str | uuid.UUID],
available_inputs: Iterable[str | uuid.UUID],
actual_inputs: Iterable[str | uuid.UUID],
predicted_outputs: Iterable[str | uuid.UUID],
actual_outputs: Iterable[str | uuid.UUID],
datastore_records: Mapping[str, Mapping],
) -> QuantumProvenanceData:
"""Construct an instance directly without validators.
Expand All @@ -734,11 +737,11 @@ def direct(
This method should only be called when the inputs are trusted.
"""

def _to_uuid_set(uuids: Iterable[Union[str, uuid.UUID]]) -> Set[uuid.UUID]:
def _to_uuid_set(uuids: Iterable[str | uuid.UUID]) -> set[uuid.UUID]:
"""Convert input UUIDs, which could be in string representation to
a set of `UUID` instances.
"""
return set(uuid.UUID(id) if isinstance(id, str) else id for id in uuids)
return {uuid.UUID(id) if isinstance(id, str) else id for id in uuids}

data = QuantumProvenanceData.__new__(cls)
setter = object.__setattr__
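To make the updated `from_predicted` signature above concrete, a hedged usage sketch: only the keyword names come from the diff, the config path is hypothetical, the made-up UUIDs stand in for IDs normally taken from a QuantumGraph, and it assumes `QuantumBackedButler` and `DimensionUniverse` are importable from the package top level.

```python
import uuid

from lsst.daf.butler import DimensionUniverse, QuantumBackedButler

qbb = QuantumBackedButler.from_predicted(
    config="/path/to/butler.yaml",    # hypothetical repository config
    predicted_inputs=[uuid.uuid4()],  # placeholder dataset IDs
    predicted_outputs=[uuid.uuid4()],
    dimensions=DimensionUniverse(),   # default dimension universe
    datastore_records={},             # no per-datastore records in this sketch
    filename=":memory:",              # default shown in the signature above
)
```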
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/cli/butler.py
@@ -268,7 +268,7 @@ def _getPluginCommands(cls) -> defaultdict[str, list[str]]:
commands: defaultdict[str, list[str]] = defaultdict(list)
for pluginName in cls.getPluginList():
try:
with open(pluginName, "r") as resourceFile:
with open(pluginName) as resourceFile:
resources = defaultdict(list, yaml.safe_load(resourceFile))
except Exception as err:
log.warning("Error loading commands from %s, skipping. %s", pluginName, err)
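The change above works because `"r"` (text read) is already the default mode for the built-in `open()`; a minimal illustration with a hypothetical file name:

```python
# Both calls are equivalent; the explicit "r" removed by this commit is the
# built-in default.
with open("plugins.yaml") as fh:        # style adopted here
    text_a = fh.read()

with open("plugins.yaml", "r") as fh:   # explicit default, now removed
    text_b = fh.read()

assert text_a == text_b
```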
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/cli/cliLog.py
@@ -110,7 +110,7 @@ def root_loggers() -> set[str]:
variable ``DAF_BUTLER_ROOT_LOGGER``. This variable can contain
multiple default loggers separated by a ``:``.
"""
log_names = set(["lsst"])
log_names = {"lsst"}
envvar = "DAF_BUTLER_ROOT_LOGGER"
if envvar in os.environ:
log_names |= set(os.environ[envvar].split(":"))
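For completeness, the set-literal pattern adopted above, sketched with a hypothetical value for the `DAF_BUTLER_ROOT_LOGGER` environment variable:

```python
import os

log_names = {"lsst"}  # modern set literal, as in the commit above

# Hypothetical value for illustration; the real variable may be unset or differ.
os.environ["DAF_BUTLER_ROOT_LOGGER"] = "lsst.daf:lsst.pipe"
log_names |= set(os.environ["DAF_BUTLER_ROOT_LOGGER"].split(":"))
print(sorted(log_names))  # ['lsst', 'lsst.daf', 'lsst.pipe']
```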
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/core/_column_categorization.py
@@ -34,6 +34,8 @@

@dataclasses.dataclass
class ColumnCategorization:
"""Split an iterable of ColumnTag objects by type."""

dimension_keys: set[str] = dataclasses.field(default_factory=set)
dimension_records: defaultdict[str, set[str]] = dataclasses.field(
default_factory=lambda: defaultdict(set)