Skip to content

Commit

Permalink
Merge pull request #343 from lsst/tickets/DM-39605
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Jun 10, 2023
2 parents 2d6725e + bfed269 commit 5f77edf
Show file tree
Hide file tree
Showing 38 changed files with 919 additions and 808 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/docstyle.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Run pydocstyle checks via the shared Rubin Observatory reusable workflow.
name: Run docstyle

on:
  # Check docstrings on every push to main and on every pull request.
  push:
    branches:
      - main
  pull_request:

jobs:
  call-workflow:
    # Delegates to the centrally-maintained docstyle workflow so that all
    # lsst repositories share one checker configuration.
    uses: lsst/rubin_workflows/.github/workflows/docstyle.yaml@main
    with:
      # Only the package sources are checked, not tests or docs.
      args: "python/"
71 changes: 71 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,74 @@ exclude_lines = [
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]

[tool.pydocstyle]
convention = "numpy"
# Our coding style does not require docstrings for magic methods (D105)
# Our docstyle documents __init__ at the class level (D107)
# We allow methods to inherit docstrings and this is not compatible with D102.
# Docstring at the very first line is not required
# D200, D205 and D400 all complain if the first sentence of the docstring does
# not fit on one line.
# Do not require docstrings in __init__.py files (D104)
# Do not require docstrings in public modules (D100)
add-ignore = ["D107", "D105", "D102", "D100", "D200", "D205", "D400", "D104"]

# The rule used with Ruff configuration is to disable every lint that has
# legitimate exceptions that are not dodgy code, rather than cluttering code
# with noqa markers. This is therefore a relatively relaxed configuration that
# errs on the side of disabling legitimate lints.
#
# Reference for settings: https://beta.ruff.rs/docs/settings/
# Reference for rules: https://beta.ruff.rs/docs/rules/
[tool.ruff]
exclude = [
    "docs/**",
]
line-length = 110
ignore = [
    "ANN101",  # self should not have a type annotation
    "ANN102",  # cls should not have a type annotation
    "ANN401",  # sometimes Any is the right type
    "ARG001",  # unused function arguments are often legitimate
    "ARG002",  # unused method arguments are often legitimate
    "ARG005",  # unused lambda arguments are often legitimate
    "BLE001",  # we want to catch and report Exception in background tasks
    "C414",  # nested sorted is how you sort by multiple keys with reverse
    "COM812",  # omitting trailing commas allows black autoreformatting
    "D102",  # sometimes we use docstring inheritance
    "D104",  # don't see the point of documenting every package
    "D105",  # our style doesn't require docstrings for magic methods
    "D106",  # Pydantic uses a nested Config class that doesn't warrant docs
    "EM101",  # justification (duplicate string in traceback) is silly
    "EM102",  # justification (duplicate string in traceback) is silly
    "FBT003",  # positional booleans are normal for Pydantic field defaults
    "G004",  # forbidding logging f-strings is appealing, but not our style
    "RET505",  # disagree that omitting else always makes code more readable
    "PLR0913",  # factory pattern uses constructors with many arguments
    "PLR2004",  # too aggressive about magic values
    "S105",  # good idea but too many false positives on non-passwords
    "S106",  # good idea but too many false positives on non-passwords
    "SIM102",  # sometimes the formatting of nested if statements is clearer
    "SIM117",  # sometimes nested with contexts are clearer
    "TCH001",  # we decided to not maintain separate TYPE_CHECKING blocks
    "TCH002",  # we decided to not maintain separate TYPE_CHECKING blocks
    "TCH003",  # we decided to not maintain separate TYPE_CHECKING blocks
    "TID252",  # if we're going to use relative imports, use them always
    "TRY003",  # good general advice but lint is way too aggressive
    # This codebase uses camelCase/mixedCase names (e.g. fromName,
    # getRawFormatter), so pep8-naming checks are disabled.
    "N802",  # function name should be lowercase
    "N803",  # argument name should be lowercase
    "N806",  # variable in function should be lowercase
    "N812",  # lowercase imported as non-lowercase
    "N815",  # mixedCase variable in class scope
    "N816",  # mixedCase variable in global scope
    # Docstring exemptions matching the pydocstyle configuration above.
    # (D102, D104 and D105 are already disabled earlier in this list.)
    "D107",  # __init__ is documented at the class level
    "D100",  # docstrings are not required in public modules
    "D200",  # one-line docstring need not fit on the first line
    "D205",  # blank line between summary and description not required
    "D400",  # first docstring line need not end with a period
]
select = ["ALL"]
target-version = "py311"
19 changes: 10 additions & 9 deletions python/lsst/pipe/base/_datasetQueryConstraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,18 @@
__all__ = ("DatasetQueryConstraintVariant",)

import warnings
from typing import Iterable, Iterator, Protocol, Type
from collections.abc import Iterable, Iterator
from typing import Protocol


class DatasetQueryConstraintVariant(Iterable, Protocol):
"""This class is the base for all the valid variants for controling
"""Base for all the valid variants for controlling
constraining graph building queries based on dataset type existence.
ALL variant corresponds to using all input dataset types to constrain
a query.
OFF variant corrresponds to not using any dataset types to constrain a
OFF variant corresponds to not using any dataset types to constrain a
graph building query.
LIST variant should be used when one or more specific names should be used
Expand All @@ -54,9 +55,9 @@ class DatasetQueryConstraintVariant(Iterable, Protocol):
`fromExpression` class method given a valid string.
"""

ALL: "Type[_ALL]"
OFF: "Type[_OFF]"
LIST: "Type[_LIST]"
ALL: "type[_ALL]"
OFF: "type[_OFF]"
LIST: "type[_LIST]"

@classmethod
def __subclasshook__(cls, subclass):
Expand All @@ -69,9 +70,9 @@ def fromExpression(cls, expression: str) -> "DatasetQueryConstraintVariant":
"""Select and return the correct Variant that corresponds to the input
expression.
Valid values are `all` for all inputs dataset types in pipeline, `off`
to not consider dataset type existence as a constraint, single or comma
seperated list of dataset type names.
Valid values are ``all`` for all inputs dataset types in pipeline,
``off`` to not consider dataset type existence as a constraint, single
or comma-separated list of dataset type names.
"""
if not isinstance(expression, str):
raise ValueError("Expression must be a string")
Expand Down
26 changes: 13 additions & 13 deletions python/lsst/pipe/base/_dataset_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
__all__ = ["InMemoryDatasetHandle"]

import dataclasses
from typing import Any, Optional, cast
from typing import Any, cast

from frozendict import frozendict
from lsst.daf.butler import (
Expand Down Expand Up @@ -85,11 +85,11 @@ def __init__(
def get(
self,
*,
component: Optional[str] = None,
parameters: Optional[dict] = None,
component: str | None = None,
parameters: dict | None = None,
storageClass: str | StorageClass | None = None,
) -> Any:
"""Retrieves the dataset pointed to by this handle
"""Retrieve the dataset pointed to by this handle.
This handle may be used multiple times, possibly with different
parameters.
Expand All @@ -101,14 +101,14 @@ def get(
may specify the name of the component to use in the get operation.
parameters : `dict` or None
The parameters argument will be passed to the butler get method.
It defaults to None. If the value is not None, this dict will
It defaults to `None`. If the value is not `None`, this `dict` will
be merged with the parameters dict used to construct the
`DeferredDatasetHandle` class.
storageClass : `StorageClass` or `str`, optional
`~lsst.daf.butler.DeferredDatasetHandle` class.
storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
The storage class to be used to override the Python type
returned by this method. By default the returned type matches
the type stored. Specifying a read `StorageClass` can force a
different type to be returned.
the type stored. Specifying a read `~lsst.daf.butler.StorageClass`
can force a different type to be returned.
This type must be compatible with the original type.
Returns
Expand Down Expand Up @@ -218,7 +218,7 @@ def _getStorageClass(self) -> StorageClass:
Returns
-------
storageClass : `StorageClass`
storageClass : `~lsst.daf.butler.StorageClass`
The storage class associated with this handle, or one derived
from the python type of the stored object.
Expand All @@ -244,18 +244,18 @@ def _getStorageClass(self) -> StorageClass:
handle.
"""

storageClass: Optional[str] = None
storageClass: str | None = None
"""The name of the `~lsst.daf.butler.StorageClass` associated with this
dataset.
If `None`, the storage class will be looked up from the factory.
"""

parameters: Optional[dict] = None
parameters: dict | None = None
"""Optional parameters that may be used to specify a subset of the dataset
to be loaded (`dict` or `None`).
"""

copy: bool = False
"""Control whether a copy of the in-memory dataset is returned for every
call to get()."""
call to `get()`."""
35 changes: 18 additions & 17 deletions python/lsst/pipe/base/_instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
import datetime
import os.path
from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, Union, cast, final
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any, cast, final

from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
from lsst.daf.butler.registry import DataIdError
Expand All @@ -46,10 +47,10 @@ class Instrument(metaclass=ABCMeta):
Parameters
----------
collection_prefix : `str`, optional
Prefix for collection names to use instead of the intrument's own name.
This is primarily for use in simulated-data repositories, where the
instrument name may not be necessary and/or sufficient to distinguish
between collections.
Prefix for collection names to use instead of the instrument's own
name. This is primarily for use in simulated-data repositories, where
the instrument name may not be necessary and/or sufficient to
distinguish between collections.
Notes
-----
Expand All @@ -64,7 +65,7 @@ class Instrument(metaclass=ABCMeta):
each of the Tasks that requires special configuration.
"""

policyName: Optional[str] = None
policyName: str | None = None
"""Instrument specific name to use when locating a policy or configuration
file in the file system."""

Expand All @@ -73,7 +74,7 @@ class Instrument(metaclass=ABCMeta):
of the dataset type name, a tuple of dimension names, and the storage class
name. If `None` the ingest system will use its default definition."""

def __init__(self, collection_prefix: Optional[str] = None):
def __init__(self, collection_prefix: str | None = None):
if collection_prefix is None:
collection_prefix = self.getName()
self.collection_prefix = collection_prefix
Expand Down Expand Up @@ -131,7 +132,7 @@ def register(self, registry: Registry, *, update: bool = False) -> None:
raise NotImplementedError()

@staticmethod
def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = None) -> Instrument:
def fromName(name: str, registry: Registry, collection_prefix: str | None = None) -> Instrument:
"""Given an instrument name and a butler registry, retrieve a
corresponding instantiated instrument object.
Expand All @@ -142,7 +143,7 @@ def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = N
registry : `lsst.daf.butler.Registry`
Butler registry to query to find the information.
collection_prefix : `str`, optional
Prefix for collection names to use instead of the intrument's own
Prefix for collection names to use instead of the instrument's own
name. This is primarily for use in simulated-data repositories,
where the instrument name may not be necessary and/or sufficient to
distinguish between collections.
Expand Down Expand Up @@ -182,7 +183,7 @@ def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = N

@staticmethod
def from_string(
name: str, registry: Optional[Registry] = None, collection_prefix: Optional[str] = None
name: str, registry: Registry | None = None, collection_prefix: str | None = None
) -> Instrument:
"""Return an instance from the short name or class name.
Expand All @@ -199,7 +200,7 @@ def from_string(
Butler registry to query to find information about the instrument,
by default `None`.
collection_prefix : `str`, optional
Prefix for collection names to use instead of the intrument's own
Prefix for collection names to use instead of the instrument's own
name. This is primarily for use in simulated-data repositories,
where the instrument name may not be necessary and/or sufficient
to distinguish between collections.
Expand Down Expand Up @@ -242,15 +243,15 @@ def from_string(
return instr

@staticmethod
def from_data_id(data_id: DataCoordinate, collection_prefix: Optional[str] = None) -> Instrument:
def from_data_id(data_id: DataCoordinate, collection_prefix: str | None = None) -> Instrument:
"""Instantiate an `Instrument` object from a fully-expanded data ID.
Parameters
----------
data_id : `DataCoordinate`
data_id : `~lsst.daf.butler.DataCoordinate`
Expanded data ID that includes the instrument dimension.
collection_prefix : `str`, optional
Prefix for collection names to use instead of the intrument's own
Prefix for collection names to use instead of the instrument's own
name. This is primarily for use in simulated-data repositories,
where the instrument name may not be necessary and/or sufficient to
distinguish between collections.
Expand Down Expand Up @@ -282,7 +283,7 @@ def _from_cls_name(cls_name: str, collection_prefix: str | None = None) -> Instr
cls_name : `str`
Fully-qualified name of the type.
collection_prefix : `str`, optional
Prefix for collection names to use instead of the intrument's own
Prefix for collection names to use instead of the instrument's own
name. This is primarily for use in simulated-data repositories,
where the instrument name may not be necessary and/or sufficient to
distinguish between collections.
Expand Down Expand Up @@ -331,7 +332,7 @@ def importAll(registry: Registry) -> None:
pass

@abstractmethod
def getRawFormatter(self, dataId: DataId) -> Type[Formatter]:
def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
"""Return the Formatter class that should be used to read a particular
raw file.
Expand Down Expand Up @@ -365,7 +366,7 @@ def applyConfigOverrides(self, name: str, config: Config) -> None:
config.load(path)

@staticmethod
def formatCollectionTimestamp(timestamp: Union[str, datetime.datetime]) -> str:
def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
"""Format a timestamp for use in a collection name.
Parameters
Expand Down
2 changes: 2 additions & 0 deletions python/lsst/pipe/base/_observation_dimension_packer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@


class ObservationDimensionPackerConfig(Config):
"""Config associated with a `ObservationDimensionPacker`."""

# Config fields are annotated as Any because support for better
# annotations is broken on Fields with optional=True.
n_detectors: Any = Field(
Expand Down

0 comments on commit 5f77edf

Please sign in to comment.