Merge pull request #280 from lsst/tickets/DM-24807
DM-24807: Add type annotations to datastore
timj committed May 17, 2020
2 parents 20400de + 321fa34 commit ea07b93
Showing 17 changed files with 751 additions and 317 deletions.
55 changes: 45 additions & 10 deletions python/lsst/daf/butler/core/datasets/ref.py
@@ -23,7 +23,16 @@
__all__ = ["AmbiguousDatasetError", "DatasetRef"]

import hashlib
from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple
from typing import (
Any,
Dict,
Iterable,
Iterator,
List,
Mapping,
Optional,
Tuple,
)

from types import MappingProxyType
from ..dimensions import DataCoordinate, DimensionGraph, ExpandedDataCoordinate
@@ -136,7 +145,7 @@ def __new__(cls, datasetType: DatasetType, dataId: DataCoordinate, *,
self._hash = hash
return self

def __eq__(self, other: DatasetRef):
def __eq__(self, other: Any) -> bool:
try:
return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
except AttributeError:
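
The widening of the parameter from `DatasetRef` to `Any` follows the signature of `object.__eq__`; mypy flags a narrower override as an incompatible one. A minimal sketch of the pattern, with a toy `Ref` class standing in for `DatasetRef` and the (elided) `except` branch assumed to return `False`:

from typing import Any


class Ref:
    """Toy stand-in for DatasetRef; illustrative only."""

    def __init__(self, datasetType: str, dataId: str, id: int) -> None:
        self.datasetType = datasetType
        self.dataId = dataId
        self.id = id

    def __eq__(self, other: Any) -> bool:
        # Accept Any, as object.__eq__ does; unrelated types simply
        # compare unequal instead of raising.
        try:
            return (self.datasetType, self.dataId, self.id) == (
                other.datasetType, other.dataId, other.id)
        except AttributeError:
            return False


print(Ref("calexp", "d1", 1) == Ref("calexp", "d1", 1))  # True
print(Ref("calexp", "d1", 1) == "not a ref")              # False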
@@ -287,7 +296,7 @@ def isComposite(self) -> bool:
"""
return self.datasetType.isComposite()

def _lookupNames(self) -> Tuple[LookupKey]:
def _lookupNames(self) -> Tuple[LookupKey, ...]:
"""Name keys to use when looking up this DatasetRef in a configuration.
The names are returned in order of priority.
@@ -302,14 +311,43 @@ def _lookupNames(self) -> Tuple[LookupKey]:
"""
# Special case the instrument Dimension since we allow configs
# to include the instrument name in the hierarchy.
names = self.datasetType._lookupNames()
names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

if "instrument" in self.dataId:
names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
for n in names) + names

return names
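
The return annotation change above, from `Tuple[LookupKey]` to `Tuple[LookupKey, ...]`, is not cosmetic: without the ellipsis the type means a tuple of exactly one `LookupKey`, while the method can return several. A short, self-contained illustration of the distinction using plain strings (no Butler types involved):

from typing import Tuple


def exactly_one() -> Tuple[str]:
    # Tuple[str] means a 1-tuple; returning ("a", "b") would fail mypy.
    return ("a",)


def any_length() -> Tuple[str, ...]:
    # Tuple[str, ...] means zero or more strings.
    names: Tuple[str, ...] = ("calexp",)
    # More specific keys can be prepended, keeping the priority order
    # used by _lookupNames above.
    names = ("instrument-specific",) + names
    return names


print(any_length())  # ('instrument-specific', 'calexp')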

def allRefs(self, parents: bool = True) -> Iterator[DatasetRef]:
"""Return all the nested component `DatasetRef` and optionally the
parent.
Parameters
----------
parents : `bool`, optional
If `True` (default) include the given dataset in the output
iterable. If `False`, include only its components. This does
not propagate recursively - only the outermost level of parents
is ignored if ``parents`` is `False`.
Yields
------
ref : `DatasetRef`
Itself (only if ``parents`` is `True`) or one of its (recursive)
children.
Notes
-----
If ``parents`` is `True`, components are guaranteed to be yielded
before their parents.
"""
if self.components is None:
raise AmbiguousDatasetError(f"Unresolved ref {self} cannot be flattened.")
yield from DatasetRef.flatten(self.components.values(), parents=True)
if parents:
yield self

@staticmethod
def flatten(refs: Iterable[DatasetRef], *, parents: bool = True) -> Iterator[DatasetRef]:
"""Recursively transform an iterable over `DatasetRef` to include
@@ -330,19 +368,16 @@ def flatten(refs: Iterable[DatasetRef], *, parents: bool = True) -> Iterator[Dat
------
ref : `DatasetRef`
Either one of the given `DatasetRef` instances (only if ``parent``
is `True`) or on of its (recursive) children.
is `True`) or one of its (recursive) children.
Notes
-----
If ``parents`` is `True`, components are guaranteed to be yielded
before their parents.
"""
for ref in refs:
if ref.components is None:
raise AmbiguousDatasetError(f"Unresolved ref {ref} passed to 'flatten'.")
yield from DatasetRef.flatten(ref.components.values(), parents=True)
if parents:
yield ref
for subref in ref.allRefs(parents):
yield subref

@staticmethod
def groupByType(refs: Iterable[DatasetRef], *, recursive: bool = True
57 changes: 38 additions & 19 deletions python/lsst/daf/butler/core/datasets/type.py
@@ -27,12 +27,28 @@
import re

from types import MappingProxyType

from typing import (
TYPE_CHECKING,
Any,
Iterable,
Mapping,
Optional,
Tuple,
Type,
Union,
)


from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey

if TYPE_CHECKING:
from ..dimensions import Dimension, DimensionUniverse


def _safeMakeMappingProxyType(data):
def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
if data is None:
data = {}
return MappingProxyType(data)
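
Two idioms in this hunk deserve a note: the `TYPE_CHECKING` block imports `Dimension` and `DimensionUniverse` only when a type checker is running, so the names can be used in annotations without a runtime import (and without risking an import cycle), and `MappingProxyType` wraps a dict in a read-only view. A small sketch of both patterns, using a stdlib import as the stand-in for the guarded one:

from types import MappingProxyType
from typing import TYPE_CHECKING, Mapping, Optional

if TYPE_CHECKING:
    # Seen only by mypy; never executed, so it cannot create an import cycle.
    from argparse import ArgumentParser


def describe(parser: "ArgumentParser") -> str:
    # String annotation: resolved by the type checker, not at runtime.
    return type(parser).__name__


def safe_proxy(data: Optional[Mapping]) -> Mapping:
    # None is normalised to an empty mapping, as in _safeMakeMappingProxyType.
    if data is None:
        data = {}
    return MappingProxyType(data)


view = safe_proxy({"wcs": 1})
print(view["wcs"])   # 1
# view["psf"] = 2    # TypeError: the proxy is read-only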
@@ -74,7 +90,7 @@ class DatasetType:
VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

@staticmethod
def nameWithComponent(datasetTypeName, componentName):
def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
"""Form a valid DatasetTypeName from a parent and component.
No validation is performed.
@@ -93,7 +109,9 @@ def nameWithComponent(datasetTypeName, componentName):
"""
return "{}.{}".format(datasetTypeName, componentName)

def __init__(self, name, dimensions, storageClass, *, universe=None):
def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
storageClass: Union[StorageClass, str],
*, universe: DimensionUniverse = None):
if self.VALID_NAME_REGEX.match(name) is None:
raise ValueError(f"DatasetType name '{name}' is invalid.")
self._name = name
@@ -104,17 +122,18 @@ def __init__(self, name, dimensions, storageClass, *, universe=None):
dimensions = universe.extract(dimensions)
self._dimensions = dimensions
assert isinstance(storageClass, (StorageClass, str))
self._storageClass: Optional[StorageClass]
if isinstance(storageClass, StorageClass):
self._storageClass = storageClass
self._storageClassName = storageClass.name
else:
self._storageClass = None
self._storageClassName = storageClass
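
The annotated `__init__` accepts either a `StorageClass` instance or its name, and the bare `self._storageClass: Optional[StorageClass]` line declares the attribute's type once so that both branches of the `if` type-check. A reduced sketch of the same normalisation pattern, with stand-in classes rather than the real Butler ones:

from typing import Optional, Union


class StorageClassLike:
    """Stand-in for StorageClass; carries only a name."""

    def __init__(self, name: str) -> None:
        self.name = name


class TypeLike:
    """Stand-in for DatasetType, keeping only the storage-class handling."""

    def __init__(self, storageClass: Union[StorageClassLike, str]) -> None:
        # Declare the attribute with its full Optional type up front so
        # mypy accepts both assignment branches below.
        self._storageClass: Optional[StorageClassLike]
        if isinstance(storageClass, StorageClassLike):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass


print(TypeLike("ExposureF")._storageClassName)                    # ExposureF
print(TypeLike(StorageClassLike("ExposureF"))._storageClassName)  # ExposureF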

def __repr__(self):
def __repr__(self) -> str:
return "DatasetType({}, {}, {})".format(self.name, self.dimensions, self._storageClassName)

def __eq__(self, other):
def __eq__(self, other: Any) -> bool:
if not isinstance(other, type(self)):
return False
if self._name != other._name:
@@ -126,7 +145,7 @@ def __eq__(self, other):
else:
return self._storageClassName == other._storageClassName

def __hash__(self):
def __hash__(self) -> int:
"""Hash DatasetType instance.
This only uses StorageClass name which is consistent with the
@@ -135,21 +154,21 @@ def __hash__(self):
return hash((self._name, self._dimensions, self._storageClassName))

@property
def name(self):
def name(self) -> str:
"""A string name for the Dataset; must correspond to the same
`DatasetType` across all Registries.
"""
return self._name

@property
def dimensions(self):
def dimensions(self) -> DimensionGraph:
r"""The `Dimension`\ s that label and relate instances of this
`DatasetType` (`DimensionGraph`).
"""
return self._dimensions

@property
def storageClass(self):
def storageClass(self) -> StorageClass:
"""`StorageClass` instance that defines how this `DatasetType`
is persisted. Note that if DatasetType was constructed with a name
of a StorageClass then Butler has to be initialized before using
@@ -160,7 +179,7 @@ def storageClass(self):
return self._storageClass

@staticmethod
def splitDatasetTypeName(datasetTypeName):
def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
"""Given a dataset type name, return the root name and the component
name.
@@ -189,7 +208,7 @@ def splitDatasetTypeName(datasetTypeName):
root, comp = root.split(".", maxsplit=1)
return root, comp
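
The visible tail of `splitDatasetTypeName` (`root.split(".", maxsplit=1)`) implies that everything after the first dot is the component; the guard before it is elided in this view, so the following sketch reconstructs it under that assumption:

from typing import Optional, Tuple


def split_dataset_type_name(datasetTypeName: str) -> Tuple[str, Optional[str]]:
    # Assumed reconstruction: only split when a "." is present, and keep
    # everything after the first dot as the component name.
    root = datasetTypeName
    comp: Optional[str] = None
    if "." in root:
        root, comp = root.split(".", maxsplit=1)
    return root, comp


print(split_dataset_type_name("calexp"))             # ('calexp', None)
print(split_dataset_type_name("calexp.wcs"))         # ('calexp', 'wcs')
print(split_dataset_type_name("calexp.wcs.pixels"))  # ('calexp', 'wcs.pixels')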

def nameAndComponent(self):
def nameAndComponent(self) -> Tuple[str, Optional[str]]:
"""Return the root name of this dataset type and the component
name (if defined).
@@ -202,7 +221,7 @@ def nameAndComponent(self):
"""
return self.splitDatasetTypeName(self.name)

def component(self):
def component(self) -> Optional[str]:
"""Component name (if defined)
Returns
@@ -214,7 +233,7 @@ def component(self):
_, comp = self.nameAndComponent()
return comp

def componentTypeName(self, component):
def componentTypeName(self, component: str) -> str:
"""Given a component name, derive the datasetTypeName of that component
Parameters
@@ -253,7 +272,7 @@ def makeComponentDatasetType(self, component: str) -> DatasetType:
return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
storageClass=self.storageClass.components[component])

def isComponent(self):
def isComponent(self) -> bool:
"""Boolean indicating whether this `DatasetType` refers to a
component of a composite.
@@ -266,7 +285,7 @@ def isComposite(self):
return True
return False

def isComposite(self):
def isComposite(self) -> bool:
"""Boolean indicating whether this `DatasetType` is a composite type.
Returns
@@ -277,7 +296,7 @@ def _lookupNames(self):
"""
return self.storageClass.isComposite()

def _lookupNames(self):
def _lookupNames(self) -> Tuple[LookupKey, ...]:
"""Name keys to use when looking up this datasetType in a
configuration.
@@ -292,7 +311,7 @@ def _lookupNames(self):
the storage class name.
"""
rootName, componentName = self.nameAndComponent()
lookups = (LookupKey(name=self.name),)
lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
if componentName is not None:
lookups = lookups + (LookupKey(name=rootName),)

@@ -302,15 +321,15 @@ def _lookupNames(self):

return lookups + self.storageClass._lookupNames()

def __reduce__(self):
def __reduce__(self) -> Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str]]:
"""Support pickling.
StorageClass instances can not normally be pickled, so we pickle
StorageClass name instead of instance.
"""
return (DatasetType, (self.name, self.dimensions, self._storageClassName))
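
The new `__reduce__` annotation spells out the pickling contract: a callable plus the positional arguments used to rebuild the instance, with the storage class passed by name because `StorageClass` objects do not normally pickle. A self-contained sketch of that contract with a toy class:

import pickle
from typing import Tuple, Type


class Named:
    """Toy class pickled via a (callable, args) pair, as DatasetType is."""

    def __init__(self, name: str, storageClassName: str) -> None:
        self.name = name
        self.storageClassName = storageClassName

    def __reduce__(self) -> Tuple[Type["Named"], Tuple[str, str]]:
        # Unpickling calls Named(name, storageClassName) to rebuild the object.
        return (Named, (self.name, self.storageClassName))


copy = pickle.loads(pickle.dumps(Named("calexp", "ExposureF")))
print(copy.name, copy.storageClassName)  # calexp ExposureF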

def __deepcopy__(self, memo):
def __deepcopy__(self, memo: Any) -> DatasetType:
"""Support for deep copy method.
Normally ``deepcopy`` will use pickle mechanism to make copies.