Merge pull request #280 from lsst/tickets/DM-24807
DM-24807: Add type annotations to datastore
timj committed May 17, 2020
2 parents 20400de + 321fa34 commit ea07b93
Showing 17 changed files with 751 additions and 317 deletions.
55 changes: 45 additions & 10 deletions python/lsst/daf/butler/core/datasets/ref.py
@@ -23,7 +23,16 @@
__all__ = ["AmbiguousDatasetError", "DatasetRef"]

import hashlib
from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple
from typing import (
Any,
Dict,
Iterable,
Iterator,
List,
Mapping,
Optional,
Tuple,
)

from types import MappingProxyType
from ..dimensions import DataCoordinate, DimensionGraph, ExpandedDataCoordinate
@@ -136,7 +145,7 @@ def __new__(cls, datasetType: DatasetType, dataId: DataCoordinate, *,
self._hash = hash
return self

def __eq__(self, other: DatasetRef):
def __eq__(self, other: Any) -> bool:
try:
return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
except AttributeError:
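
The widening of the parameter from `DatasetRef` to `Any` follows the signature of `object.__eq__`; mypy flags a narrower override as an incompatible one. A minimal sketch of the pattern, with a toy `Ref` class standing in for `DatasetRef` and the (elided) `except` branch assumed to return `False`:

from typing import Any


class Ref:
    """Toy stand-in for DatasetRef; illustrative only."""

    def __init__(self, datasetType: str, dataId: str, id: int) -> None:
        self.datasetType = datasetType
        self.dataId = dataId
        self.id = id

    def __eq__(self, other: Any) -> bool:
        # Accept Any, as object.__eq__ does; unrelated types simply
        # compare unequal instead of raising.
        try:
            return (self.datasetType, self.dataId, self.id) == (
                other.datasetType, other.dataId, other.id)
        except AttributeError:
            return False


print(Ref("calexp", "d1", 1) == Ref("calexp", "d1", 1))  # True
print(Ref("calexp", "d1", 1) == "not a ref")              # False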
@@ -287,7 +296,7 @@ def isComposite(self) -> bool:
"""
return self.datasetType.isComposite()

def _lookupNames(self) -> Tuple[LookupKey]:
def _lookupNames(self) -> Tuple[LookupKey, ...]:
"""Name keys to use when looking up this DatasetRef in a configuration.
The names are returned in order of priority.
@@ -302,14 +311,43 @@ def _lookupNames(self) -> Tuple[LookupKey]:
"""
# Special case the instrument Dimension since we allow configs
# to include the instrument name in the hierarchy.
names = self.datasetType._lookupNames()
names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

if "instrument" in self.dataId:
names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
for n in names) + names

return names
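
The return annotation change above, from `Tuple[LookupKey]` to `Tuple[LookupKey, ...]`, is not cosmetic: without the ellipsis the type means a tuple of exactly one `LookupKey`, while the method can return several. A short, self-contained illustration of the distinction using plain strings (no Butler types involved):

from typing import Tuple


def exactly_one() -> Tuple[str]:
    # Tuple[str] means a 1-tuple; returning ("a", "b") would fail mypy.
    return ("a",)


def any_length() -> Tuple[str, ...]:
    # Tuple[str, ...] means zero or more strings.
    names: Tuple[str, ...] = ("calexp",)
    # More specific keys can be prepended, keeping the priority order
    # used by _lookupNames above.
    names = ("instrument-specific",) + names
    return names


print(any_length())  # ('instrument-specific', 'calexp')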

def allRefs(self, parents: bool = True) -> Iterator[DatasetRef]:
"""Return all the nested component `DatasetRef` and optionally the
parent.
Parameters
----------
parents : `bool`, optional
If `True` (default) include the given dataset in the output
iterable. If `False`, include only its components. This does
not propagate recursively - only the outermost level of parents
is ignored if ``parents`` is `False`.
Yields
------
ref : `DatasetRef`
Itself (only if ``parents`` is `True`) or one of its (recursive)
children.
Notes
-----
If ``parents`` is `True`, components are guaranteed to be yielded
before their parents.
"""
if self.components is None:
raise AmbiguousDatasetError(f"Unresolved ref {self} cannot be flattened.")
yield from DatasetRef.flatten(self.components.values(), parents=True)
if parents:
yield self

@staticmethod
def flatten(refs: Iterable[DatasetRef], *, parents: bool = True) -> Iterator[DatasetRef]:
"""Recursively transform an iterable over `DatasetRef` to include
@@ -330,19 +368,16 @@ def flatten(refs: Iterable[DatasetRef], *, parents: bool = True) -> Iterator[Dat
------
ref : `DatasetRef`
Either one of the given `DatasetRef` instances (only if ``parent``
is `True`) or on of its (recursive) children.
is `True`) or one of its (recursive) children.
Notes
-----
If ``parents`` is `True`, components are guaranteed to be yielded
before their parents.
"""
for ref in refs:
if ref.components is None:
raise AmbiguousDatasetError(f"Unresolved ref {ref} passed to 'flatten'.")
yield from DatasetRef.flatten(ref.components.values(), parents=True)
if parents:
yield ref
for subref in ref.allRefs(parents):
yield subref

@staticmethod
def groupByType(refs: Iterable[DatasetRef], *, recursive: bool = True
57 changes: 38 additions & 19 deletions python/lsst/daf/butler/core/datasets/type.py
@@ -27,12 +27,28 @@
import re

from types import MappingProxyType

from typing import (
TYPE_CHECKING,
Any,
Iterable,
Mapping,
Optional,
Tuple,
Type,
Union,
)


from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey

if TYPE_CHECKING:
from ..dimensions import Dimension, DimensionUniverse


def _safeMakeMappingProxyType(data):
def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
if data is None:
data = {}
return MappingProxyType(data)
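
Two idioms in this hunk deserve a note: the `TYPE_CHECKING` block imports `Dimension` and `DimensionUniverse` only when a type checker is running, so the names can be used in annotations without a runtime import (and without risking an import cycle), and `MappingProxyType` wraps a dict in a read-only view. A small sketch of both patterns, using a stdlib import as the stand-in for the guarded one:

from types import MappingProxyType
from typing import TYPE_CHECKING, Mapping, Optional

if TYPE_CHECKING:
    # Seen only by mypy; never executed, so it cannot create an import cycle.
    from argparse import ArgumentParser


def describe(parser: "ArgumentParser") -> str:
    # String annotation: resolved by the type checker, not at runtime.
    return type(parser).__name__


def safe_proxy(data: Optional[Mapping]) -> Mapping:
    # None is normalised to an empty mapping, as in _safeMakeMappingProxyType.
    if data is None:
        data = {}
    return MappingProxyType(data)


view = safe_proxy({"wcs": 1})
print(view["wcs"])   # 1
# view["psf"] = 2    # TypeError: the proxy is read-only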
@@ -74,7 +90,7 @@ class DatasetType:
VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

@staticmethod
def nameWithComponent(datasetTypeName, componentName):
def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
"""Form a valid DatasetTypeName from a parent and component.
No validation is performed.
@@ -93,7 +109,9 @@ def nameWithComponent(datasetTypeName, componentName):
"""
return "{}.{}".format(datasetTypeName, componentName)

def __init__(self, name, dimensions, storageClass, *, universe=None):
def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
storageClass: Union[StorageClass, str],
*, universe: DimensionUniverse = None):
if self.VALID_NAME_REGEX.match(name) is None:
raise ValueError(f"DatasetType name '{name}' is invalid.")
self._name = name
@@ -104,17 +122,18 @@ def __init__(self, name, dimensions, storageClass, *, universe=None):
dimensions = universe.extract(dimensions)
self._dimensions = dimensions
assert isinstance(storageClass, (StorageClass, str))
self._storageClass: Optional[StorageClass]
if isinstance(storageClass, StorageClass):
self._storageClass = storageClass
self._storageClassName = storageClass.name
else:
self._storageClass = None
self._storageClassName = storageClass
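
The annotated `__init__` accepts either a `StorageClass` instance or its name, and the bare `self._storageClass: Optional[StorageClass]` line declares the attribute's type once so that both branches of the `if` type-check. A reduced sketch of the same normalisation pattern, with stand-in classes rather than the real Butler ones:

from typing import Optional, Union


class StorageClassLike:
    """Stand-in for StorageClass; carries only a name."""

    def __init__(self, name: str) -> None:
        self.name = name


class TypeLike:
    """Stand-in for DatasetType, keeping only the storage-class handling."""

    def __init__(self, storageClass: Union[StorageClassLike, str]) -> None:
        # Declare the attribute with its full Optional type up front so
        # mypy accepts both assignment branches below.
        self._storageClass: Optional[StorageClassLike]
        if isinstance(storageClass, StorageClassLike):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass


print(TypeLike("ExposureF")._storageClassName)                    # ExposureF
print(TypeLike(StorageClassLike("ExposureF"))._storageClassName)  # ExposureF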

def __repr__(self):
def __repr__(self) -> str:
return "DatasetType({}, {}, {})".format(self.name, self.dimensions, self._storageClassName)

def __eq__(self, other):
def __eq__(self, other: Any) -> bool:
if not isinstance(other, type(self)):
return False
if self._name != other._name:
@@ -126,7 +145,7 @@ def __eq__(self, other):
else:
return self._storageClassName == other._storageClassName

def __hash__(self):
def __hash__(self) -> int:
"""Hash DatasetType instance.
This only uses StorageClass name which is consistent with the
@@ -135,21 +154,21 @@ def __hash__(self):
return hash((self._name, self._dimensions, self._storageClassName))

@property
def name(self):
def name(self) -> str:
"""A string name for the Dataset; must correspond to the same
`DatasetType` across all Registries.
"""
return self._name

@property
def dimensions(self):
def dimensions(self) -> DimensionGraph:
r"""The `Dimension`\ s that label and relate instances of this
`DatasetType` (`DimensionGraph`).
"""
return self._dimensions

@property
def storageClass(self):
def storageClass(self) -> StorageClass:
"""`StorageClass` instance that defines how this `DatasetType`
is persisted. Note that if DatasetType was constructed with a name
of a StorageClass then Butler has to be initialized before using
@@ -160,7 +179,7 @@ def storageClass(self):
return self._storageClass

@staticmethod
def splitDatasetTypeName(datasetTypeName):
def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
"""Given a dataset type name, return the root name and the component
name.
@@ -189,7 +208,7 @@ def splitDatasetTypeName(datasetTypeName):
root, comp = root.split(".", maxsplit=1)
return root, comp
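
The visible tail of `splitDatasetTypeName` (`root.split(".", maxsplit=1)`) implies that everything after the first dot is the component; the guard before it is elided in this view, so the following sketch reconstructs it under that assumption:

from typing import Optional, Tuple


def split_dataset_type_name(datasetTypeName: str) -> Tuple[str, Optional[str]]:
    # Assumed reconstruction: only split when a "." is present, and keep
    # everything after the first dot as the component name.
    root = datasetTypeName
    comp: Optional[str] = None
    if "." in root:
        root, comp = root.split(".", maxsplit=1)
    return root, comp


print(split_dataset_type_name("calexp"))             # ('calexp', None)
print(split_dataset_type_name("calexp.wcs"))         # ('calexp', 'wcs')
print(split_dataset_type_name("calexp.wcs.pixels"))  # ('calexp', 'wcs.pixels')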

def nameAndComponent(self):
def nameAndComponent(self) -> Tuple[str, Optional[str]]:
"""Return the root name of this dataset type and the component
name (if defined).
@@ -202,7 +221,7 @@ def nameAndComponent(self):
"""
return self.splitDatasetTypeName(self.name)

def component(self):
def component(self) -> Optional[str]:
"""Component name (if defined)
Returns
@@ -214,7 +233,7 @@ def component(self):
_, comp = self.nameAndComponent()
return comp

def componentTypeName(self, component):
def componentTypeName(self, component: str) -> str:
"""Given a component name, derive the datasetTypeName of that component
Parameters
@@ -253,7 +272,7 @@ def makeComponentDatasetType(self, component: str) -> DatasetType:
return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
storageClass=self.storageClass.components[component])

def isComponent(self):
def isComponent(self) -> bool:
"""Boolean indicating whether this `DatasetType` refers to a
component of a composite.
@@ -266,7 +285,7 @@ def isComposite(self):
return True
return False

def isComposite(self):
def isComposite(self) -> bool:
"""Boolean indicating whether this `DatasetType` is a composite type.
Returns
@@ -277,7 +296,7 @@ def _lookupNames(self):
"""
return self.storageClass.isComposite()

def _lookupNames(self):
def _lookupNames(self) -> Tuple[LookupKey, ...]:
"""Name keys to use when looking up this datasetType in a
configuration.
@@ -292,7 +311,7 @@ def _lookupNames(self):
the storage class name.
"""
rootName, componentName = self.nameAndComponent()
lookups = (LookupKey(name=self.name),)
lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
if componentName is not None:
lookups = lookups + (LookupKey(name=rootName),)

@@ -302,15 +321,15 @@ def _lookupNames(self):

return lookups + self.storageClass._lookupNames()

def __reduce__(self):
def __reduce__(self) -> Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str]]:
"""Support pickling.
StorageClass instances can not normally be pickled, so we pickle
StorageClass name instead of instance.
"""
return (DatasetType, (self.name, self.dimensions, self._storageClassName))
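
The new `__reduce__` annotation spells out the pickling contract: a callable plus the positional arguments used to rebuild the instance, with the storage class passed by name because `StorageClass` objects do not normally pickle. A self-contained sketch of that contract with a toy class:

import pickle
from typing import Tuple, Type


class Named:
    """Toy class pickled via a (callable, args) pair, as DatasetType is."""

    def __init__(self, name: str, storageClassName: str) -> None:
        self.name = name
        self.storageClassName = storageClassName

    def __reduce__(self) -> Tuple[Type["Named"], Tuple[str, str]]:
        # Unpickling calls Named(name, storageClassName) to rebuild the object.
        return (Named, (self.name, self.storageClassName))


copy = pickle.loads(pickle.dumps(Named("calexp", "ExposureF")))
print(copy.name, copy.storageClassName)  # calexp ExposureF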

def __deepcopy__(self, memo):
def __deepcopy__(self, memo: Any) -> DatasetType:
"""Support for deep copy method.
Normally ``deepcopy`` will use pickle mechanism to make copies.