DM-24807: Add type annotations to datastore #280

Merged: 5 commits, May 17, 2020
55 changes: 45 additions & 10 deletions python/lsst/daf/butler/core/datasets/ref.py
@@ -23,7 +23,16 @@
__all__ = ["AmbiguousDatasetError", "DatasetRef"]

import hashlib
from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple
from typing import (
Any,
Dict,
Iterable,
Iterator,
List,
Mapping,
Optional,
Tuple,
)

from types import MappingProxyType
from ..dimensions import DataCoordinate, DimensionGraph, ExpandedDataCoordinate
@@ -136,7 +145,7 @@ def __new__(cls, datasetType: DatasetType, dataId: DataCoordinate, *,
self._hash = hash
return self

def __eq__(self, other: DatasetRef):
def __eq__(self, other: Any) -> bool:
try:
return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
except AttributeError:
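Widening `other` from `DatasetRef` to `Any` follows the standard typing idiom for `__eq__`: `object.__eq__` is typed to accept `Any`, so a narrower parameter type fails mypy's override (Liskov) check. A minimal sketch of the idiom, not this class's exact body:

```python
from typing import Any

class Point:
    def __init__(self, x: int) -> None:
        self.x = x

    def __eq__(self, other: Any) -> bool:
        # Duck-typed comparison, mirroring the try/except style above:
        # unrelated types lack .x and simply compare unequal.
        try:
            return self.x == other.x
        except AttributeError:
            return False
```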
@@ -287,7 +296,7 @@ def isComposite(self) -> bool:
"""
return self.datasetType.isComposite()

def _lookupNames(self) -> Tuple[LookupKey]:
def _lookupNames(self) -> Tuple[LookupKey, ...]:
"""Name keys to use when looking up this DatasetRef in a configuration.

The names are returned in order of priority.
@@ -302,14 +311,43 @@ def _lookupNames(self) -> Tuple[LookupKey]:
"""
# Special case the instrument Dimension since we allow configs
# to include the instrument name in the hierarchy.
names = self.datasetType._lookupNames()
names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

if "instrument" in self.dataId:
names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
for n in names) + names

return names
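A sketch of the resulting priority order, using only the `LookupKey` API visible in this diff (the "calexp" name and "HSC" instrument are hypothetical):

```python
base = (LookupKey(name="calexp"),)
names = tuple(k.clone(dataId={"instrument": "HSC"}) for k in base) + base
# names[0] matches instrument-specific config entries first;
# names[1] falls back to the plain dataset type name.
```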

def allRefs(self, parents: bool = True) -> Iterator[DatasetRef]:
"""Return all the nested component `DatasetRef` and optionally the
parent.

Parameters
----------
parents : `bool`, optional
If `True` (default) include the given dataset in the output
iterable. If `False`, include only its components. This does
not propagate recursively - only the outermost level of parents
is ignored if ``parents`` is `False`.

Yields
------
ref : `DatasetRef`
Itself (only if ``parents`` is `True`) or one of its (recursive)
children.

Notes
-----
If ``parents`` is `True`, components are guaranteed to be yielded
before their parents.
"""
if self.components is None:
raise AmbiguousDatasetError(f"Unresolved ref {self} cannot be flattened.")
yield from DatasetRef.flatten(self.components.values(), parents=True)
if parents:
yield self
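A usage sketch (``compositeRef`` and the dataset names are hypothetical): for a resolved composite, iteration yields the component refs first and the parent last:

```python
for child in compositeRef.allRefs(parents=True):
    print(child.datasetType.name)
# e.g. "calexp.wcs", "calexp.psf", ..., then "calexp" itself (parent last)
```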

@staticmethod
def flatten(refs: Iterable[DatasetRef], *, parents: bool = True) -> Iterator[DatasetRef]:
"""Recursively transform an iterable over `DatasetRef` to include
@@ -330,19 +368,16 @@ def flatten(refs: Iterable[DatasetRef], *, parents: bool = True) -> Iterator[DatasetRef]:
------
ref : `DatasetRef`
Either one of the given `DatasetRef` instances (only if ``parent``
is `True`) or on of its (recursive) children.
is `True`) or one of its (recursive) children.

Notes
-----
If ``parents`` is `True`, components are guaranteed to be yielded
before their parents.
"""
for ref in refs:
if ref.components is None:
raise AmbiguousDatasetError(f"Unresolved ref {ref} passed to 'flatten'.")
yield from DatasetRef.flatten(ref.components.values(), parents=True)
if parents:
yield ref
for subref in ref.allRefs(parents):
yield subref
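A usage sketch (the refs and ``process()`` are hypothetical): flattening a batch lets a caller handle every component before the composite that contains it:

```python
for ref in DatasetRef.flatten([calexpRef, coaddRef], parents=True):
    process(ref)  # every component ref is seen before its parent
```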

@staticmethod
def groupByType(refs: Iterable[DatasetRef], *, recursive: bool = True
57 changes: 38 additions & 19 deletions python/lsst/daf/butler/core/datasets/type.py
@@ -27,12 +27,28 @@
import re

from types import MappingProxyType

from typing import (
TYPE_CHECKING,
Any,
Iterable,
Mapping,
Optional,
Tuple,
Type,
Union,
)


from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey

if TYPE_CHECKING:
from ..dimensions import Dimension, DimensionUniverse


def _safeMakeMappingProxyType(data):
def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
if data is None:
data = {}
return MappingProxyType(data)
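The helper, now annotated, normalizes `None` to an empty dict and wraps the result in a read-only `MappingProxyType` view; for example:

```python
view = _safeMakeMappingProxyType({"a": 1})
assert view["a"] == 1
assert _safeMakeMappingProxyType(None) == {}  # None becomes an empty view
# view["b"] = 2  # would raise TypeError: mappingproxy is read-only
```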
@@ -74,7 +90,7 @@ class DatasetType:
VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

@staticmethod
def nameWithComponent(datasetTypeName, componentName):
def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
"""Form a valid DatasetTypeName from a parent and component.

No validation is performed.
@@ -93,7 +109,9 @@ def nameWithComponent(datasetTypeName, componentName):
"""
return "{}.{}".format(datasetTypeName, componentName)

def __init__(self, name, dimensions, storageClass, *, universe=None):
def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
storageClass: Union[StorageClass, str],
*, universe: DimensionUniverse = None):
if self.VALID_NAME_REGEX.match(name) is None:
raise ValueError(f"DatasetType name '{name}' is invalid.")
self._name = name
@@ -104,17 +122,18 @@ def __init__(self, name, dimensions, storageClass, *, universe=None):
dimensions = universe.extract(dimensions)
self._dimensions = dimensions
assert isinstance(storageClass, (StorageClass, str))
self._storageClass: Optional[StorageClass]
if isinstance(storageClass, StorageClass):
self._storageClass = storageClass
self._storageClassName = storageClass.name
else:
self._storageClass = None
self._storageClassName = storageClass
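The bare `self._storageClass: Optional[StorageClass]` declaration just above the branch is what makes both assignments type-check: without it, mypy infers the attribute's type from the first assignment it sees and rejects the other branch. A minimal standalone sketch of the pattern:

```python
from typing import Optional

class Holder:
    def __init__(self, resolved: bool) -> None:
        # Declared once with the full union; each branch then narrows it.
        self._value: Optional[str] = None
        if resolved:
            self._value = "some-value"
```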

def __repr__(self):
def __repr__(self) -> str:
return "DatasetType({}, {}, {})".format(self.name, self.dimensions, self._storageClassName)

def __eq__(self, other):
def __eq__(self, other: Any) -> bool:
if not isinstance(other, type(self)):
return False
if self._name != other._name:
@@ -126,7 +145,7 @@ def __eq__(self, other):
else:
return self._storageClassName == other._storageClassName

def __hash__(self):
def __hash__(self) -> int:
"""Hash DatasetType instance.

This only uses the StorageClass name, which is consistent with the
@@ -135,21 +154,21 @@
return hash((self._name, self._dimensions, self._storageClassName))
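A quick consistency sketch (``dims`` and ``exposureF``, with ``exposureF.name == "ExposureF"``, are hypothetical): equal instances must hash equally even when one holds a resolved `StorageClass` and the other only its name, which is why the hash uses the name:

```python
a = DatasetType("calexp", dims, storageClass=exposureF)    # StorageClass instance
b = DatasetType("calexp", dims, storageClass="ExposureF")  # just the name
assert a == b and hash(a) == hash(b)
```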

@property
def name(self):
def name(self) -> str:
"""A string name for the Dataset; must correspond to the same
`DatasetType` across all Registries.
"""
return self._name

@property
def dimensions(self):
def dimensions(self) -> DimensionGraph:
r"""The `Dimension`\ s that label and relate instances of this
`DatasetType` (`DimensionGraph`).
"""
return self._dimensions

@property
def storageClass(self):
def storageClass(self) -> StorageClass:
"""`StorageClass` instance that defines how this `DatasetType`
is persisted. Note that if the DatasetType was constructed with the name
of a StorageClass then Butler has to be initialized before using
@@ -160,7 +179,7 @@ def storageClass(self):
return self._storageClass
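The property's body is mostly collapsed in this diff; the visible `return` is the tail of a lazy lookup. A sketch of what the deferred resolution plausibly looks like (the exact factory call is an assumption, not shown here):

```python
@property
def storageClass(self) -> StorageClass:
    # Assumed lazy resolution: look the name up on first access.
    if self._storageClass is None:
        self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
    return self._storageClass
```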

@staticmethod
def splitDatasetTypeName(datasetTypeName):
def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
"""Given a dataset type name, return the root name and the component
name.

@@ -189,7 +208,7 @@ def splitDatasetTypeName(datasetTypeName):
root, comp = root.split(".", maxsplit=1)
return root, comp
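For example (the names are hypothetical):

```python
DatasetType.splitDatasetTypeName("calexp.wcs")  # -> ("calexp", "wcs")
DatasetType.splitDatasetTypeName("calexp")      # -> ("calexp", None)
```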

def nameAndComponent(self):
def nameAndComponent(self) -> Tuple[str, Optional[str]]:
"""Return the root name of this dataset type and the component
name (if defined).

@@ -202,7 +221,7 @@ def nameAndComponent(self):
"""
return self.splitDatasetTypeName(self.name)

def component(self):
def component(self) -> Optional[str]:
"""Component name (if defined)

Returns
@@ -214,7 +233,7 @@ def component(self):
_, comp = self.nameAndComponent()
return comp

def componentTypeName(self, component):
def componentTypeName(self, component: str) -> str:
"""Given a component name, derive the datasetTypeName of that component

Parameters
@@ -253,7 +272,7 @@ def makeComponentDatasetType(self, component: str) -> DatasetType:
return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
storageClass=self.storageClass.components[component])
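A usage sketch (``calexpType`` is a hypothetical composite `DatasetType`): the derived type keeps the parent's dimensions and takes the component's storage class:

```python
wcsType = calexpType.makeComponentDatasetType("wcs")
assert wcsType.name == calexpType.componentTypeName("wcs")  # "calexp.wcs"
assert wcsType.dimensions == calexpType.dimensions
```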

def isComponent(self):
def isComponent(self) -> bool:
"""Boolean indicating whether this `DatasetType` refers to a
component of a composite.

@@ -266,7 +285,7 @@ def isComponent(self):
return True
return False

def isComposite(self):
def isComposite(self) -> bool:
"""Boolean indicating whether this `DatasetType` is a composite type.

Returns
@@ -277,7 +296,7 @@ def isComposite(self):
"""
return self.storageClass.isComposite()

def _lookupNames(self):
def _lookupNames(self) -> Tuple[LookupKey, ...]:
"""Name keys to use when looking up this datasetType in a
configuration.

@@ -292,7 +311,7 @@ def _lookupNames(self):
the storage class name.
"""
rootName, componentName = self.nameAndComponent()
lookups = (LookupKey(name=self.name),)
lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
if componentName is not None:
lookups = lookups + (LookupKey(name=rootName),)

@@ -302,15 +321,15 @@

return lookups + self.storageClass._lookupNames()
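Putting the pieces together, the priority order for a hypothetical component type ``"calexp.wcs"`` would be roughly:

```python
lookups: Tuple[LookupKey, ...] = (LookupKey(name="calexp.wcs"),)  # full name first
lookups = lookups + (LookupKey(name="calexp"),)                   # then the root name
# ...instrument-specific clones are spliced in by the dimension check
# (collapsed in this diff), and storage-class keys are appended last,
# giving most- to least-specific matching.
```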

def __reduce__(self):
def __reduce__(self) -> Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str]]:
"""Support pickling.

StorageClass instances cannot normally be pickled, so we pickle the
StorageClass name instead of the instance.
"""
return (DatasetType, (self.name, self.dimensions, self._storageClassName))
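A round-trip sketch (``datasetType`` is a hypothetical instance): the pickle payload carries the storage class name, so the unpickled copy is rebuilt through the normal constructor and compares equal:

```python
import pickle

clone = pickle.loads(pickle.dumps(datasetType))
assert clone == datasetType  # equality falls back to the storage class name
```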

def __deepcopy__(self, memo):
def __deepcopy__(self, memo: Any) -> DatasetType:
"""Support for deep copy method.

Normally ``deepcopy`` will use the pickle mechanism to make copies.