
DM-2850: Remove getSchemaCatalogs API #292

Merged 4 commits on Nov 18, 2022
2 changes: 2 additions & 0 deletions doc/changes/DM-2850.removal.rst
@@ -0,0 +1,2 @@
+Removed the ``Task.getSchemaCatalogs`` and ``Task.getAllSchemaCatalogs`` APIs.
+These were used by ``CmdLineTask`` but are no longer used in the current middleware.
2 changes: 1 addition & 1 deletion python/lsst/pipe/base/connections.py
@@ -411,7 +411,7 @@ class attribute must match a function argument name in the ``run``

     dimensions: ClassVar[Set[str]]
 
-    def __init__(self, *, config: "PipelineTaskConfig" = None):
+    def __init__(self, *, config: "PipelineTaskConfig" | None = None):
         self.inputs: Set[str] = set(self.inputs)
         self.prerequisiteInputs: Set[str] = set(self.prerequisiteInputs)
         self.outputs: Set[str] = set(self.outputs)
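The new annotation combines a quoted forward reference with PEP 604 union syntax, which is only safe when annotation evaluation is deferred; presumably `from __future__ import annotations` is in effect in this module (an assumption here, since the import is outside the hunk). A minimal sketch of the pattern, with hypothetical stand-in classes:

```python
# Deferred evaluation (PEP 563) stores annotations as strings, so the
# expression "Config" | None below is never executed at runtime; without
# this import it would raise TypeError (str does not support |).
from __future__ import annotations


class Widget:
    # "Config" is a forward reference: the class is defined further down.
    def __init__(self, *, config: "Config" | None = None) -> None:
        self.config = config


class Config:
    pass


w = Widget(config=Config())  # keyword-only, mirroring the diff's signature
```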
16 changes: 14 additions & 2 deletions python/lsst/pipe/base/pipelineIR.py
@@ -30,7 +30,19 @@
 from collections import Counter
 from collections.abc import Iterable as abcIterable
 from dataclasses import dataclass, field
-from typing import Any, Dict, Generator, List, Literal, Mapping, MutableMapping, Optional, Set, Union
+from typing import (
+    Any,
+    Dict,
+    Generator,
+    Hashable,
+    List,
+    Literal,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Set,
+    Union,
+)
 
 import yaml
 from lsst.resources import ResourcePath, ResourcePathExpression
@@ -46,7 +58,7 @@ class PipelineYamlLoader(yaml.SafeLoader):
     found inside a pipeline file at a given scope.
     """
 
-    def construct_mapping(self, node: yaml.Node, deep: bool = False) -> Mapping[str, Any]:
+    def construct_mapping(self, node: yaml.MappingNode, deep: bool = False) -> dict[Hashable, Any]:
         # do the call to super first so that it can do all the other forms of
         # checking on this node. If you check the uniqueness of keys first
         # it would save the work that super does in the case of a failure, but
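The (truncated) comment above describes the approach: let the superclass build and validate the mapping first, then verify key uniqueness. A self-contained sketch of such a duplicate-key check, assuming only PyYAML; the real `PipelineYamlLoader` differs in detail:

```python
from collections import Counter
from typing import Any, Hashable

import yaml


class UniqueKeyLoader(yaml.SafeLoader):
    """Illustrative SafeLoader that rejects duplicate mapping keys."""

    def construct_mapping(self, node: yaml.MappingNode, deep: bool = False) -> dict[Hashable, Any]:
        # Call super first so it performs its usual checks on the node.
        mapping = super().construct_mapping(node, deep=deep)
        # node.value keeps every (key_node, value_node) pair, so duplicates
        # are still visible even though the dict retains only the last one.
        counts = Counter(self.construct_object(key_node, deep=deep) for key_node, _ in node.value)
        duplicates = {key for key, num in counts.items() if num > 1}
        if duplicates:
            raise yaml.constructor.ConstructorError(
                None, None, f"duplicate keys found: {duplicates}", node.start_mark
            )
        return mapping


try:
    yaml.load("tasks: 1\ntasks: 2", Loader=UniqueKeyLoader)
except yaml.constructor.ConstructorError as err:
    print(err)  # duplicate keys found: {'tasks'}
```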
63 changes: 0 additions & 63 deletions python/lsst/pipe/base/task.py
@@ -218,69 +218,6 @@ def emptyMetadata(self) -> None:
             assert subtask is not None, "Unexpected garbage collection of subtask."
             subtask.metadata = _TASK_METADATA_TYPE()
 
-    # We use Any instead of lsst.afw.table.BaseCatalog here to avoid a
-    # type-only dependency on afw. It's unclear whether this will survive
-    # Gen2 anyway, or how we might adapt it to work with non-afw catalogs
-    # (e.g. Parquet).
-    def getSchemaCatalogs(self) -> Dict[str, Any]:
-        """Get the schemas generated by this task.
-
-        Returns
-        -------
-        schemaCatalogs : `dict`
-            Keys are butler dataset type, values are an empty catalog (an
-            instance of the appropriate `lsst.afw.table` Catalog type) for
-            this task.
-
-        Notes
-        -----
-
-        .. warning::
-
-            Subclasses that use schemas must override this method. The default
-            implementation returns an empty dict.
-
-        This method may be called at any time after the Task is constructed,
-        which means that all task schemas should be computed at construction
-        time, *not* when data is actually processed. This reflects the
-        philosophy that the schema should not depend on the data.
-
-        Returning catalogs rather than just schemas allows us to save e.g.
-        slots for SourceCatalog as well.
-
-        See also
-        --------
-        Task.getAllSchemaCatalogs
-        """
-        return {}
-
-    def getAllSchemaCatalogs(self) -> Dict[str, Any]:
-        """Get schema catalogs for all tasks in the hierarchy, combining the
-        results into a single dict.
-
-        Returns
-        -------
-        schemacatalogs : `dict`
-            Keys are butler dataset type, values are an empty catalog (an
-            instance of the appropriate `lsst.afw.table` Catalog type) for all
-            tasks in the hierarchy, from the top-level task down
-            through all subtasks.
-
-        Notes
-        -----
-        This method may be called on any task in the hierarchy; it will return
-        the same answer, regardless.
-
-        The default implementation should always suffice. If your subtask uses
-        schemas, then override `Task.getSchemaCatalogs`, not this method.
-        """
-        schemaDict = self.getSchemaCatalogs()
-        for wref in self._taskDict.values():
-            subtask = wref()
-            assert subtask is not None, "Unexpected garbage collection of subtask."
-            schemaDict.update(subtask.getSchemaCatalogs())
-        return schemaDict
-
     def getFullMetadata(self) -> TaskMetadata:
         """Get metadata for all tasks.
 
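For reference, the deleted `getAllSchemaCatalogs` simply walked the task's weak-reference registry and merged each subtask's schema catalogs into one dict. A stripped-down sketch of that aggregation pattern; `MiniTask` and its attributes are illustrative stand-ins, not the removed `lsst.pipe.base` API:

```python
import weakref
from typing import Any, Dict


class MiniTask:
    """Illustrative stand-in for the removed schema-catalog methods."""

    def __init__(self, name: str) -> None:
        self.name = name
        # Weak references keep the registry from creating reference cycles.
        self._taskDict: Dict[str, weakref.ref] = {name: weakref.ref(self)}

    def makeSubtask(self, name: str) -> None:
        subtask = MiniTask(name)
        setattr(self, name, subtask)  # the parent holds the strong reference
        self._taskDict[name] = weakref.ref(subtask)

    def getSchemaCatalogs(self) -> Dict[str, Any]:
        return {}  # schema-producing subclasses would override this

    def getAllSchemaCatalogs(self) -> Dict[str, Any]:
        # Mirrors the removed implementation: start from this task's
        # catalogs, then merge in every registered subtask's catalogs.
        schemas = self.getSchemaCatalogs()
        for ref in self._taskDict.values():
            subtask = ref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            schemas.update(subtask.getSchemaCatalogs())
        return schemas


parent = MiniTask("parent")
parent.makeSubtask("child")
print(parent.getAllSchemaCatalogs())  # {} unless a subclass supplies schemas
```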
2 changes: 1 addition & 1 deletion python/lsst/pipe/base/tests/simpleQGraph.py
@@ -274,7 +274,7 @@ def makeSimpleButler(root: str, run: str = "test", inMemory: bool = True) -> But


 def populateButler(
-    pipeline: Pipeline, butler: Butler, datasetTypes: Dict[Optional[str], List[str]] = None
+    pipeline: Pipeline, butler: Butler, datasetTypes: Dict[Optional[str], List[str]] | None = None
 ) -> None:
     """Populate data butler with data needed for test.

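The corrected signature follows the usual optional-argument pattern for mutable defaults; a brief sketch under that assumption (the function below is hypothetical, not the real `populateButler`):

```python
from typing import Dict, List, Optional


def populate(datasetTypes: Optional[Dict[Optional[str], List[str]]] = None) -> None:
    # Annotating the parameter as Dict while defaulting it to None fails
    # strict type checking, hence the widened annotation (spelled ``| None``
    # in the diff). A None default also avoids the shared-state pitfall of
    # a mutable {} default.
    if datasetTypes is None:
        datasetTypes = {}
    for run, names in datasetTypes.items():
        print(run, names)


populate()                         # uses an empty mapping
populate({None: ["raw", "bias"]})  # hypothetical dataset-type names
```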