DM-40150: Remove some APIs scheduled for removal after v26 #937

Merged Jan 5, 2024 · 9 commits
6 changes: 6 additions & 0 deletions doc/changes/DM-40150.removal.rst
@@ -0,0 +1,6 @@
* Removed ``Butler.datastore`` property. The datastore can no longer be accessed directly.
* Removed ``Butler.datasetExists`` (and the "direct" variant). Please use ``Butler.exists()`` and ``Butler.stored()`` instead.
* Removed ``Butler.getDirect`` and related APIs. ``Butler.get()`` et al now use the ``DatasetRef`` directly if one is given.
* Removed the ``run`` and ``idGenerationMode`` parameters from ``Butler.ingest()``. They were no longer being used.
* Removed the ``--reuse-ids`` option from the ``butler import`` command line. It is no longer needed now that UUIDs are used throughout.
* Removed the ``reconstitutedDimensions`` parameter from ``Quantum.from_simple``.
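
For downstream code the migration is mechanical. A minimal sketch, assuming `butler` is an existing `Butler` and `ref` is a resolved `DatasetRef`:

```python
# Before (removed after v26):
#   obj = butler.getDirect(ref)
#   present = butler.datasetExists(ref)

# After: get() takes a resolved DatasetRef directly, and existence
# checks are split between exists() and stored().
obj = butler.get(ref)
existence = butler.exists(ref)  # registry and datastore status combined
in_store = butler.stored(ref)   # datastore-only check
```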
11 changes: 1 addition & 10 deletions python/lsst/daf/butler/_butler.py
@@ -50,7 +50,7 @@

if TYPE_CHECKING:
from ._dataset_existence import DatasetExistence
from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
from ._dataset_ref import DatasetId, DatasetRef
from ._dataset_type import DatasetType
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._file_dataset import FileDataset
@@ -1123,8 +1123,6 @@ def ingest(
self,
*datasets: FileDataset,
transfer: str | None = "auto",
run: str | None = None,
idGenerationMode: DatasetIdGenEnum | None = None,
record_validation_info: bool = True,
) -> None:
"""Store and register one or more datasets that already exist on disk.
@@ -1146,13 +1144,6 @@
If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
transfer the file.
run : `str`, optional
The name of the run ingested datasets should be added to,
overriding ``self.run``. This parameter is now deprecated since
the run is encoded in the ``FileDataset``.
idGenerationMode : `DatasetIdGenEnum`, optional
Specifies option for generating dataset IDs. Parameter is
deprecated.
record_validation_info : `bool`, optional
If `True`, the default, the datastore can record validation
information associated with the file. If `False` the datastore
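
With `run` and `idGenerationMode` gone, the target run collection is taken from the refs inside each `FileDataset`. A sketch, assuming `ref` is a resolved `DatasetRef` whose run is already set; the paths are placeholders:

```python
from lsst.daf.butler import Butler, FileDataset

butler = Butler("repo", writeable=True)  # "repo" is an illustrative path

# No run= or idGenerationMode= arguments anymore: the run collection
# comes from ref.run, and dataset IDs are the UUIDs already in the refs.
dataset = FileDataset(path="data/exposure.fits", refs=[ref])
butler.ingest(dataset, transfer="copy")
```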
149 changes: 1 addition & 148 deletions python/lsst/daf/butler/_limited_butler.py
@@ -34,7 +34,6 @@
from collections.abc import Iterable
from typing import Any, ClassVar

from deprecated.sphinx import deprecated
from lsst.resources import ResourcePath

from ._dataset_ref import DatasetRef
@@ -64,43 +63,6 @@ def isWriteable(self) -> bool:
"""Return `True` if this `Butler` supports write operations."""
raise NotImplementedError()

# TODO: remove on DM-40067.
@deprecated(
reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
" Please use Butler.put(). Will be removed after v26.0.",
version="v26.0",
category=FutureWarning,
)
def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
"""Store a dataset that already has a UUID and ``RUN`` collection.

Parameters
----------
obj : `object`
The dataset.
ref : `DatasetRef`
Resolved reference for a not-yet-stored dataset.

Returns
-------
ref : `DatasetRef`
The same as the given, for convenience and symmetry with
`Butler.put`.

Raises
------
TypeError
Raised if the butler is read-only.

Notes
-----
Whether this method inserts the given dataset into a ``Registry`` is
implementation defined (some `LimitedButler` subclasses do not have a
`Registry`), but it always adds the dataset to a `Datastore`, and the
given ``ref.id`` and ``ref.run`` are always preserved.
"""
return self.put(obj, ref)
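
The removed method was already a thin alias, so the replacement is a one-line change. A sketch, assuming `obj` is the dataset and `ref` a resolved `DatasetRef`:

```python
# Before (removed): stored_ref = butler.putDirect(obj, ref)
# After: put() preserves ref.id and ref.run when given a resolved ref.
stored_ref = butler.put(obj, ref)
assert stored_ref.id == ref.id
```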

@abstractmethod
def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
"""Store a dataset that already has a UUID and ``RUN`` collection.
@@ -174,81 +136,6 @@ def get(
log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

# TODO: remove on DM-40067.
@deprecated(
reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
" Please use Butler.get(). Will be removed after v26.0.",
version="v26.0",
category=FutureWarning,
)
def getDirect(
self,
ref: DatasetRef,
*,
parameters: dict[str, Any] | None = None,
storageClass: str | StorageClass | None = None,
) -> Any:
"""Retrieve a stored dataset.

Parameters
----------
ref : `DatasetRef`
Resolved reference to an already stored dataset.
parameters : `dict`
Additional StorageClass-defined options to control reading,
typically used to efficiently read only a subset of the dataset.
storageClass : `StorageClass` or `str`, optional
The storage class to be used to override the Python type
returned by this method. By default the returned type matches
the dataset type definition for this dataset. Specifying a
read `StorageClass` can force a different type to be returned.
This type must be compatible with the original type.

Returns
-------
obj : `object`
The dataset.
"""
return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
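
The same pattern applies here: the keywords move unchanged onto `get()`. A sketch, where `params` stands in for a dict of StorageClass-defined read parameters:

```python
# Before (removed):
#   obj = butler.getDirect(ref, parameters=params)
# After: identical keywords on get() itself.
obj = butler.get(ref, parameters=params)
```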

# TODO: remove on DM-40067.
@deprecated(
reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
"Please use Butler.getDeferred(). Will be removed after v26.0.",
version="v26.0",
category=FutureWarning,
)
def getDirectDeferred(
self,
ref: DatasetRef,
*,
parameters: dict[str, Any] | None = None,
storageClass: str | StorageClass | None = None,
) -> DeferredDatasetHandle:
"""Create a `DeferredDatasetHandle` which can later retrieve a dataset,
from a resolved `DatasetRef`.

Parameters
----------
ref : `DatasetRef`
Resolved reference to an already stored dataset.
parameters : `dict`
Additional StorageClass-defined options to control reading,
typically used to efficiently read only a subset of the dataset.
storageClass : `StorageClass` or `str`, optional
The storage class to be used to override the Python type
returned by this method. By default the returned type matches
the dataset type definition for this dataset. Specifying a
read `StorageClass` can force a different type to be returned.
This type must be compatible with the original type.

Returns
-------
obj : `DeferredDatasetHandle`
A handle which can be used to retrieve a dataset at a later time.
"""
return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
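
Deferred reads migrate the same way, and the handle's own API is unchanged. A sketch with a resolved `ref`:

```python
# Before (removed): handle = butler.getDirectDeferred(ref)
# After:
handle = butler.getDeferred(ref)
obj = handle.get()  # materialize the dataset only when it is needed
```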

def getDeferred(
self,
ref: DatasetRef,
@@ -455,28 +342,6 @@ def stored_many(
"""
return self._datastore.mexists(refs)

# TODO: remove on DM-40079.
@deprecated(
reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
"Will be removed after v26.0.",
version="v26.0",
category=FutureWarning,
)
def datasetExistsDirect(self, ref: DatasetRef) -> bool:
"""Return `True` if a dataset is actually present in the Datastore.

Parameters
----------
ref : `DatasetRef`
Resolved reference to a dataset.

Returns
-------
exists : `bool`
Whether the dataset exists in the Datastore.
"""
return self.stored(ref)
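
`stored()` is the direct replacement, and `stored_many()` (shown above) covers the batched case. A sketch with resolved refs `ref1` and `ref2`:

```python
# Before (removed): exists = butler.datasetExistsDirect(ref1)
# After: a datastore-only presence check.
exists = butler.stored(ref1)
presence = butler.stored_many([ref1, ref2])  # mapping of ref -> bool
```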

def markInputUnused(self, ref: DatasetRef) -> None:
"""Indicate that a predicted input was not actually used when
processing a `Quantum`.
@@ -489,7 +354,7 @@ def markInputUnused(self, ref: DatasetRef) -> None:
Notes
-----
By default, a dataset is considered "actually used" if it is accessed
via `getDirect` or a handle to it is obtained via `getDirectDeferred`
via `get` or a handle to it is obtained via `getDeferred`
(even if the handle is not used). This method must be called after one
of those in order to remove the dataset from the actual input list.
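
In task code the pattern looks like this sketch (the `need_it` flag is hypothetical):

```python
handle = butler.getDeferred(ref)  # ref is now marked as an actual input
if not need_it:
    butler.markInputUnused(ref)   # retract the mark before provenance is extracted
```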

@@ -555,18 +420,6 @@ def dimensions(self) -> DimensionUniverse:
"""
raise NotImplementedError()

# TODO: remove on DM-40080.
@property
@deprecated(
reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
"relevant functionality. Will be removed after v26.0.",
version="v26.0",
category=FutureWarning,
)
def datastore(self) -> Datastore:
"""The object that manages actual dataset storage (`Datastore`)."""
return self._datastore
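
Code that reached through the removed property should call the equivalent `Butler` methods instead. A sketch of two common substitutions, assuming a resolved `ref`:

```python
# Before (removed):
#   uri = butler.datastore.getURI(ref)
#   ok = butler.datastore.exists(ref)
# After: the same functionality is exposed on Butler itself.
uri = butler.getURI(ref)
ok = butler.stored(ref)
```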

_datastore: Datastore
"""The object that manages actual dataset storage (`Datastore`)."""

19 changes: 0 additions & 19 deletions python/lsst/daf/butler/_quantum.py
@@ -30,13 +30,11 @@
__all__ = ("Quantum", "SerializedQuantum", "DimensionRecordsAccumulator")

import sys
import warnings
from collections.abc import Iterable, Mapping, MutableMapping, Sequence
from typing import Any

import pydantic
from lsst.utils import doImportType
from lsst.utils.introspection import find_outside_stacklevel

from ._dataset_ref import DatasetRef, SerializedDatasetRef
from ._dataset_type import DatasetType, SerializedDatasetType
@@ -409,7 +407,6 @@ def from_simple(
cls,
simple: SerializedQuantum,
universe: DimensionUniverse,
reconstitutedDimensions: dict[int, tuple[str, DimensionRecord]] | None = None,
) -> Quantum:
"""Construct a new object from a simplified form.

@@ -421,24 +418,8 @@
The value returned by a call to `to_simple`.
universe : `DimensionUniverse`
The special graph of all known dimensions.
reconstitutedDimensions : `dict` of `int` to `DimensionRecord` or None
A mapping of ids to dimension records to be used when populating
dimensions for this Quantum. If supplied it will be used in place
of the dimension Records stored with the SerializedQuantum, if a
required dimension has already been loaded. Otherwise the record
will be unpersisted from the SerializedQuantum and added to the
reconstitutedDimensions dict (if not None). Defaults to None.
Deprecated, any argument will be ignored. Will be removed after
v26.
"""
initInputs: MutableMapping[DatasetType, DatasetRef] = {}
if reconstitutedDimensions is not None:
# TODO: remove this argument on DM-40150.
warnings.warn(
"The reconstitutedDimensions argument is now ignored and may be removed after v26",
category=FutureWarning,
stacklevel=find_outside_stacklevel("lsst.daf.butler"),
)

# Unpersist all the init inputs
for key, (value, dimensionIds) in simple.initInputs.items():
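
Callers simply drop the argument. A sketch, assuming `simple` is a `SerializedQuantum` and `universe` a `DimensionUniverse`:

```python
# Before (deprecated):
#   Quantum.from_simple(simple, universe, reconstitutedDimensions=cache)
# After: dimension records always come from the SerializedQuantum itself.
quantum = Quantum.from_simple(simple, universe)
```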
39 changes: 2 additions & 37 deletions python/lsst/daf/butler/_quantum_backed.py
@@ -39,7 +39,6 @@
from typing import TYPE_CHECKING, Any

import pydantic
from deprecated.sphinx import deprecated
from lsst.resources import ResourcePathExpression

from ._butler_config import ButlerConfig
@@ -380,23 +379,6 @@ def isWriteable(self) -> bool:
# Docstring inherited.
return True

# TODO: remove on DM-40067.
@deprecated(
reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
" Please use Butler.get(). Will be removed after v26.0.",
version="v26.0",
category=FutureWarning,
)
def getDirect(
self,
ref: DatasetRef,
*,
parameters: dict[str, Any] | None = None,
storageClass: str | StorageClass | None = None,
) -> Any:
# Docstring inherited.
return self.get(ref, parameters=parameters, storageClass=storageClass)

def get(
self,
ref: DatasetRef,
@@ -420,23 +402,6 @@ def get(
self._available_inputs.add(ref.id)
return obj

# TODO: remove on DM-40067.
@deprecated(
reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
"Please use Butler.getDeferred(). Will be removed after v26.0.",
version="v26.0",
category=FutureWarning,
)
def getDirectDeferred(
self,
ref: DatasetRef,
*,
parameters: dict[str, Any] | None = None,
storageClass: str | StorageClass | None = None,
) -> DeferredDatasetHandle:
# Docstring inherited.
return self.getDeferred(ref, parameters=parameters, storageClass=storageClass)

def getDeferred(
self,
ref: DatasetRef,
@@ -553,12 +518,12 @@ def extract_provenance_data(self) -> QuantumProvenanceData:
authors from having to worry about while still recording very
detailed information. But it has two small weaknesses:

- Calling `getDirectDeferred` or `getDirect` is enough to mark a
- Calling `getDeferred` or `get` is enough to mark a
dataset as an "actual input", which may mark some datasets that
aren't actually used. We rely on task authors to use
`markInputUnused` to address this.

- We assume that the execution system will call ``datasetExistsDirect``
- We assume that the execution system will call ``stored``
on all predicted inputs prior to execution, in order to populate the
"available inputs" set. This is what I envision
`~lsst.ctrl.mpexec.SingleQuantumExecutor` doing after we update it
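
Taken together, the expected execution-side flow looks roughly like this sketch, assuming `qbb` is a `QuantumBackedButler` and `predicted_inputs` an iterable of resolved refs:

```python
# Populate the "available inputs" set up front, as the notes assume.
availability = qbb.stored_many(predicted_inputs)  # mapping of ref -> bool

# ... task execution: get()/getDeferred() mark actual inputs, and
# markInputUnused() retracts any that turned out to be unnecessary ...

provenance = qbb.extract_provenance_data()
```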
6 changes: 0 additions & 6 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -113,15 +113,9 @@ def associate(**kwargs: Any) -> None:
metavar=typeStrAcceptsMultiple,
help="Dimensions that should be skipped during import",
)
@click.option("--reuse-ids", is_flag=True, help="Force re-use of imported dataset IDs for integer IDs.")
@options_file_option()
def butler_import(*args: Any, **kwargs: Any) -> None:
"""Import data into a butler repository."""
# ``reuse_ids`` is not used by ``butlerImport``.
reuse_ids = kwargs.pop("reuse_ids", False)
if reuse_ids:
click.echo("WARNING: --reuse-ids option is deprecated and will be removed after v26.")

script.butlerImport(*args, **kwargs)


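
Because the flag is now removed outright rather than warned about, scripts that still pass it will fail option parsing. A minimal check of that behaviour, assuming click's test runner; the repo and directory arguments are placeholders:

```python
from click.testing import CliRunner

from lsst.daf.butler.cli.cmd.commands import butler_import

runner = CliRunner()
# --reuse-ids now fails with "No such option" instead of printing a warning.
result = runner.invoke(butler_import, ["--reuse-ids", "REPO", "EXPORT_DIR"])
assert result.exit_code != 0
```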