Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-26600: Add ability to remove a dataset type #379

Merged
merged 1 commit into from
Sep 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 28 additions & 0 deletions python/lsst/daf/butler/registry/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,34 @@ def registerDatasetType(self, datasetType: DatasetType) -> bool:
_, inserted = self._datasets.register(datasetType)
return inserted

def removeDatasetType(self, name: str) -> None:
    """Delete the definition of the named `DatasetType` from the registry.

    .. warning::

        Dataset type definitions are cached by Registry.  Other butler
        processes that are already active and have not seen the deletion
        may therefore behave unexpectedly once a definition is removed.

    Parameters
    ----------
    name : `str`
        Name of the dataset type whose definition should be removed.

    Raises
    ------
    lsst.daf.butler.registry.OrphanedRecordError
        Raised if datasets are still associated with the dataset type
        definition that is being removed.
    ValueError
        Raised if ``name`` refers to a component of a composite dataset
        type; component definitions can not be removed on their own.

    Notes
    -----
    Asking to remove a dataset type that is not registered is not treated
    as an error.
    """
    # The dataset manager needs the dimension universe so that it can
    # refresh its cached definitions after the removal.
    universe = self._dimensions.universe
    self._datasets.remove(name, universe=universe)

def getDatasetType(self, name: str) -> DatasetType:
"""Get the `DatasetType`.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
DimensionGraph,
DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError
from lsst.daf.butler.registry import ConflictingDefinitionError, OrphanedRecordError
from lsst.daf.butler.registry.interfaces import (
DatasetRecordStorage,
DatasetRecordStorageManager,
Expand Down Expand Up @@ -115,6 +115,23 @@ def refresh(self, *, universe: DimensionUniverse) -> None:
self._byName = byName
self._byId = byId

def remove(self, name: str, *, universe: DimensionUniverse) -> None:
    # Docstring inherited from DatasetRecordStorageManager.

    # Only the component part of the name is needed here: a component
    # dataset type can not be deleted independently of its composite.
    _, componentName = DatasetType.splitDatasetTypeName(name)
    if componentName is not None:
        raise ValueError(f"Cannot delete a dataset type of a component of a composite (given {name})")

    # Delete the row.  An integrity error from the database is reported
    # to the caller as OrphanedRecordError, chained to the original
    # exception so the database-level cause is not lost.
    try:
        self._db.delete(self._static.dataset_type, ["name"], {"name": name})
    except sqlalchemy.exc.IntegrityError as e:
        raise OrphanedRecordError(f"Dataset type {name} can not be removed."
                                  " It is associated with datasets that must be removed first.") from e

    # Now refresh everything -- removal is rare enough that this does
    # not need to be fast.
    self.refresh(universe=universe)

def find(self, name: str) -> Optional[DatasetRecordStorage]:
# Docstring inherited from DatasetRecordStorageManager.
compositeName, componentName = DatasetType.splitDatasetTypeName(name)
Expand Down
13 changes: 13 additions & 0 deletions python/lsst/daf/butler/registry/interfaces/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,19 @@ def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool
"""
raise NotImplementedError()

@abstractmethod
def remove(self, name: str, *, universe: DimensionUniverse) -> None:
    """Delete a dataset type definition.

    Parameters
    ----------
    name : `str`
        Name of the dataset type that should be removed.
    universe : `DimensionUniverse`
        Dimension universe; it is required so that a refresh can be
        triggered once the removal has taken place.
    """
    raise NotImplementedError()

@abstractmethod
def __iter__(self) -> Iterator[DatasetType]:
"""Return an iterator over the the dataset types present in this layer.
Expand Down
27 changes: 26 additions & 1 deletion tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def mock_s3(cls):
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials,
unsetAwsEnvCredentials)
Expand Down Expand Up @@ -589,6 +589,27 @@ def testPruneCollections(self):
ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

# Add a new dataset type and delete it
tmpName = "prune_collections_disposable"
tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass,
butler.registry)
tmpFromRegistry = butler.registry.getDatasetType(tmpName)
self.assertEqual(tmpDatasetType, tmpFromRegistry)
butler.registry.removeDatasetType(tmpName)
with self.assertRaises(KeyError):
butler.registry.getDatasetType(tmpName)
# Removing a second time is fine
butler.registry.removeDatasetType(tmpName)

# Component removal is not allowed
with self.assertRaises(ValueError):
butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component"))

# Try and fail to delete a datasetType that is associated with data
with self.assertRaises(OrphanedRecordError):
butler.registry.removeDatasetType(datasetType.name)

# Try to delete a RUN collection without purge, or with purge and not
# unstore.
with self.assertRaises(TypeError):
Expand Down Expand Up @@ -683,6 +704,10 @@ def testPruneCollections(self):
self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
[])

# Now that the collections have been pruned we can remove the
# dataset type
butler.registry.removeDatasetType(datasetType.name)

def testPickle(self):
"""Test pickle support.
"""
Expand Down