Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

- Fix unawaited coroutine in `stac_fastapi.core.core`. [#551](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/551)
- Parse `ES_TIMEOUT` environment variable as an integer. [#556](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/556)
- Implemented "Smart Unlink" logic in delete_catalog: when cascade=False (default), collections are unlinked from the catalog and become root-level orphans if they have no other parents, rather than being deleted. When cascade=True, collections are deleted entirely. This prevents accidental data loss and supports poly-hierarchy scenarios where collections belong to multiple catalogs. [#557](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/557)
- Fixed delete_catalog to use reverse lookup query on parent_ids field instead of fragile link parsing. This ensures all collections are found and updated correctly, preventing ghost relationships where collections remain tagged with deleted catalogs, especially in large catalogs or pagination scenarios. [#557](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/557)

### Removed

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ The following organizations have contributed time and/or funding to support the

## Latest News

- **12/09/2025:** **Feature Merge: Federated Catalogs.** The [`Catalogs Endpoint`](https://github.com/Healy-Hyperspatial/stac-api-extensions-catalogs-endpoint) extension is now in main! This enables a registry of catalogs and supports **poly-hierarchy** (collections belonging to multiple catalogs simultaneously). Enable it via `ENABLE_CATALOGS_EXTENSION`. _Coming next: Support for nested sub-catalogs._
- **12/09/2025:** Feature Merge: **Federated Catalogs**. The [`Catalogs Endpoint`](https://github.com/Healy-Hyperspatial/stac-api-extensions-catalogs-endpoint) extension is now in main! This enables a registry of catalogs and supports **poly-hierarchy** (collections belonging to multiple catalogs simultaneously). Enable it via `ENABLE_CATALOGS_EXTENSION`. _Coming next: Support for nested sub-catalogs._
- **11/07/2025:** 🌍 The SFEOS STAC Viewer is now available at: https://healy-hyperspatial.github.io/sfeos-web. Use this site to examine your data and test your STAC API!
- **10/24/2025:** Added `previous_token` pagination using Redis for efficient navigation. This feature allows users to navigate backwards through large result sets by storing pagination state in Redis. To use this feature, ensure Redis is configured (see [Redis for navigation](#redis-for-navigation)) and set `REDIS_ENABLE=true` in your environment.
- **10/23/2025:** The `EXCLUDED_FROM_QUERYABLES` environment variable was added to exclude fields from the `queryables` endpoint. See [docs](#excluding-fields-from-queryables).
Expand Down
130 changes: 54 additions & 76 deletions stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,71 +314,42 @@ async def delete_catalog(
HTTPException: If the catalog is not found.
"""
try:
# Get the catalog to verify it exists and get its collections
db_catalog = await self.client.database.find_catalog(catalog_id)
catalog = self.client.catalog_serializer.db_to_stac(db_catalog, request)
# Get the catalog to verify it exists
await self.client.database.find_catalog(catalog_id)

# Extract collection IDs from catalog links
collection_ids = []
if hasattr(catalog, "links") and catalog.links:
for link in catalog.links:
rel = (
link.get("rel")
if hasattr(link, "get")
else getattr(link, "rel", None)
)
if rel == "child":
href = (
link.get("href", "")
if hasattr(link, "get")
else getattr(link, "href", "")
)
if href and "/collections/" in href:
# Extract collection ID from href
collection_id = href.split("/collections/")[-1].split("/")[
0
]
if collection_id:
collection_ids.append(collection_id)

if cascade:
# Delete each collection
for coll_id in collection_ids:
try:
await self.client.database.delete_collection(coll_id)
# Use reverse lookup query to find all collections with this catalog in parent_ids.
# This is more reliable than parsing links, as it captures all collections
# regardless of pagination or link truncation.
query_body = {"query": {"term": {"parent_ids": catalog_id}}}
search_result = await self.client.database.client.search(
index=COLLECTIONS_INDEX, body=query_body, size=10000
)
children = [hit["_source"] for hit in search_result["hits"]["hits"]]

# Process each child collection
for child in children:
child_id = child.get("id")
try:
if cascade:
# DANGER ZONE: User explicitly requested cascade delete.
# Delete the collection entirely, regardless of other parents.
await self.client.database.delete_collection(child_id)
logger.info(
f"Deleted collection {coll_id} as part of cascade delete for catalog {catalog_id}"
)
except Exception as e:
error_msg = str(e)
if "not found" in error_msg.lower():
logger.debug(
f"Collection {coll_id} not found, skipping (may have been deleted elsewhere)"
)
else:
logger.warning(
f"Failed to delete collection {coll_id}: {e}"
)
else:
# Remove catalog from each collection's parent_ids and links (orphan them)
for coll_id in collection_ids:
try:
# Get the collection from database to access parent_ids
collection_db = await self.client.database.find_collection(
coll_id
f"Deleted collection {child_id} as part of cascade delete for catalog {catalog_id}"
)

# Remove catalog_id from parent_ids
parent_ids = collection_db.get("parent_ids", [])
else:
# SAFE ZONE: Smart Unlink - Remove only this catalog from parent_ids.
# The collection survives and becomes a root-level collection if it has no other parents.
parent_ids = child.get("parent_ids", [])
if catalog_id in parent_ids:
parent_ids.remove(catalog_id)
collection_db["parent_ids"] = parent_ids
child["parent_ids"] = parent_ids

# Also remove the catalog link from the collection's links
if "links" in collection_db:
collection_db["links"] = [
if "links" in child:
child["links"] = [
link
for link in collection_db.get("links", [])
for link in child.get("links", [])
if not (
link.get("rel") == "catalog"
and catalog_id in link.get("href", "")
Expand All @@ -387,30 +358,37 @@ async def delete_catalog(

# Update the collection in the database
await self.client.database.update_collection(
collection_id=coll_id,
collection=collection_db,
refresh=True,
)
logger.info(
f"Removed catalog {catalog_id} from collection {coll_id} parent_ids and links"
collection_id=child_id,
collection=child,
refresh=False,
)

# Log the result
if len(parent_ids) == 0:
logger.info(
f"Collection {child_id} is now a root-level orphan (no parent catalogs)"
)
else:
logger.info(
f"Removed catalog {catalog_id} from collection {child_id}; still belongs to {len(parent_ids)} other catalog(s)"
)
else:
logger.debug(
f"Catalog {catalog_id} not in parent_ids for collection {coll_id}"
)
except Exception as e:
error_msg = str(e)
if "not found" in error_msg.lower():
logger.debug(
f"Collection {coll_id} not found, skipping (may have been deleted elsewhere)"
)
else:
logger.warning(
f"Failed to remove catalog {catalog_id} from collection {coll_id}: {e}"
f"Catalog {catalog_id} not in parent_ids for collection {child_id}"
)
except Exception as e:
error_msg = str(e)
if "not found" in error_msg.lower():
logger.debug(
f"Collection {child_id} not found, skipping (may have been deleted elsewhere)"
)
else:
logger.warning(
f"Failed to process collection {child_id} during catalog deletion: {e}"
)

# Delete the catalog
await self.client.database.delete_catalog(catalog_id)
# Delete the catalog itself
await self.client.database.delete_catalog(catalog_id, refresh=True)
logger.info(f"Deleted catalog {catalog_id}")

except Exception as e:
Expand Down
110 changes: 110 additions & 0 deletions stac_fastapi/tests/extensions/test_catalogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,116 @@ async def test_catalog_links_contain_all_collections(
), f"Collection {collection_id} missing from catalog links. Found links: {child_hrefs}"


@pytest.mark.asyncio
async def test_delete_catalog_no_cascade_orphans_collections(
catalogs_app_client, load_test_data
):
"""Test that deleting a catalog without cascade makes collections root-level orphans."""
# Create a catalog
test_catalog = load_test_data("test_catalog.json")
catalog_id = f"test-catalog-{uuid.uuid4()}"
test_catalog["id"] = catalog_id
test_catalog["links"] = [
link for link in test_catalog.get("links", []) if link.get("rel") != "child"
]

catalog_resp = await catalogs_app_client.post("/catalogs", json=test_catalog)
assert catalog_resp.status_code == 201

# Create a collection in the catalog
test_collection = load_test_data("test_collection.json")
collection_id = f"test-collection-{uuid.uuid4()}"
test_collection["id"] = collection_id

create_resp = await catalogs_app_client.post(
f"/catalogs/{catalog_id}/collections", json=test_collection
)
assert create_resp.status_code == 201

# Delete the catalog without cascade (default behavior)
delete_resp = await catalogs_app_client.delete(f"/catalogs/{catalog_id}")
assert delete_resp.status_code == 204

# Verify the collection still exists
get_resp = await catalogs_app_client.get(f"/collections/{collection_id}")
assert get_resp.status_code == 200

# Verify the collection is now a root-level orphan (accessible from /collections)
collections_resp = await catalogs_app_client.get("/collections")
assert collections_resp.status_code == 200
collections_data = collections_resp.json()
collection_ids = [col["id"] for col in collections_data.get("collections", [])]
assert (
collection_id in collection_ids
), "Orphaned collection should appear in root /collections endpoint"

# Verify the collection no longer has a catalog link to the deleted catalog
collection_data = get_resp.json()
collection_links = collection_data.get("links", [])
catalog_link = None
for link in collection_links:
if link.get("rel") == "catalog" and catalog_id in link.get("href", ""):
catalog_link = link
break

assert (
catalog_link is None
), "Orphaned collection should not have link to deleted catalog"


@pytest.mark.asyncio
async def test_delete_catalog_no_cascade_multi_parent_collection(
catalogs_app_client, load_test_data
):
"""Test that deleting a catalog without cascade preserves collections with other parents."""
# Create two catalogs
catalog_ids = []
for i in range(2):
test_catalog = load_test_data("test_catalog.json")
catalog_id = f"test-catalog-{uuid.uuid4()}-{i}"
test_catalog["id"] = catalog_id

catalog_resp = await catalogs_app_client.post("/catalogs", json=test_catalog)
assert catalog_resp.status_code == 201
catalog_ids.append(catalog_id)

# Create a collection in the first catalog
test_collection = load_test_data("test_collection.json")
collection_id = f"test-collection-{uuid.uuid4()}"
test_collection["id"] = collection_id

create_resp = await catalogs_app_client.post(
f"/catalogs/{catalog_ids[0]}/collections", json=test_collection
)
assert create_resp.status_code == 201

# Add the collection to the second catalog
add_resp = await catalogs_app_client.post(
f"/catalogs/{catalog_ids[1]}/collections", json=test_collection
)
assert add_resp.status_code == 201

# Delete the first catalog without cascade
delete_resp = await catalogs_app_client.delete(f"/catalogs/{catalog_ids[0]}")
assert delete_resp.status_code == 204

# Verify the collection still exists
get_resp = await catalogs_app_client.get(f"/collections/{collection_id}")
assert get_resp.status_code == 200

# Verify the collection is still accessible from the second catalog
get_from_catalog_resp = await catalogs_app_client.get(
f"/catalogs/{catalog_ids[1]}/collections/{collection_id}"
)
assert get_from_catalog_resp.status_code == 200

# Verify the collection is NOT accessible from the deleted catalog
get_from_deleted_resp = await catalogs_app_client.get(
f"/catalogs/{catalog_ids[0]}/collections/{collection_id}"
)
assert get_from_deleted_resp.status_code == 404


@pytest.mark.asyncio
async def test_parent_ids_not_exposed_to_client(catalogs_app_client, load_test_data):
"""Test that parent_ids field is not exposed in API responses."""
Expand Down