From 952a29018677d12aa8f8d3bab132f004a8675cd8 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Mon, 29 Sep 2025 20:27:48 +0800 Subject: [PATCH 01/22] update mappings --- .../sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index b2d7264d6..18f6b8165 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -160,7 +160,9 @@ class Geometry(Protocol): # noqa "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, "properties": { "id": {"type": "keyword"}, - "extent.spatial.bbox": {"type": "long"}, + "title": {"type": "text"}, + "description": {"type": "text"}, + "bbox_shape": {"type": "geo_shape"}, "extent.temporal.interval": { "type": "date", "format": "strict_date_optional_time||epoch_millis", From f30b4e4be3364b9c8a8d33c7358f1e47949e7d98 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Mon, 29 Sep 2025 20:37:40 +0800 Subject: [PATCH 02/22] update core args --- stac_fastapi/core/stac_fastapi/core/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index ac2f228d2..df07a057e 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -240,14 +240,17 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage: async def all_collections( self, - datetime: Optional[str] = None, limit: Optional[int] = None, + bbox: Optional[BBox] = None, + datetime: Optional[str] = None, fields: Optional[List[str]] = None, sortby: Optional[Union[str, List[str]]] = None, filter_expr: Optional[str] = None, filter_lang: Optional[str] = None, q: Optional[Union[str, List[str]]] = None, query: Optional[str] = None, + request: Request = None, + token: Optional[str] = None, **kwargs, ) -> stac_types.Collections: """Read all collections from the database. @@ -266,7 +269,6 @@ async def all_collections( Returns: A Collections object containing all the collections in the database and links to various resources. """ - request = kwargs["request"] base_url = str(request.base_url) # Get the global limit from environment variable From d167a29c93f2fef625245447374e08acefe833af Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 30 Sep 2025 13:24:07 +0800 Subject: [PATCH 03/22] bbox scratch --- CHANGELOG.md | 6 + compose.yml | 2 + recreate_collections_index.py | 121 +++++++++ .../stac_fastapi/core/base_database_logic.py | 4 + stac_fastapi/core/stac_fastapi/core/core.py | 10 +- .../core/extensions/collections_search.py | 1 + .../core/stac_fastapi/core/serializers.py | 55 ++++- .../elasticsearch/database_logic.py | 42 +++- .../stac_fastapi/opensearch/database_logic.py | 42 +++- .../tests/api/test_api_search_collections.py | 229 ++++++++++++++++++ update_collections_mapping.py | 137 +++++++++++ 11 files changed, 643 insertions(+), 6 deletions(-) create mode 100644 recreate_collections_index.py create mode 100644 update_collections_mapping.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 94dbe9a97..635fdca24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Spatial search support for collections via `bbox` parameter on `/collections` endpoint. 
Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries. +- Migration scripts (`update_collections_mapping.py` and `recreate_collections_index.py`) to help add `bbox_shape` field to existing deployments. + ### Changed ### Fixed +- Issue where token was not being passed from request to POST collections search logic +- Issue where datetime param was not being passed from POST collections search logic to Elasticsearch + [v6.5.0] - 2025-09-29 ### Added diff --git a/compose.yml b/compose.yml index 77d64198b..8c83ae127 100644 --- a/compose.yml +++ b/compose.yml @@ -22,6 +22,7 @@ services: - ES_VERIFY_CERTS=false - BACKEND=elasticsearch - DATABASE_REFRESH=true + - ENABLE_COLLECTIONS_SEARCH_ROUTE=true ports: - "8080:8080" volumes: @@ -56,6 +57,7 @@ services: - ES_VERIFY_CERTS=false - BACKEND=opensearch - STAC_FASTAPI_RATE_LIMIT=200/minute + - ENABLE_COLLECTIONS_SEARCH_ROUTE=true ports: - "8082:8082" volumes: diff --git a/recreate_collections_index.py b/recreate_collections_index.py new file mode 100644 index 000000000..047a14675 --- /dev/null +++ b/recreate_collections_index.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +""" +Script to delete and recreate the collections index. + +WARNING: This will DELETE all existing collections! +Only use this in development environments. + +Usage: + python recreate_collections_index.py +""" + +import asyncio +import os +import sys + +from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX + +# Determine which backend to use +BACKEND = os.getenv("BACKEND", "elasticsearch").lower() + +if BACKEND == "opensearch": + from stac_fastapi.opensearch.config import ( + AsyncOpensearchSettings as AsyncSearchSettings, + ) + from stac_fastapi.opensearch.database_logic import ( + create_collection_index, + create_index_templates, + ) +else: + from stac_fastapi.elasticsearch.config import ( + AsyncElasticsearchSettings as AsyncSearchSettings, + ) + from stac_fastapi.elasticsearch.database_logic import ( + create_collection_index, + create_index_templates, + ) + + +async def recreate_index(): + """Delete and recreate the collections index.""" + settings = AsyncSearchSettings() + client = settings.create_client + + print(f"Using backend: {BACKEND}") + print(f"\n{'=' * 60}") + print("WARNING: This will DELETE all existing collections!") + print(f"{'=' * 60}\n") + + # Check if running in production + env = os.getenv("ENVIRONMENT", "development").lower() + if env == "production": + print("ERROR: This script should not be run in production!") + print("Use update_collections_mapping.py instead.") + sys.exit(1) + + response = input("Are you sure you want to continue? 
(yes/no): ") + if response.lower() != "yes": + print("Aborted.") + sys.exit(0) + + try: + # Delete the collections index + index_name = f"{COLLECTIONS_INDEX}-000001" + alias_name = COLLECTIONS_INDEX + + print(f"\nDeleting index {index_name}...") + exists = await client.indices.exists(index=index_name) + if exists: + await client.indices.delete(index=index_name) + print(f"✓ Deleted index {index_name}") + else: + print(f"⊘ Index {index_name} does not exist") + + # Check if alias exists and delete it + alias_exists = await client.indices.exists_alias(name=alias_name) + if alias_exists: + print(f"Deleting alias {alias_name}...") + await client.indices.delete_alias( + index="_all", name=alias_name, ignore=[404] + ) + print(f"✓ Deleted alias {alias_name}") + + # Recreate index templates + print("\nRecreating index templates...") + await create_index_templates() + print("✓ Index templates created") + + # Recreate the collections index + print("\nRecreating collections index...") + await create_collection_index() + print("✓ Collections index created") + + # Verify the mapping includes bbox_shape + print("\nVerifying mapping...") + mapping = await client.indices.get_mapping(index=index_name) + properties = mapping[index_name]["mappings"]["properties"] + + if "bbox_shape" in properties: + print( + f"✓ bbox_shape field is present in mapping: {properties['bbox_shape']}" + ) + else: + print("✗ WARNING: bbox_shape field is NOT in the mapping!") + + print("\n" + "=" * 60) + print("Collections index successfully recreated!") + print("You can now create collections with bbox_shape support.") + print("=" * 60) + + except Exception as e: + print(f"\n✗ Error: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + finally: + await client.close() + + +if __name__ == "__main__": + asyncio.run(recreate_index()) diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py index c592b6d26..c0a85498f 100644 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -3,6 +3,8 @@ import abc from typing import Any, Dict, Iterable, List, Optional, Tuple +from stac_pydantic.shared import BBox + class BaseDatabaseLogic(abc.ABC): """ @@ -19,6 +21,7 @@ async def get_all_collections( limit: int, request: Any = None, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[BBox] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Retrieve a list of collections from the database, supporting pagination. @@ -27,6 +30,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Any, optional): The FastAPI request object. Defaults to None. sort (Optional[List[Dict[str, Any]]], optional): Optional sort parameter. Defaults to None. + bbox (Optional[BBox], optional): Bounding box to filter collections by spatial extent. Defaults to None. Returns: A tuple of (collections, next pagination token if any). 
diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index df07a057e..e2bdf4952 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -300,7 +300,9 @@ async def all_collections( else: limit = 10 - token = request.query_params.get("token") + # Get token from query params only if not already provided (for GET requests) + if token is None: + token = request.query_params.get("token") # Process fields parameter for filtering collection properties includes, excludes = set(), set() @@ -400,6 +402,7 @@ async def all_collections( limit=limit, request=request, sort=sort, + bbox=bbox, q=q_list, filter=parsed_filter, query=parsed_query, @@ -501,6 +504,11 @@ async def post_all_collections( # Pass all parameters from search_request to all_collections return await self.all_collections( limit=search_request.limit if hasattr(search_request, "limit") else None, + bbox=search_request.bbox if hasattr(search_request, "bbox") else None, + datetime=search_request.datetime + if hasattr(search_request, "datetime") + else None, + token=search_request.token if hasattr(search_request, "token") else None, fields=fields, sortby=sortby, filter_expr=search_request.filter diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py b/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py index 0ddbefeda..62ec00340 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py @@ -18,6 +18,7 @@ class CollectionsSearchRequest(ExtendedSearch): """Extended search model for collections with free text search support.""" q: Optional[Union[str, List[str]]] = None + token: Optional[str] = None class CollectionsSearchEndpointExtension(ApiExtension): diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py index 1700ac598..a8c17d2b7 100644 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -1,6 +1,7 @@ """Serializers.""" import abc +import logging from copy import deepcopy from typing import Any, List, Optional @@ -9,10 +10,12 @@ from stac_fastapi.core.datetime_utils import now_to_rfc3339_str from stac_fastapi.core.models.links import CollectionLinks -from stac_fastapi.core.utilities import get_bool_env +from stac_fastapi.core.utilities import bbox2polygon, get_bool_env from stac_fastapi.types import stac as stac_types from stac_fastapi.types.links import ItemLinks, resolve_links +logger = logging.getLogger(__name__) + @attr.s class Serializer(abc.ABC): @@ -141,6 +144,53 @@ def stac_to_db( collection.get("links", []), str(request.base_url) ) + # Convert bbox to bbox_shape for geospatial queries + if "extent" in collection and "spatial" in collection["extent"]: + spatial_extent = collection["extent"]["spatial"] + if "bbox" in spatial_extent and spatial_extent["bbox"]: + # Get the first bbox (collections can have multiple bboxes, but we use the first one) + bbox = ( + spatial_extent["bbox"][0] + if isinstance(spatial_extent["bbox"][0], list) + else spatial_extent["bbox"] + ) + collection_id = collection.get("id", "unknown") + logger.debug( + f"Converting bbox to bbox_shape for collection '{collection_id}': bbox={bbox}" + ) + + if len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For 2D polygon, we 
only need the x,y coordinates and discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox: [minx, miny, maxx, maxy] + maxx, maxy = bbox[2], bbox[3] + logger.debug( + f"Collection '{collection_id}': Processing 2D bbox" + ) + else: + # 3D bbox: [minx, miny, minz, maxx, maxy, maxz] + # Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + logger.debug( + f"Collection '{collection_id}': Processing 3D bbox, discarding altitude values at indices 2 and 5" + ) + + # Convert bbox to GeoJSON polygon + bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy) + collection["bbox_shape"] = { + "type": "Polygon", + "coordinates": bbox_polygon_coords, + } + logger.info( + f"Collection '{collection_id}': Created bbox_shape from bbox [{minx}, {miny}, {maxx}, {maxy}]" + ) + else: + logger.warning( + f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4" + ) + if get_bool_env("STAC_INDEX_ASSETS"): collection["assets"] = [ {"es_key": k, **v} for k, v in collection.get("assets", {}).items() @@ -168,6 +218,9 @@ def db_to_stac( # Avoid modifying the input dict in-place ... doing so breaks some tests collection = deepcopy(collection) + # Remove internal bbox_shape field (not part of STAC spec) + collection.pop("bbox_shape", None) + # Set defaults collection_id = collection.get("id") collection.setdefault("type", "Collection") diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index f4f33cb97..699e436a6 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -175,6 +175,7 @@ async def get_all_collections( limit: int, request: Request, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[List[float]] = None, q: Optional[List[str]] = None, filter: Optional[Dict[str, Any]] = None, query: Optional[Dict[str, Dict[str, Any]]] = None, @@ -187,6 +188,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. + bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent. q (Optional[List[str]]): Free text search terms. query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters. filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. 
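The hunk below appends a `geo_shape` clause against the indexed `bbox_shape` field. As a hedged illustration of what that logic emits, this is the filter fragment that would be produced for an assumed Central-Europe request (`?bbox=0.0,40.0,20.0,60.0`); the surrounding `bool` query depends on whichever other filters (free text, CQL2, datetime) are active.

```python
import json

# Assumed example bbox from a GET request: ?bbox=0.0,40.0,20.0,60.0
minx, miny, maxx, maxy = 0.0, 40.0, 20.0, 60.0

geo_shape_filter = {
    "geo_shape": {
        "bbox_shape": {
            "shape": {
                "type": "Polygon",
                "coordinates": [
                    [
                        [minx, miny],
                        [maxx, miny],
                        [maxx, maxy],
                        [minx, maxy],
                        [minx, miny],
                    ]
                ],
            },
            # "intersects" matches any collection whose extent overlaps the box,
            # which is why global collections match every regional query.
            "relation": "intersects",
        }
    }
}

print(json.dumps(geo_shape_filter, indent=2))
```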
@@ -309,12 +311,49 @@ async def get_all_collections( query_parts.append(search_dict["query"]) except Exception as e: - logger = logging.getLogger(__name__) logger.error(f"Error converting query to Elasticsearch: {e}") # If there's an error, add a query that matches nothing query_parts.append({"bool": {"must_not": {"match_all": {}}}}) raise + # Apply bbox filter if provided + if bbox: + # Parse bbox if it's a string (from GET requests) + if isinstance(bbox, str): + try: + bbox = [float(x.strip()) for x in bbox.split(",")] + except (ValueError, AttributeError) as e: + logger.error(f"Invalid bbox format: {bbox}, error: {e}") + bbox = None + + if bbox and len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For geospatial queries, we discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox + maxx, maxy = bbox[2], bbox[3] + else: + # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + + # Convert bbox to a polygon for geo_shape query + bbox_polygon = { + "type": "Polygon", + "coordinates": bbox2polygon(minx, miny, maxx, maxy), + } + # Add geo_shape query to filter collections by bbox_shape field + query_parts.append( + { + "geo_shape": { + "bbox_shape": { + "shape": bbox_polygon, + "relation": "intersects", + } + } + } + ) + # Combine all query parts with AND logic if there are multiple datetime_filter = None if datetime: @@ -381,7 +420,6 @@ async def get_all_collections( try: matched = count_task.result().get("count") except Exception as e: - logger = logging.getLogger(__name__) logger.error(f"Count task failed: {e}") return collections, next_token, matched diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 8791390bb..682394141 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -159,6 +159,7 @@ async def get_all_collections( limit: int, request: Request, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[List[float]] = None, q: Optional[List[str]] = None, filter: Optional[Dict[str, Any]] = None, query: Optional[Dict[str, Dict[str, Any]]] = None, @@ -171,6 +172,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. + bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent. q (Optional[List[str]]): Free text search terms. query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters. filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. 
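The OpenSearch hunk below duplicates the Elasticsearch bbox handling, including the lenient parsing of the comma-separated `bbox` string that GET requests deliver. A standalone sketch of that normalization rule follows; the helper is hypothetical, written only to make the shared behavior concrete (invalid input is dropped rather than raised, matching both database layers).

```python
from typing import List, Optional


def parse_bbox_param(bbox: object) -> Optional[List[float]]:
    """Normalize a bbox query parameter to a list of floats.

    GET requests deliver ``bbox`` as a comma-separated string, while POST
    bodies deliver a JSON array; anything that cannot be coerced to at
    least four floats is treated as absent.
    """
    if isinstance(bbox, str):
        try:
            bbox = [float(part.strip()) for part in bbox.split(",")]
        except (ValueError, AttributeError):
            return None
    if isinstance(bbox, (list, tuple)) and len(bbox) >= 4:
        return [float(v) for v in bbox]
    return None


assert parse_bbox_param("0.0, 40.0, 20.0, 60.0") == [0.0, 40.0, 20.0, 60.0]
assert parse_bbox_param([0, 40, 20, 60, 0, 5000]) == [0.0, 40.0, 20.0, 60.0, 0.0, 5000.0]
assert parse_bbox_param("not,a,bbox") is None
```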
@@ -293,12 +295,49 @@ async def get_all_collections( query_parts.append(search_dict["query"]) except Exception as e: - logger = logging.getLogger(__name__) logger.error(f"Error converting query to OpenSearch: {e}") # If there's an error, add a query that matches nothing query_parts.append({"bool": {"must_not": {"match_all": {}}}}) raise + # Apply bbox filter if provided + if bbox: + # Parse bbox if it's a string (from GET requests) + if isinstance(bbox, str): + try: + bbox = [float(x.strip()) for x in bbox.split(",")] + except (ValueError, AttributeError) as e: + logger.error(f"Invalid bbox format: {bbox}, error: {e}") + bbox = None + + if bbox and len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For geospatial queries, we discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox + maxx, maxy = bbox[2], bbox[3] + else: + # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + + # Convert bbox to a polygon for geo_shape query + bbox_polygon = { + "type": "Polygon", + "coordinates": bbox2polygon(minx, miny, maxx, maxy), + } + # Add geo_shape query to filter collections by bbox_shape field + query_parts.append( + { + "geo_shape": { + "bbox_shape": { + "shape": bbox_polygon, + "relation": "intersects", + } + } + } + ) + # Combine all query parts with AND logic if there are multiple datetime_filter = None if datetime: @@ -365,7 +404,6 @@ async def get_all_collections( try: matched = count_task.result().get("count") except Exception as e: - logger = logging.getLogger(__name__) logger.error(f"Count task failed: {e}") return collections, next_token, matched diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 8f5bed73b..719b85efd 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -857,3 +857,232 @@ async def test_collections_pagination_all_endpoints(app_client, txn_client, ctx) for i, expected_id in enumerate(expected_ids): assert test_found[i]["id"] == expected_id + + +@pytest.mark.asyncio +async def test_collections_bbox_all_endpoints(app_client, txn_client, ctx): + """Verify GET /collections, GET /collections-search, and POST /collections-search honor the bbox parameter.""" + # Create multiple collections with different spatial extents + base_collection = ctx.collection + + # Use unique prefixes to avoid conflicts between tests + test_prefix = f"bbox-{uuid.uuid4().hex[:8]}" + + # Create collections with different bboxes + # Collection 1: Europe bbox + collection_europe = base_collection.copy() + collection_europe["id"] = f"{test_prefix}-europe" + collection_europe["title"] = "Europe Collection" + collection_europe["extent"] = { + "spatial": {"bbox": [[-10.0, 35.0, 40.0, 70.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_europe) + + # Collection 2: North America bbox + collection_na = base_collection.copy() + collection_na["id"] = f"{test_prefix}-north-america" + collection_na["title"] = "North America Collection" + collection_na["extent"] = { + "spatial": {"bbox": [[-170.0, 15.0, -50.0, 75.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_na) + + # Collection 3: Asia bbox + collection_asia = base_collection.copy() + collection_asia["id"] = 
f"{test_prefix}-asia" + collection_asia["title"] = "Asia Collection" + collection_asia["extent"] = { + "spatial": {"bbox": [[60.0, -10.0, 150.0, 55.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_asia) + + # Collection 4: Global bbox (should match any query) + collection_global = base_collection.copy() + collection_global["id"] = f"{test_prefix}-global" + collection_global["title"] = "Global Collection" + collection_global["extent"] = { + "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_global) + + # Collection 5: 3D bbox (with altitude) - should still work for 2D queries + collection_3d = base_collection.copy() + collection_3d["id"] = f"{test_prefix}-3d-europe" + collection_3d["title"] = "3D Europe Collection" + collection_3d["extent"] = { + "spatial": {"bbox": [[-10.0, 35.0, 0.0, 40.0, 70.0, 5000.0]]}, # 3D bbox + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_3d) + + await refresh_indices(txn_client) + + # Test 1: Query for Europe region - should match Europe, Global, and 3D Europe collections + europe_bbox = [0.0, 40.0, 20.0, 60.0] # Central Europe + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, europe_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, europe_bbox)))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"bbox": europe_bbox}, + }, + ] + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find Europe, Global, and 3D Europe collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-europe" in found_ids + ), f"Europe collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" in found_ids + ), f"3D Europe collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find North America or Asia + assert ( + f"{test_prefix}-north-america" not in found_ids + ), f"North America should not match Europe bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-asia" not in found_ids + ), f"Asia should not match Europe bbox in {endpoint['method']} {endpoint['path']}" + + # Test 2: Query for North America region - should match North America and Global collections + na_bbox = [-120.0, 30.0, -80.0, 50.0] # Central North America + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, na_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, na_bbox)))], + }, + {"method": "POST", "path": "/collections-search", "body": {"bbox": na_bbox}}, + ] + + for endpoint in endpoints: + if 
endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find North America and Global collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-north-america" in found_ids + ), f"North America collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find Europe, Asia, or 3D Europe + assert ( + f"{test_prefix}-europe" not in found_ids + ), f"Europe should not match North America bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-asia" not in found_ids + ), f"Asia should not match North America bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" not in found_ids + ), f"3D Europe should not match North America bbox in {endpoint['method']} {endpoint['path']}" + + # Test 3: Query for Asia region - should match Asia and Global collections + asia_bbox = [100.0, 20.0, 130.0, 45.0] # East Asia + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, asia_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, asia_bbox)))], + }, + {"method": "POST", "path": "/collections-search", "body": {"bbox": asia_bbox}}, + ] + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find Asia and Global collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-asia" in found_ids + ), f"Asia collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find Europe, North America, or 3D Europe + assert ( + f"{test_prefix}-europe" not in found_ids + ), f"Europe should not match Asia bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-north-america" not in found_ids + ), f"North America should not match Asia bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" not in found_ids + ), f"3D Europe should not match Asia bbox in {endpoint['method']} {endpoint['path']}" diff --git a/update_collections_mapping.py b/update_collections_mapping.py new file mode 100644 index 000000000..331d5aa84 --- /dev/null +++ b/update_collections_mapping.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +""" +Script to update the collections index mapping to 
add the bbox_shape field. + +This script will: +1. Add the bbox_shape field to the existing collections index +2. Reindex all collections to populate the bbox_shape field + +Usage: + python update_collections_mapping.py +""" + +import asyncio +import os +from unittest.mock import Mock + +from stac_fastapi.core.serializers import CollectionSerializer +from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX + +# Determine which backend to use +BACKEND = os.getenv("BACKEND", "elasticsearch").lower() + +if BACKEND == "opensearch": + from stac_fastapi.opensearch.config import ( + AsyncOpensearchSettings as AsyncSearchSettings, + ) +else: + from stac_fastapi.elasticsearch.config import ( + AsyncElasticsearchSettings as AsyncSearchSettings, + ) + + +async def update_mapping(): + """Update the collections index mapping to add bbox_shape field.""" + settings = AsyncSearchSettings() + client = settings.create_client + + print(f"Connecting to {BACKEND}...") + + # Check if index exists + index_name = f"{COLLECTIONS_INDEX}-000001" + exists = await client.indices.exists(index=index_name) + + if not exists: + print(f"Index {index_name} does not exist. Creating it...") + from stac_fastapi.elasticsearch.database_logic import create_collection_index + + await create_collection_index() + print("Index created successfully!") + return + + print(f"Index {index_name} exists. Updating mapping...") + + # Add the bbox_shape field to the mapping + try: + await client.indices.put_mapping( + index=index_name, body={"properties": {"bbox_shape": {"type": "geo_shape"}}} + ) + print("✓ Mapping updated successfully!") + except Exception as e: + print(f"✗ Error updating mapping: {e}") + return + + # Now reindex all collections to populate bbox_shape + print("\nReindexing collections to populate bbox_shape field...") + + try: + # Get all collections + response = await client.search( + index=index_name, + body={ + "query": {"match_all": {}}, + "size": 1000, # Adjust if you have more collections + }, + ) + + collections = response["hits"]["hits"] + print(f"Found {len(collections)} collections to update") + + if len(collections) == 0: + print("No collections to update.") + return + + # Create a mock request for the serializer + mock_request = Mock() + mock_request.base_url = "http://localhost:8080/" + + updated_count = 0 + error_count = 0 + + for hit in collections: + collection = hit["_source"] + collection_id = collection.get("id", "unknown") + + try: + # Use the serializer to convert bbox to bbox_shape + updated_collection = CollectionSerializer.stac_to_db( + collection, mock_request + ) + + # Check if bbox_shape was created + if "bbox_shape" in updated_collection: + # Update the document + await client.update( + index=index_name, + id=hit["_id"], + body={"doc": {"bbox_shape": updated_collection["bbox_shape"]}}, + refresh=True, + ) + print(f" ✓ Updated collection '{collection_id}'") + updated_count += 1 + else: + print(f" ⊘ Collection '{collection_id}' has no bbox to convert") + except Exception as e: + print(f" ✗ Error updating collection '{collection_id}': {e}") + error_count += 1 + + print("\n" + "=" * 60) + print("Summary:") + print(f" Total collections: {len(collections)}") + print(f" Successfully updated: {updated_count}") + print(f" Errors: {error_count}") + print(f" Skipped (no bbox): {len(collections) - updated_count - error_count}") + print("=" * 60) + + except Exception as e: + print(f"✗ Error during reindexing: {e}") + import traceback + + traceback.print_exc() + finally: + await client.close() + + +if 
__name__ == "__main__": + print(f"Using backend: {BACKEND}") + asyncio.run(update_mapping()) From 6dea71543e599c654f4b259a41098b1ae61725b8 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 30 Sep 2025 13:30:10 +0800 Subject: [PATCH 04/22] update pagination test --- .../tests/api/test_api_search_collections.py | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 719b85efd..9bd37f0d5 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -787,17 +787,35 @@ async def test_collections_pagination_all_endpoints(app_client, txn_client, ctx) for i, expected_id in enumerate(expected_ids): assert test_found[i]["id"] == expected_id - # Test second page using the token from the first page - if "token" in resp_json and resp_json["token"]: - token = resp_json["token"] - - # Make the request with token + # Test second page using the token from the next link + next_link = None + for link in resp_json.get("links", []): + if link.get("rel") == "next": + next_link = link + break + + if next_link: + # Extract token based on method if endpoint["method"] == "GET": - params = [(endpoint["param"], str(limit)), ("token", token)] - resp = await app_client.get(endpoint["path"], params=params) + # For GET, token is in the URL query params + from urllib.parse import parse_qs, urlparse + + parsed_url = urlparse(next_link["href"]) + query_params = parse_qs(parsed_url.query) + token = query_params.get("token", [None])[0] + + if token: + params = [(endpoint["param"], str(limit)), ("token", token)] + resp = await app_client.get(endpoint["path"], params=params) + else: + continue # Skip if no token found else: # POST - body = {endpoint["body_key"]: limit, "token": token} - resp = await app_client.post(endpoint["path"], json=body) + # For POST, token is in the body + body = next_link.get("body", {}) + if "token" in body: + resp = await app_client.post(endpoint["path"], json=body) + else: + continue # Skip if no token found assert ( resp.status_code == 200 @@ -805,10 +823,7 @@ async def test_collections_pagination_all_endpoints(app_client, txn_client, ctx) resp_json = resp.json() # Filter to our test collections - if endpoint["path"] == "/collections": - found_collections = resp_json - else: # For collection-search endpoints - found_collections = resp_json["collections"] + found_collections = resp_json["collections"] test_found = [ c for c in found_collections if c["id"].startswith(test_prefix) From 385e310f02ebdbb8bcbb47f63e2fcd7291d0d291 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 30 Sep 2025 13:54:44 +0800 Subject: [PATCH 05/22] update tests to use all endpoints --- CHANGELOG.md | 3 +- .../core/extensions/collections_search.py | 3 + .../tests/api/test_api_search_collections.py | 570 +++++++++++------- 3 files changed, 364 insertions(+), 212 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 635fdca24..4884d6589 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,8 +16,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### Fixed -- Issue where token was not being passed from request to POST collections search logic +- Issue where token, query param was not being passed to POST collections search logic - Issue where datetime param was not being passed from POST collections search logic to Elasticsearch +- Collections search tests to ensure both GET /collections and POST /collections-search endpoints are tested [v6.5.0] - 2025-09-29 diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py b/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py index 62ec00340..d36197d03 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py @@ -19,6 +19,9 @@ class CollectionsSearchRequest(ExtendedSearch): q: Optional[Union[str, List[str]]] = None token: Optional[str] = None + query: Optional[ + str + ] = None # Legacy query extension (deprecated but still supported) class CollectionsSearchEndpointExtension(ApiExtension): diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 9bd37f0d5..19c9c6071 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -8,7 +8,7 @@ @pytest.mark.asyncio async def test_collections_sort_id_asc(app_client, txn_client, ctx): - """Verify GET /collections honors ascending sort on id.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search honor ascending sort on id.""" # Create multiple collections with different ids base_collection = ctx.collection @@ -25,29 +25,48 @@ async def test_collections_sort_id_asc(app_client, txn_client, ctx): await refresh_indices(txn_client) - # Test ascending sort by id - resp = await app_client.get( - "/collections", - params=[("sortby", "+id")], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - test_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + # Define endpoints to test + endpoints = [ + {"method": "GET", "path": "/collections", "params": [("sortby", "+id")]}, + { + "method": "GET", + "path": "/collections-search", + "params": [("sortby", "+id")], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"sortby": [{"field": "id", "direction": "asc"}]}, + }, ] - # Collections should be sorted alphabetically by id - sorted_ids = sorted(collection_ids) - assert len(test_collections) == len(collection_ids) - for i, expected_id in enumerate(sorted_ids): - assert test_collections[i]["id"] == expected_id + for endpoint in endpoints: + # Test ascending sort by id + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Collections should be sorted alphabetically by id + sorted_ids = sorted(collection_ids) + assert len(test_collections) == len(collection_ids) + for i, expected_id in enumerate(sorted_ids): + assert test_collections[i]["id"] == expected_id @pytest.mark.asyncio 
async def test_collections_sort_id_desc(app_client, txn_client, ctx): - """Verify GET /collections honors descending sort on id.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search honor descending sort on id.""" # Create multiple collections with different ids base_collection = ctx.collection @@ -64,24 +83,43 @@ async def test_collections_sort_id_desc(app_client, txn_client, ctx): await refresh_indices(txn_client) - # Test descending sort by id - resp = await app_client.get( - "/collections", - params=[("sortby", "-id")], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - test_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + # Define endpoints to test + endpoints = [ + {"method": "GET", "path": "/collections", "params": [("sortby", "-id")]}, + { + "method": "GET", + "path": "/collections-search", + "params": [("sortby", "-id")], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"sortby": [{"field": "id", "direction": "desc"}]}, + }, ] - # Collections should be sorted in reverse alphabetical order by id - sorted_ids = sorted(collection_ids, reverse=True) - assert len(test_collections) == len(collection_ids) - for i, expected_id in enumerate(sorted_ids): - assert test_collections[i]["id"] == expected_id + for endpoint in endpoints: + # Test descending sort by id + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Collections should be sorted in reverse alphabetical order by id + sorted_ids = sorted(collection_ids, reverse=True) + assert len(test_collections) == len(collection_ids) + for i, expected_id in enumerate(sorted_ids): + assert test_collections[i]["id"] == expected_id @pytest.mark.asyncio @@ -245,7 +283,7 @@ async def test_collections_free_text_all_endpoints( @pytest.mark.asyncio async def test_collections_filter_search(app_client, txn_client, ctx): - """Verify GET /collections honors the filter parameter for structured search.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search honor the filter parameter for structured search.""" # Create multiple collections with different content base_collection = ctx.collection @@ -287,52 +325,97 @@ async def test_collections_filter_search(app_client, txn_client, ctx): # Use the ID of the first test collection for the filter test_collection_id = test_collections[0]["id"] + # Test 1: CQL2-JSON format # Create a simple filter for exact ID match using CQL2-JSON filter_expr = {"op": "=", "args": [{"property": "id"}, test_collection_id]} # Convert to JSON string for URL parameter filter_json = json.dumps(filter_expr) - # Use CQL2-JSON format with explicit filter-lang - resp = await app_client.get( - f"/collections?filter={filter_json}&filter-lang=cql2-json", - ) + # Define endpoints to test + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("filter", filter_json), ("filter-lang", "cql2-json")], + }, + { + "method": "GET", + "path": "/collections-search", + 
"params": [("filter", filter_json), ("filter-lang", "cql2-json")], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"filter": filter_expr, "filter-lang": "cql2-json"}, + }, + ] - assert resp.status_code == 200 - resp_json = resp.json() + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - # Should find exactly one collection with the specified ID - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() - assert ( - len(found_collections) == 1 - ), f"Expected 1 collection with ID {test_collection_id}, found {len(found_collections)}" - assert found_collections[0]["id"] == test_collection_id + # Should find exactly one collection with the specified ID + found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] - # Test 2: CQL2-text format with LIKE operator for more advanced filtering - # Use a filter that will match the test collection ID we created - filter_text = f"id LIKE '%{test_collection_id.split('-')[-1]}%'" + assert ( + len(found_collections) == 1 + ), f"Expected 1 collection with ID {test_collection_id}, found {len(found_collections)} for {endpoint['method']} {endpoint['path']}" + assert found_collections[0]["id"] == test_collection_id - resp = await app_client.get( - f"/collections?filter={filter_text}&filter-lang=cql2-text", - ) - assert resp.status_code == 200 - resp_json = resp.json() + # Test 2: CQL2-text format with LIKE operator + filter_text = f"id LIKE '%{test_collection_id.split('-')[-1]}%'" - # Should find the test collection we created - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("filter", filter_text), ("filter-lang", "cql2-text")], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("filter", filter_text), ("filter-lang", "cql2-text")], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"filter": filter_text, "filter-lang": "cql2-text"}, + }, ] - assert ( - len(found_collections) >= 1 - ), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter" + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Should find the test collection we created + found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] + assert ( + len(found_collections) >= 1 + ), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter for {endpoint['method']} {endpoint['path']}" @pytest.mark.asyncio async def test_collections_query_extension(app_client, txn_client, ctx): - """Verify GET /collections honors the query extension.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search honor the query extension.""" # Create multiple collections with different content base_collection = ctx.collection # Use unique prefixes to avoid 
conflicts between tests @@ -370,75 +453,100 @@ async def test_collections_query_extension(app_client, txn_client, ctx): await refresh_indices(txn_client) - # Use the exact ID that was created + # Test 1: Query with equal operator sentinel_id = f"{test_prefix}-sentinel" - query = {"id": {"eq": sentinel_id}} - resp = await app_client.get( - "/collections", - params=[("query", json.dumps(query))], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - found_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("query", json.dumps(query))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("query", json.dumps(query))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"query": json.dumps(query)}, + }, ] - # Should only find the sentinel collection - assert len(found_collections) == 1 - assert found_collections[0]["id"] == f"{test_prefix}-sentinel" - - # Test query extension with equal operator on ID - query = {"id": {"eq": f"{test_prefix}-sentinel"}} + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - resp = await app_client.get( - "/collections", - params=[("query", json.dumps(query))], - ) - assert resp.status_code == 200 - resp_json = resp.json() + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() - # Filter collections to only include the ones we created for this test - found_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) - ] - found_ids = [c["id"] for c in found_collections] + # Filter collections to only include the ones we created for this test + found_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] - # Should find landsat and modis collections but not sentinel - assert len(found_collections) == 1 - assert f"{test_prefix}-sentinel" in found_ids - assert f"{test_prefix}-landsat" not in found_ids - assert f"{test_prefix}-modis" not in found_ids + # Should only find the sentinel collection + assert ( + len(found_collections) == 1 + ), f"Expected 1 collection for {endpoint['method']} {endpoint['path']}" + assert found_collections[0]["id"] == sentinel_id - # Test query extension with not-equal operator on ID + # Test 2: Query with not-equal operator query = {"id": {"neq": f"{test_prefix}-sentinel"}} - resp = await app_client.get( - "/collections", - params=[("query", json.dumps(query))], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - found_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("query", json.dumps(query))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("query", json.dumps(query))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"query": json.dumps(query)}, + }, ] - found_ids = [c["id"] for c in found_collections] - # Should find landsat and modis collections but not sentinel - assert len(found_collections) == 2 - assert f"{test_prefix}-sentinel" not in 
found_ids - assert f"{test_prefix}-landsat" in found_ids - assert f"{test_prefix}-modis" in found_ids + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + found_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + found_ids = [c["id"] for c in found_collections] + + # Should find landsat and modis collections but not sentinel + assert ( + len(found_collections) == 2 + ), f"Expected 2 collections for {endpoint['method']} {endpoint['path']}" + assert f"{test_prefix}-sentinel" not in found_ids + assert f"{test_prefix}-landsat" in found_ids + assert f"{test_prefix}-modis" in found_ids @pytest.mark.asyncio async def test_collections_datetime_filter(app_client, load_test_data, txn_client): - """Test filtering collections by datetime.""" + """Test filtering collections by datetime across all endpoints.""" # Create a test collection with a specific temporal extent base_collection = load_test_data("test_collection.json") @@ -450,66 +558,71 @@ async def test_collections_datetime_filter(app_client, load_test_data, txn_clien await create_collection(txn_client, base_collection) await refresh_indices(txn_client) - # Test 1: Datetime range that overlaps with collection's temporal extent - resp = await app_client.get( - "/collections?datetime=2020-06-01T00:00:00Z/2021-01-01T00:00:00Z" - ) - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] - assert ( - len(found_collections) == 1 - ), f"Expected to find collection {test_collection_id} with overlapping datetime range" - - # Test 2: Datetime range that is completely before collection's temporal extent - resp = await app_client.get( - "/collections?datetime=2019-01-01T00:00:00Z/2019-12-31T23:59:59Z" - ) - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id + # Test scenarios with different datetime ranges + test_scenarios = [ + { + "name": "overlapping range", + "datetime": "2020-06-01T00:00:00Z/2021-01-01T00:00:00Z", + "expected_count": 1, + }, + { + "name": "before range", + "datetime": "2019-01-01T00:00:00Z/2019-12-31T23:59:59Z", + "expected_count": 0, + }, + { + "name": "after range", + "datetime": "2021-01-01T00:00:00Z/2021-12-31T23:59:59Z", + "expected_count": 0, + }, + { + "name": "single datetime within range", + "datetime": "2020-06-15T12:00:00Z", + "expected_count": 1, + }, + { + "name": "open-ended future range", + "datetime": "2020-06-01T00:00:00Z/..", + "expected_count": 1, + }, ] - assert ( - len(found_collections) == 0 - ), f"Expected not to find collection {test_collection_id} with non-overlapping datetime range" - # Test 3: Datetime range that is completely after collection's temporal extent - resp = await app_client.get( - "/collections?datetime=2021-01-01T00:00:00Z/2021-12-31T23:59:59Z" - ) - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] - assert ( - len(found_collections) == 0 - ), f"Expected not to find collection 
{test_collection_id} with non-overlapping datetime range" + for scenario in test_scenarios: + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("datetime", scenario["datetime"])], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("datetime", scenario["datetime"])], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"datetime": scenario["datetime"]}, + }, + ] - # Test 4: Single datetime that falls within collection's temporal extent - resp = await app_client.get("/collections?datetime=2020-06-15T12:00:00Z") - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] - assert ( - len(found_collections) == 1 - ), f"Expected to find collection {test_collection_id} with datetime point within range" + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - # Test 5: Open-ended range (from a specific date to the future) - resp = await app_client.get("/collections?datetime=2020-06-01T00:00:00Z/..") - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] - assert ( - len(found_collections) == 1 - ), f"Expected to find collection {test_collection_id} with open-ended future range" + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']} with {scenario['name']}" + resp_json = resp.json() + found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] + assert len(found_collections) == scenario["expected_count"], ( + f"Expected {scenario['expected_count']} collection(s) for {scenario['name']} " + f"on {endpoint['method']} {endpoint['path']}, found {len(found_collections)}" + ) # Test 6: Open-ended range (from the past to a date within the collection's range) # TODO: This test is currently skipped due to an unresolved issue with open-ended past range queries. 
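The scenario table above drives all three endpoints through the same datetime expectations. Outside the test suite, the equivalent requests can be issued against a running instance; this sketch assumes a local deployment on port 8080 with `ENABLE_COLLECTIONS_SEARCH_ROUTE=true`, as configured in the compose file from patch 03.

```python
import httpx

BASE = "http://localhost:8080"  # assumed local stac-fastapi instance
DATETIME = "2020-06-01T00:00:00Z/2021-01-01T00:00:00Z"

with httpx.Client(base_url=BASE, timeout=30) as client:
    # GET /collections and GET /collections-search take datetime as a query param...
    for path in ("/collections", "/collections-search"):
        resp = client.get(path, params={"datetime": DATETIME})
        resp.raise_for_status()
        print(path, resp.json()["numberReturned"])

    # ...while POST /collections-search takes it in the JSON body.
    resp = client.post("/collections-search", json={"datetime": DATETIME})
    resp.raise_for_status()
    print("POST /collections-search", resp.json()["numberReturned"])
```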
@@ -528,7 +641,7 @@ async def test_collections_datetime_filter(app_client, load_test_data, txn_clien @pytest.mark.asyncio async def test_collections_number_matched_returned(app_client, txn_client, ctx): - """Verify GET /collections returns correct numberMatched and numberReturned values.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search return correct numberMatched and numberReturned values.""" # Create multiple collections with different ids base_collection = ctx.collection @@ -545,56 +658,91 @@ async def test_collections_number_matched_returned(app_client, txn_client, ctx): await refresh_indices(txn_client) - # Test with limit=5 - resp = await app_client.get( - "/collections", - params=[("limit", "5")], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - test_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + # Test 1: With limit=5 + endpoints = [ + {"method": "GET", "path": "/collections", "params": [("limit", "5")]}, + {"method": "GET", "path": "/collections-search", "params": [("limit", "5")]}, + {"method": "POST", "path": "/collections-search", "body": {"limit": 5}}, ] - # Should return 5 collections - assert len(test_collections) == 5 + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - # Check that numberReturned matches the number of collections returned - assert resp_json["numberReturned"] == len(resp_json["collections"]) + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() - # Check that numberMatched is greater than or equal to numberReturned - # (since there might be other collections in the database) - assert resp_json["numberMatched"] >= resp_json["numberReturned"] + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Should return 5 collections + assert ( + len(test_collections) == 5 + ), f"Expected 5 test collections for {endpoint['method']} {endpoint['path']}" - # Check that numberMatched includes at least all our test collections - assert resp_json["numberMatched"] >= len(collection_ids) + # Check that numberReturned matches the number of collections returned + assert resp_json["numberReturned"] == len(resp_json["collections"]) - # Now test with a query that should match only some collections + # Check that numberMatched is greater than or equal to numberReturned + assert resp_json["numberMatched"] >= resp_json["numberReturned"] + + # Check that numberMatched includes at least all our test collections + assert resp_json["numberMatched"] >= len(collection_ids) + + # Test 2: With a query that should match only one collection query = {"id": {"eq": f"{test_prefix}-1"}} - resp = await app_client.get( - "/collections", - params=[("query", json.dumps(query))], - ) - assert resp.status_code == 200 - resp_json = resp.json() - # Filter collections to only include the ones we created for this test - test_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("query", json.dumps(query))], + }, + { + "method": "GET", + "path": "/collections-search", 
+ "params": [("query", json.dumps(query))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"query": json.dumps(query)}, + }, ] - # Should return only 1 collection - assert len(test_collections) == 1 - assert test_collections[0]["id"] == f"{test_prefix}-1" + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - # Check that numberReturned matches the number of collections returned - assert resp_json["numberReturned"] == len(resp_json["collections"]) + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Should return only 1 collection + assert ( + len(test_collections) == 1 + ), f"Expected 1 test collection for {endpoint['method']} {endpoint['path']}" + assert test_collections[0]["id"] == f"{test_prefix}-1" + + # Check that numberReturned matches the number of collections returned + assert resp_json["numberReturned"] == len(resp_json["collections"]) - # Check that numberMatched matches the number of collections that match the query - # (should be 1 in this case) - assert resp_json["numberMatched"] >= 1 + # Check that numberMatched matches the number of collections that match the query + assert resp_json["numberMatched"] >= 1 @pytest.mark.asyncio From 3f474f995d60ce6ee5f18089eb9318ccafd579b8 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 30 Sep 2025 15:54:05 +0800 Subject: [PATCH 06/22] changelog fix --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b00f0acc..bb6738844 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed +[v6.5.1] - 2025-09-30 + +### Fixed + - Issue where token, query param was not being passed to POST collections search logic [#483](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/483) - Issue where datetime param was not being passed from POST collections search logic to Elasticsearch [#483](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/483) - Collections search tests to ensure both GET /collections and GET/POST /collections-search endpoints are tested [#483](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/483) From 7919b3d114cbd225a6601f2623f7c45051c3b906 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 10 Oct 2025 15:11:41 +0800 Subject: [PATCH 07/22] add pr numbers --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb6738844..761ad155f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added -- Spatial search support for collections via `bbox` parameter on `/collections` endpoint. Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries. -- Migration scripts (`update_collections_mapping.py` and `recreate_collections_index.py`) to help add `bbox_shape` field to existing deployments. 
+- Spatial search support for collections via `bbox` parameter on `/collections` endpoint. Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries. [#481](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/481) + +### Changed +- Migration scripts (`update_collections_mapping.py` and `recreate_collections_index.py`) to help add `bbox_shape` field to existing deployments. [#481](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/481) ### Changed From 849d1cba46cefa72d2239c539df98f03012ca29d Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 10 Oct 2025 15:33:53 +0800 Subject: [PATCH 08/22] remove debug statements --- stac_fastapi/core/stac_fastapi/core/serializers.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py index a8c17d2b7..d6a3b64ff 100644 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -155,9 +155,6 @@ def stac_to_db( else spatial_extent["bbox"] ) collection_id = collection.get("id", "unknown") - logger.debug( - f"Converting bbox to bbox_shape for collection '{collection_id}': bbox={bbox}" - ) if len(bbox) >= 4: # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) @@ -166,16 +163,10 @@ def stac_to_db( if len(bbox) == 4: # 2D bbox: [minx, miny, maxx, maxy] maxx, maxy = bbox[2], bbox[3] - logger.debug( - f"Collection '{collection_id}': Processing 2D bbox" - ) else: # 3D bbox: [minx, miny, minz, maxx, maxy, maxz] # Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz) maxx, maxy = bbox[3], bbox[4] - logger.debug( - f"Collection '{collection_id}': Processing 3D bbox, discarding altitude values at indices 2 and 5" - ) # Convert bbox to GeoJSON polygon bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy) @@ -183,9 +174,6 @@ def stac_to_db( "type": "Polygon", "coordinates": bbox_polygon_coords, } - logger.info( - f"Collection '{collection_id}': Created bbox_shape from bbox [{minx}, {miny}, {maxx}, {maxy}]" - ) else: logger.warning( f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4" From ad6f7f65dce0bd920224329462efa65a783cd125 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 10 Oct 2025 19:35:49 +0800 Subject: [PATCH 09/22] move shared bbox code to helpers --- .../elasticsearch/database_logic.py | 40 +--------- .../stac_fastapi/opensearch/database_logic.py | 40 +--------- .../sfeos_helpers/database/__init__.py | 2 + .../sfeos_helpers/database/query.py | 76 ++++++++++++++++++- 4 files changed, 85 insertions(+), 73 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 699e436a6..f27f15bf9 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -29,6 +29,7 @@ ) from stac_fastapi.sfeos_helpers import filter as filter_module from stac_fastapi.sfeos_helpers.database import ( + apply_collections_bbox_filter_shared, apply_free_text_filter_shared, apply_intersects_filter_shared, create_index_templates_shared, @@ -317,42 +318,9 @@ async def get_all_collections( raise # Apply bbox filter if provided - if 
bbox: - # Parse bbox if it's a string (from GET requests) - if isinstance(bbox, str): - try: - bbox = [float(x.strip()) for x in bbox.split(",")] - except (ValueError, AttributeError) as e: - logger.error(f"Invalid bbox format: {bbox}, error: {e}") - bbox = None - - if bbox and len(bbox) >= 4: - # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) - # For geospatial queries, we discard altitude (z) values - minx, miny = bbox[0], bbox[1] - if len(bbox) == 4: - # 2D bbox - maxx, maxy = bbox[2], bbox[3] - else: - # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz) - maxx, maxy = bbox[3], bbox[4] - - # Convert bbox to a polygon for geo_shape query - bbox_polygon = { - "type": "Polygon", - "coordinates": bbox2polygon(minx, miny, maxx, maxy), - } - # Add geo_shape query to filter collections by bbox_shape field - query_parts.append( - { - "geo_shape": { - "bbox_shape": { - "shape": bbox_polygon, - "relation": "intersects", - } - } - } - ) + bbox_filter = apply_collections_bbox_filter_shared(bbox) + if bbox_filter: + query_parts.append(bbox_filter) # Combine all query parts with AND logic if there are multiple datetime_filter = None diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 682394141..7f9be7238 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -29,6 +29,7 @@ from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings from stac_fastapi.sfeos_helpers import filter as filter_module from stac_fastapi.sfeos_helpers.database import ( + apply_collections_bbox_filter_shared, apply_free_text_filter_shared, apply_intersects_filter_shared, create_index_templates_shared, @@ -301,42 +302,9 @@ async def get_all_collections( raise # Apply bbox filter if provided - if bbox: - # Parse bbox if it's a string (from GET requests) - if isinstance(bbox, str): - try: - bbox = [float(x.strip()) for x in bbox.split(",")] - except (ValueError, AttributeError) as e: - logger.error(f"Invalid bbox format: {bbox}, error: {e}") - bbox = None - - if bbox and len(bbox) >= 4: - # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) - # For geospatial queries, we discard altitude (z) values - minx, miny = bbox[0], bbox[1] - if len(bbox) == 4: - # 2D bbox - maxx, maxy = bbox[2], bbox[3] - else: - # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz) - maxx, maxy = bbox[3], bbox[4] - - # Convert bbox to a polygon for geo_shape query - bbox_polygon = { - "type": "Polygon", - "coordinates": bbox2polygon(minx, miny, maxx, maxy), - } - # Add geo_shape query to filter collections by bbox_shape field - query_parts.append( - { - "geo_shape": { - "bbox_shape": { - "shape": bbox_polygon, - "relation": "intersects", - } - } - } - ) + bbox_filter = apply_collections_bbox_filter_shared(bbox) + if bbox_filter: + query_parts.append(bbox_filter) # Combine all query parts with AND logic if there are multiple datetime_filter = None diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py index bacf1ac31..b1b0326f7 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py +++ 
b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py @@ -42,6 +42,7 @@ ) from .mapping import get_queryables_mapping_shared from .query import ( + apply_collections_bbox_filter_shared, apply_free_text_filter_shared, apply_intersects_filter_shared, populate_sort_shared, @@ -59,6 +60,7 @@ # Query operations "apply_free_text_filter_shared", "apply_intersects_filter_shared", + "apply_collections_bbox_filter_shared", "populate_sort_shared", # Mapping operations "get_queryables_mapping_shared", diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py index 80d071287..23727756c 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -3,8 +3,10 @@ This module provides functions for building and manipulating Elasticsearch/OpenSearch queries. """ -from typing import Any, Dict, List, Optional +import logging +from typing import Any, Dict, List, Optional, Union +from stac_fastapi.core.utilities import bbox2polygon from stac_fastapi.sfeos_helpers.mappings import Geometry ES_MAX_URL_LENGTH = 4096 @@ -66,6 +68,78 @@ def apply_intersects_filter_shared( } +def apply_collections_bbox_filter_shared( + bbox: Union[str, List[float], None] +) -> Optional[Dict[str, Dict]]: + """Create a geo_shape filter for collections bbox search. + + This function handles bbox parsing from both GET requests (string format) and POST requests + (list format), and constructs a geo_shape query for filtering collections by their bbox_shape field. + + Args: + bbox: The bounding box parameter. Can be: + - A string of comma-separated coordinates (from GET requests) + - A list of floats [minx, miny, maxx, maxy] for 2D bbox + - None if no bbox filter is provided + + Returns: + Optional[Dict[str, Dict]]: A dictionary containing the geo_shape filter configuration + that can be used with Elasticsearch/OpenSearch queries, or None if bbox is invalid. + Example return value: + { + "geo_shape": { + "bbox_shape": { + "shape": { + "type": "Polygon", + "coordinates": [[[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy], [minx, miny]]] + }, + "relation": "intersects" + } + } + } + + Notes: + - This function is specifically for filtering collections by their spatial extent + - It queries the bbox_shape field (not the geometry field used for items) + - The bbox is expected to be 2D (4 values) after any 3D to 2D conversion in the API layer + """ + logger = logging.getLogger(__name__) + + if not bbox: + return None + + # Parse bbox if it's a string (from GET requests) + if isinstance(bbox, str): + try: + bbox = [float(x.strip()) for x in bbox.split(",")] + except (ValueError, AttributeError) as e: + logger.error(f"Invalid bbox format: {bbox}, error: {e}") + return None + + if not bbox or len(bbox) != 4: + if bbox: + logger.warning( + f"bbox has incorrect number of coordinates (length={len(bbox)}), expected 4 (2D bbox)" + ) + return None + + # Convert bbox to a polygon for geo_shape query + bbox_polygon = { + "type": "Polygon", + "coordinates": bbox2polygon(bbox[0], bbox[1], bbox[2], bbox[3]), + } + + # Return geo_shape query for bbox_shape field + return { + "geo_shape": { + "bbox_shape": { + "shape": bbox_polygon, + "relation": "intersects", + } + } + } + + def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: """Create a sort configuration for Elasticsearch/OpenSearch queries. 
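A quick way to sanity-check the shared helper added in the patch above is to call it with the two input shapes it accepts. A minimal sketch, assuming the `stac_fastapi.sfeos_helpers` package from this branch is installed; the expected values follow directly from the docstring and body of `apply_collections_bbox_filter_shared`:

```python
from stac_fastapi.sfeos_helpers.database import apply_collections_bbox_filter_shared

# GET-style input: a comma-separated string is parsed to floats first
get_filter = apply_collections_bbox_filter_shared("-10,35,40,70")

# POST-style input: a 2D list of floats is used as-is
post_filter = apply_collections_bbox_filter_shared([-10.0, 35.0, 40.0, 70.0])

# Both paths produce the same geo_shape query against the bbox_shape field
assert get_filter == post_filter
assert get_filter["geo_shape"]["bbox_shape"]["relation"] == "intersects"

# Invalid input degrades to None instead of raising, so callers can simply
# skip appending the filter to query_parts without extra try/except
assert apply_collections_bbox_filter_shared("not,a,bbox") is None
assert apply_collections_bbox_filter_shared([0.0, 0.0]) is None
```

Note that the shared helper is stricter than the inline code it replaces: it only accepts 4-element (2D) bboxes and, per its docstring, expects any 3D-to-2D conversion to happen in the API layer before it is called.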
From c5a7efcf6fabf83797efadcbdd5f3dec36111c2e Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 11 Oct 2025 00:08:28 +0800 Subject: [PATCH 10/22] add sfeos-tools CLI tool --- CHANGELOG.md | 7 +- README.md | 73 +++++++- recreate_collections_index.py | 121 ------------ sfeos_tools/README.md | 113 ++++++++++++ sfeos_tools/setup.py | 55 ++++++ sfeos_tools/sfeos_tools/__init__.py | 3 + sfeos_tools/sfeos_tools/cli.py | 273 ++++++++++++++++++++++++++++ update_collections_mapping.py | 137 -------------- 8 files changed, 519 insertions(+), 263 deletions(-) delete mode 100644 recreate_collections_index.py create mode 100644 sfeos_tools/README.md create mode 100644 sfeos_tools/setup.py create mode 100644 sfeos_tools/sfeos_tools/__init__.py create mode 100644 sfeos_tools/sfeos_tools/cli.py delete mode 100644 update_collections_mapping.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 869d04c3d..6731372bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,15 +9,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added -- Spatial search support for collections via `bbox` parameter on `/collections` endpoint. Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries. [#481](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/481) +- Spatial search support for collections via `bbox` parameter on `/collections` endpoint. Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries when created or updated. [#481](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/481) +- Introduced SFEOS Tools (`sfeos_tools/`) - An installable Click-based CLI package for managing SFEOS deployments. Initial command `add-bbox-shape` adds the `bbox_shape` field to existing collections for spatial search compatibility. Install with `pip install sfeos-tools[elasticsearch]` or `pip install sfeos-tools[opensearch]`. [#481](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/481) ### Changed -- Migration scripts (`update_collections_mapping.py` and `recreate_collections_index.py`) to help add `bbox_shape` field to existing deployments. 
[#481](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/481) + - CloudFerro logo to sponsors and supporters list [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) - Latest news section to README [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) -### Changed - ### Fixed [v6.5.1] - 2025-09-30 diff --git a/README.md b/README.md index b87bd21be..711f768bf 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI - [Interacting with the API](#interacting-with-the-api) - [Configure the API](#configure-the-api) - [Collection Pagination](#collection-pagination) + - [SFEOS Tools CLI](#sfeos-tools-cli) - [Ingesting Sample Data CLI Tool](#ingesting-sample-data-cli-tool) - [Elasticsearch Mappings](#elasticsearch-mappings) - [Managing Elasticsearch Indices](#managing-elasticsearch-indices) @@ -160,9 +161,21 @@ These endpoints support advanced collection discovery features including: - Collections are matched if their temporal extent overlaps with the provided datetime parameter - This allows for efficient discovery of collections based on time periods +- **Spatial Filtering**: Filter collections by their spatial extent using the `bbox` parameter + - Example: `/collections?bbox=-10,35,40,70` (finds collections whose spatial extent intersects with this bounding box) + - Example: `/collections?bbox=-180,-90,180,90` (finds all collections with global coverage) + - Supports both 2D bounding boxes `[minx, miny, maxx, maxy]` and 3D bounding boxes `[minx, miny, minz, maxx, maxy, maxz]` (altitude values are ignored for spatial queries) + - Collections are matched if their spatial extent (stored in the `extent.spatial.bbox` field) intersects with the provided bbox parameter + - **Implementation Note**: When collections are created or updated, a `bbox_shape` field is automatically generated from the collection's spatial extent and indexed as a GeoJSON polygon for efficient geospatial queries + - **Migrating Legacy Collections**: Collections created before this feature was added will not be discoverable via bbox search until they have the `bbox_shape` field added. You can either: + - Update each collection via the API (PUT `/collections/{collection_id}` with the existing collection data) + - Run the migration tool (see [SFEOS Tools CLI](#sfeos-tools-cli) for installation and connection options): + - `sfeos-tools add-bbox-shape --backend elasticsearch --no-ssl` + - `sfeos-tools add-bbox-shape --backend opensearch --host db.example.com --no-ssl` + These extensions make it easier to build user interfaces that display and navigate through collections efficiently. -> **Configuration**: Collection search extensions (sorting, field selection, free text search, structured filtering, and datetime filtering) for the `/collections` endpoint can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled. +> **Configuration**: Collection search extensions (sorting, field selection, free text search, structured filtering, datetime filtering, and spatial filtering) for the `/collections` endpoint can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled. > > **Configuration**: The custom `/collections-search` endpoint can be enabled by setting the `ENABLE_COLLECTIONS_SEARCH_ROUTE` environment variable to `true`. 
By default, this endpoint is **disabled**. @@ -470,6 +483,64 @@ The system uses a precise naming convention: curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token" ``` +## SFEOS Tools CLI + +- **Overview**: SFEOS Tools is an installable CLI package for managing and maintaining SFEOS deployments. + +- **Installation**: + ```shell + # For Elasticsearch (from PyPI) + pip install sfeos-tools[elasticsearch] + + # For OpenSearch (from PyPI) + pip install sfeos-tools[opensearch] + + # For local development + pip install -e sfeos_tools[elasticsearch] + # or + pip install -e sfeos_tools[opensearch] + ``` + +- **Available Commands**: + - `add-bbox-shape`: Add bbox_shape field to existing collections for spatial search support + +- **Basic Usage**: + ```shell + sfeos-tools add-bbox-shape --backend elasticsearch + sfeos-tools add-bbox-shape --backend opensearch + ``` + +- **Connection Options**: Configure database connection via CLI flags or environment variables: + - `--host`: Database host (default: `localhost` or `ES_HOST` env var) + - `--port`: Database port (default: `9200` or `ES_PORT` env var) + - `--use-ssl` / `--no-ssl`: Use SSL connection (default: `true` or `ES_USE_SSL` env var) + - `--user`: Database username (default: `ES_USER` env var) + - `--password`: Database password (default: `ES_PASS` env var) + +- **Examples**: + ```shell + # Local Docker Compose (no SSL) + sfeos-tools add-bbox-shape --backend elasticsearch --no-ssl + + # Remote server with SSL + sfeos-tools add-bbox-shape \ + --backend elasticsearch \ + --host db.example.com \ + --port 9200 \ + --user admin \ + --password secret + + # Cloud deployment with environment variables + ES_HOST=my-es-cluster.cloud.com ES_PORT=9243 ES_USER=elastic ES_PASS=changeme \ + sfeos-tools add-bbox-shape --backend elasticsearch + + # Using --help for more information + sfeos-tools --help + sfeos-tools add-bbox-shape --help + ``` + +For more details, see the [SFEOS Tools README](./sfeos_tools/README.md). + ## Ingesting Sample Data CLI Tool - **Overview**: The `data_loader.py` script provides a convenient way to load STAC items into the database. diff --git a/recreate_collections_index.py b/recreate_collections_index.py deleted file mode 100644 index 047a14675..000000000 --- a/recreate_collections_index.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python3 -""" -Script to delete and recreate the collections index. - -WARNING: This will DELETE all existing collections! -Only use this in development environments. 
- -Usage: - python recreate_collections_index.py -""" - -import asyncio -import os -import sys - -from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX - -# Determine which backend to use -BACKEND = os.getenv("BACKEND", "elasticsearch").lower() - -if BACKEND == "opensearch": - from stac_fastapi.opensearch.config import ( - AsyncOpensearchSettings as AsyncSearchSettings, - ) - from stac_fastapi.opensearch.database_logic import ( - create_collection_index, - create_index_templates, - ) -else: - from stac_fastapi.elasticsearch.config import ( - AsyncElasticsearchSettings as AsyncSearchSettings, - ) - from stac_fastapi.elasticsearch.database_logic import ( - create_collection_index, - create_index_templates, - ) - - -async def recreate_index(): - """Delete and recreate the collections index.""" - settings = AsyncSearchSettings() - client = settings.create_client - - print(f"Using backend: {BACKEND}") - print(f"\n{'=' * 60}") - print("WARNING: This will DELETE all existing collections!") - print(f"{'=' * 60}\n") - - # Check if running in production - env = os.getenv("ENVIRONMENT", "development").lower() - if env == "production": - print("ERROR: This script should not be run in production!") - print("Use update_collections_mapping.py instead.") - sys.exit(1) - - response = input("Are you sure you want to continue? (yes/no): ") - if response.lower() != "yes": - print("Aborted.") - sys.exit(0) - - try: - # Delete the collections index - index_name = f"{COLLECTIONS_INDEX}-000001" - alias_name = COLLECTIONS_INDEX - - print(f"\nDeleting index {index_name}...") - exists = await client.indices.exists(index=index_name) - if exists: - await client.indices.delete(index=index_name) - print(f"✓ Deleted index {index_name}") - else: - print(f"⊘ Index {index_name} does not exist") - - # Check if alias exists and delete it - alias_exists = await client.indices.exists_alias(name=alias_name) - if alias_exists: - print(f"Deleting alias {alias_name}...") - await client.indices.delete_alias( - index="_all", name=alias_name, ignore=[404] - ) - print(f"✓ Deleted alias {alias_name}") - - # Recreate index templates - print("\nRecreating index templates...") - await create_index_templates() - print("✓ Index templates created") - - # Recreate the collections index - print("\nRecreating collections index...") - await create_collection_index() - print("✓ Collections index created") - - # Verify the mapping includes bbox_shape - print("\nVerifying mapping...") - mapping = await client.indices.get_mapping(index=index_name) - properties = mapping[index_name]["mappings"]["properties"] - - if "bbox_shape" in properties: - print( - f"✓ bbox_shape field is present in mapping: {properties['bbox_shape']}" - ) - else: - print("✗ WARNING: bbox_shape field is NOT in the mapping!") - - print("\n" + "=" * 60) - print("Collections index successfully recreated!") - print("You can now create collections with bbox_shape support.") - print("=" * 60) - - except Exception as e: - print(f"\n✗ Error: {e}") - import traceback - - traceback.print_exc() - sys.exit(1) - finally: - await client.close() - - -if __name__ == "__main__": - asyncio.run(recreate_index()) diff --git a/sfeos_tools/README.md b/sfeos_tools/README.md new file mode 100644 index 000000000..5347c28b3 --- /dev/null +++ b/sfeos_tools/README.md @@ -0,0 +1,113 @@ +# SFEOS Tools + +CLI tools for managing [stac-fastapi-elasticsearch-opensearch](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) deployments. 
+ +## Installation + +### For Elasticsearch + +```bash +pip install sfeos-tools[elasticsearch] +``` + +Or for local development: +```bash +pip install -e sfeos_tools[elasticsearch] +``` + +### For OpenSearch + +```bash +pip install sfeos-tools[opensearch] +``` + +Or for local development: +```bash +pip install -e sfeos_tools[opensearch] +``` + +### For Development (both backends) + +```bash +pip install sfeos-tools[dev] +``` + +Or for local development: +```bash +pip install -e sfeos_tools[dev] +``` + +## Usage + +After installation, the `sfeos-tools` command will be available: + +```bash +# View available commands +sfeos-tools --help + +# View version +sfeos-tools --version + +# Get help for a specific command +sfeos-tools add-bbox-shape --help +``` + +## Commands + +### add-bbox-shape + +Add `bbox_shape` field to existing collections for spatial search support. + +**Basic usage:** + +```bash +# Elasticsearch +sfeos-tools add-bbox-shape --backend elasticsearch + +# OpenSearch +sfeos-tools add-bbox-shape --backend opensearch +``` + +**Connection options:** + +```bash +# Local Docker Compose (no SSL) +sfeos-tools add-bbox-shape --backend elasticsearch --no-ssl + +# Remote server with SSL +sfeos-tools add-bbox-shape \ + --backend elasticsearch \ + --host db.example.com \ + --port 9200 \ + --user admin \ + --password secret + +# Using environment variables +ES_HOST=my-cluster.cloud.com ES_PORT=9243 ES_USER=elastic ES_PASS=changeme \ + sfeos-tools add-bbox-shape --backend elasticsearch +``` + +**Available options:** + +- `--backend`: Database backend (elasticsearch or opensearch) - **required** +- `--host`: Database host (default: localhost or ES_HOST env var) +- `--port`: Database port (default: 9200 or ES_PORT env var) +- `--use-ssl / --no-ssl`: Use SSL connection (default: true or ES_USE_SSL env var) +- `--user`: Database username (default: ES_USER env var) +- `--password`: Database password (default: ES_PASS env var) + +## Development + +To develop sfeos-tools locally: + +```bash +# Install in editable mode with dev dependencies +pip install -e ./sfeos_tools[dev] + +# Run the CLI +sfeos-tools --help +``` + +## License + +MIT License - see the main repository for details. 
diff --git a/sfeos_tools/setup.py b/sfeos_tools/setup.py new file mode 100644 index 000000000..e781cb66c --- /dev/null +++ b/sfeos_tools/setup.py @@ -0,0 +1,55 @@ +"""Setup for SFEOS Tools.""" + +from setuptools import find_packages, setup + +with open("README.md", "r", encoding="utf-8") as f: + long_description = f.read() + +setup( + name="sfeos-tools", + version="0.1.0", + description="CLI tools for managing stac-fastapi-elasticsearch-opensearch deployments", + long_description=long_description, + long_description_content_type="text/markdown", + author="SFEOS Contributors", + url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", + packages=find_packages(), + python_requires=">=3.8", + install_requires=[ + "click>=8.0.0", + ], + extras_require={ + "elasticsearch": [ + "stac_fastapi_core", + "sfeos_helpers", + "stac_fastapi_elasticsearch", + ], + "opensearch": [ + "stac_fastapi_core", + "sfeos_helpers", + "stac_fastapi_opensearch", + ], + "dev": [ + "stac_fastapi_core", + "sfeos_helpers", + "stac_fastapi_elasticsearch", + "stac_fastapi_opensearch", + ], + }, + entry_points={ + "console_scripts": [ + "sfeos-tools=sfeos_tools.cli:cli", + ], + }, + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + ], +) diff --git a/sfeos_tools/sfeos_tools/__init__.py b/sfeos_tools/sfeos_tools/__init__.py new file mode 100644 index 000000000..9106f13e8 --- /dev/null +++ b/sfeos_tools/sfeos_tools/__init__.py @@ -0,0 +1,3 @@ +"""SFEOS Tools - Utilities for managing stac-fastapi-elasticsearch-opensearch deployments.""" + +__version__ = "0.1.0" diff --git a/sfeos_tools/sfeos_tools/cli.py b/sfeos_tools/sfeos_tools/cli.py new file mode 100644 index 000000000..0da4f7140 --- /dev/null +++ b/sfeos_tools/sfeos_tools/cli.py @@ -0,0 +1,273 @@ +"""SFEOS CLI Tools - Utilities for managing stac-fastapi-elasticsearch-opensearch deployments. + +This tool provides various utilities for managing and maintaining SFEOS deployments, +including database migrations, maintenance tasks, and more. + +Usage: + sfeos-tools add-bbox-shape --backend elasticsearch + sfeos-tools add-bbox-shape --backend opensearch +""" + +import asyncio +import logging +import sys + +import click + +from stac_fastapi.core.utilities import bbox2polygon +from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +async def add_bbox_shape_to_collection(client, collection_doc, backend): + """Add bbox_shape field to a single collection document. 
+ + Args: + client: Elasticsearch/OpenSearch client + collection_doc: Collection document from database + backend: Backend type ('elasticsearch' or 'opensearch') + + Returns: + bool: True if collection was updated, False if no update was needed + """ + collection = collection_doc["_source"] + collection_id = collection.get("id", collection_doc["_id"]) + + # Check if bbox_shape already exists + if "bbox_shape" in collection: + logger.info( + f"Collection '{collection_id}' already has bbox_shape field, skipping" + ) + return False + + # Check if collection has spatial extent + if "extent" not in collection or "spatial" not in collection["extent"]: + logger.warning(f"Collection '{collection_id}' has no spatial extent, skipping") + return False + + spatial_extent = collection["extent"]["spatial"] + if "bbox" not in spatial_extent or not spatial_extent["bbox"]: + logger.warning( + f"Collection '{collection_id}' has no bbox in spatial extent, skipping" + ) + return False + + # Get the first bbox (collections can have multiple bboxes, but we use the first one) + bbox = ( + spatial_extent["bbox"][0] + if isinstance(spatial_extent["bbox"][0], list) + else spatial_extent["bbox"] + ) + + if len(bbox) < 4: + logger.warning( + f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4" + ) + return False + + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For 2D polygon, we only need the x,y coordinates and discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox: [minx, miny, maxx, maxy] + maxx, maxy = bbox[2], bbox[3] + else: + # 3D bbox: [minx, miny, minz, maxx, maxy, maxz] + # Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + + # Convert bbox to GeoJSON polygon + bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy) + collection["bbox_shape"] = { + "type": "Polygon", + "coordinates": bbox_polygon_coords, + } + + # Update the collection in the database + if backend == "elasticsearch": + await client.index( + index=COLLECTIONS_INDEX, + id=collection_id, + document=collection, + refresh=True, + ) + else: # opensearch + await client.index( + index=COLLECTIONS_INDEX, + id=collection_id, + body=collection, + refresh=True, + ) + + logger.info(f"Collection '{collection_id}': Added bbox_shape field") + return True + + +async def run_add_bbox_shape(backend): + """Add bbox_shape field to all existing collections. 
+ + Args: + backend: Backend type ('elasticsearch' or 'opensearch') + """ + import os + + logger.info( + f"Starting migration: Adding bbox_shape to existing collections ({backend})" + ) + + # Log connection info (showing what will be used by the client) + es_host = os.getenv("ES_HOST", "localhost") + es_port = os.getenv( + "ES_PORT", "9200" + ) # Both backends default to 9200 in their config + es_use_ssl = os.getenv("ES_USE_SSL", "true") + logger.info(f"Connecting to {backend} at {es_host}:{es_port} (SSL: {es_use_ssl})") + + # Create client based on backend + if backend == "elasticsearch": + from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings + + settings = AsyncElasticsearchSettings() + else: # opensearch + from stac_fastapi.opensearch.config import AsyncOpensearchSettings + + settings = AsyncOpensearchSettings() + + client = settings.create_client + + try: + # Get all collections + response = await client.search( + index=COLLECTIONS_INDEX, + body={ + "query": {"match_all": {}}, + "size": 10000, + }, # Adjust size if you have more collections + ) + + total_collections = response["hits"]["total"]["value"] + logger.info(f"Found {total_collections} collections to process") + + updated_count = 0 + skipped_count = 0 + + for hit in response["hits"]["hits"]: + was_updated = await add_bbox_shape_to_collection(client, hit, backend) + if was_updated: + updated_count += 1 + else: + skipped_count += 1 + + logger.info( + f"Migration complete: {updated_count} collections updated, {skipped_count} skipped" + ) + + except Exception as e: + logger.error(f"Migration failed with error: {e}") + raise + finally: + await client.close() + + +@click.group() +@click.version_option(version="0.1.0", prog_name="sfeos-tools") +def cli(): + """SFEOS Tools - Utilities for managing stac-fastapi-elasticsearch-opensearch deployments.""" + pass + + +@cli.command("add-bbox-shape") +@click.option( + "--backend", + type=click.Choice(["elasticsearch", "opensearch"], case_sensitive=False), + required=True, + help="Database backend to use", +) +@click.option( + "--host", + type=str, + default=None, + help="Database host (default: localhost or ES_HOST env var)", +) +@click.option( + "--port", + type=int, + default=None, + help="Database port (default: 9200 for ES, 9202 for OS, or ES_PORT env var)", +) +@click.option( + "--use-ssl/--no-ssl", + default=None, + help="Use SSL connection (default: true or ES_USE_SSL env var)", +) +@click.option( + "--user", + type=str, + default=None, + help="Database username (default: ES_USER env var)", +) +@click.option( + "--password", + type=str, + default=None, + help="Database password (default: ES_PASS env var)", +) +def add_bbox_shape(backend, host, port, use_ssl, user, password): + """Add bbox_shape field to existing collections for spatial search support. + + This migration is required for collections created before spatial search + was added. Collections created or updated after this feature will + automatically have the bbox_shape field. 
+
+    Examples:
+        sfeos-tools add-bbox-shape --backend elasticsearch
+        sfeos-tools add-bbox-shape --backend opensearch --host db.example.com --port 9200
+        sfeos-tools add-bbox-shape --backend elasticsearch --no-ssl --host localhost
+    """
+    import os
+
+    # Set environment variables from CLI options if provided
+    if host:
+        os.environ["ES_HOST"] = host
+    if port:
+        os.environ["ES_PORT"] = str(port)
+    if use_ssl is not None:
+        os.environ["ES_USE_SSL"] = "true" if use_ssl else "false"
+    if user:
+        os.environ["ES_USER"] = user
+    if password:
+        os.environ["ES_PASS"] = password
+
+    try:
+        asyncio.run(run_add_bbox_shape(backend.lower()))
+        click.echo(click.style("✓ Migration completed successfully", fg="green"))
+    except KeyboardInterrupt:
+        click.echo(click.style("\n✗ Migration interrupted by user", fg="yellow"))
+        sys.exit(1)
+    except Exception as e:
+        error_msg = str(e)
+        click.echo(click.style(f"✗ Migration failed: {error_msg}", fg="red"))
+
+        # Provide helpful hints for common errors
+        if "TLS" in error_msg or "SSL" in error_msg:
+            click.echo(
+                click.style(
+                    "\n💡 Hint: If you're connecting to a local Docker Compose instance, "
+                    "try adding --no-ssl flag",
+                    fg="yellow",
+                )
+            )
+        elif "Connection refused" in error_msg:
+            click.echo(
+                click.style(
+                    "\n💡 Hint: Make sure your database is running and accessible at the specified host:port",
+                    fg="yellow",
+                )
+            )
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/update_collections_mapping.py b/update_collections_mapping.py
deleted file mode 100644
index 331d5aa84..000000000
--- a/update_collections_mapping.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python3
-"""
-Script to update the collections index mapping to add the bbox_shape field.
-
-This script will:
-1. Add the bbox_shape field to the existing collections index
-2. Reindex all collections to populate the bbox_shape field
-
-Usage:
-    python update_collections_mapping.py
-"""
-
-import asyncio
-import os
-from unittest.mock import Mock
-
-from stac_fastapi.core.serializers import CollectionSerializer
-from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX
-
-# Determine which backend to use
-BACKEND = os.getenv("BACKEND", "elasticsearch").lower()
-
-if BACKEND == "opensearch":
-    from stac_fastapi.opensearch.config import (
-        AsyncOpensearchSettings as AsyncSearchSettings,
-    )
-else:
-    from stac_fastapi.elasticsearch.config import (
-        AsyncElasticsearchSettings as AsyncSearchSettings,
-    )
-
-
-async def update_mapping():
-    """Update the collections index mapping to add bbox_shape field."""
-    settings = AsyncSearchSettings()
-    client = settings.create_client
-
-    print(f"Connecting to {BACKEND}...")
-
-    # Check if index exists
-    index_name = f"{COLLECTIONS_INDEX}-000001"
-    exists = await client.indices.exists(index=index_name)
-
-    if not exists:
-        print(f"Index {index_name} does not exist. Creating it...")
-        from stac_fastapi.elasticsearch.database_logic import create_collection_index
-
-        await create_collection_index()
-        print("Index created successfully!")
-        return
-
-    print(f"Index {index_name} exists. 
Updating mapping...") - - # Add the bbox_shape field to the mapping - try: - await client.indices.put_mapping( - index=index_name, body={"properties": {"bbox_shape": {"type": "geo_shape"}}} - ) - print("✓ Mapping updated successfully!") - except Exception as e: - print(f"✗ Error updating mapping: {e}") - return - - # Now reindex all collections to populate bbox_shape - print("\nReindexing collections to populate bbox_shape field...") - - try: - # Get all collections - response = await client.search( - index=index_name, - body={ - "query": {"match_all": {}}, - "size": 1000, # Adjust if you have more collections - }, - ) - - collections = response["hits"]["hits"] - print(f"Found {len(collections)} collections to update") - - if len(collections) == 0: - print("No collections to update.") - return - - # Create a mock request for the serializer - mock_request = Mock() - mock_request.base_url = "http://localhost:8080/" - - updated_count = 0 - error_count = 0 - - for hit in collections: - collection = hit["_source"] - collection_id = collection.get("id", "unknown") - - try: - # Use the serializer to convert bbox to bbox_shape - updated_collection = CollectionSerializer.stac_to_db( - collection, mock_request - ) - - # Check if bbox_shape was created - if "bbox_shape" in updated_collection: - # Update the document - await client.update( - index=index_name, - id=hit["_id"], - body={"doc": {"bbox_shape": updated_collection["bbox_shape"]}}, - refresh=True, - ) - print(f" ✓ Updated collection '{collection_id}'") - updated_count += 1 - else: - print(f" ⊘ Collection '{collection_id}' has no bbox to convert") - except Exception as e: - print(f" ✗ Error updating collection '{collection_id}': {e}") - error_count += 1 - - print("\n" + "=" * 60) - print("Summary:") - print(f" Total collections: {len(collections)}") - print(f" Successfully updated: {updated_count}") - print(f" Errors: {error_count}") - print(f" Skipped (no bbox): {len(collections) - updated_count - error_count}") - print("=" * 60) - - except Exception as e: - print(f"✗ Error during reindexing: {e}") - import traceback - - traceback.print_exc() - finally: - await client.close() - - -if __name__ == "__main__": - print(f"Using backend: {BACKEND}") - asyncio.run(update_mapping()) From 017eec5441983f0715e1b7dddb60a83e8b4cb596 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 11 Oct 2025 00:18:18 +0800 Subject: [PATCH 11/22] update --- sfeos_tools/LICENSE | 21 +++++++++++++++++++++ sfeos_tools/setup.py | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 sfeos_tools/LICENSE diff --git a/sfeos_tools/LICENSE b/sfeos_tools/LICENSE new file mode 100644 index 000000000..b91f107d5 --- /dev/null +++ b/sfeos_tools/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Jonathan Healy + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/sfeos_tools/setup.py b/sfeos_tools/setup.py index e781cb66c..aff9a8fd9 100644 --- a/sfeos_tools/setup.py +++ b/sfeos_tools/setup.py @@ -11,7 +11,8 @@ description="CLI tools for managing stac-fastapi-elasticsearch-opensearch deployments", long_description=long_description, long_description_content_type="text/markdown", - author="SFEOS Contributors", + author="Jonathan Healy", + license="MIT", url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", packages=find_packages(), python_requires=">=3.8", From 97afd4c2c7b78e008db5222a7928d86499f16fcc Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 11 Oct 2025 00:31:46 +0800 Subject: [PATCH 12/22] clean up --- CHANGELOG.md | 5 ++--- .../sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6731372bb..0bd1bc063 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,12 +11,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Spatial search support for collections via `bbox` parameter on `/collections` endpoint. Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries when created or updated. [#481](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/481) - Introduced SFEOS Tools (`sfeos_tools/`) - An installable Click-based CLI package for managing SFEOS deployments. Initial command `add-bbox-shape` adds the `bbox_shape` field to existing collections for spatial search compatibility. Install with `pip install sfeos-tools[elasticsearch]` or `pip install sfeos-tools[opensearch]`. 
[#481](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/481) - -### Changed - - CloudFerro logo to sponsors and supporters list [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) - Latest news section to README [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) +### Changed + ### Fixed [v6.5.1] - 2025-09-30 diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index 18f6b8165..cb0c8f2d5 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -160,8 +160,6 @@ class Geometry(Protocol): # noqa "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, "properties": { "id": {"type": "keyword"}, - "title": {"type": "text"}, - "description": {"type": "text"}, "bbox_shape": {"type": "geo_shape"}, "extent.temporal.interval": { "type": "date", From 6a95c8e11c2bcb39c02be3c59bb7ad316b9838fc Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 11 Oct 2025 14:58:55 +0800 Subject: [PATCH 13/22] remove unused code block --- .../stac_fastapi/elasticsearch/database_logic.py | 6 ------ .../opensearch/stac_fastapi/opensearch/database_logic.py | 6 ------ 2 files changed, 12 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index f27f15bf9..9dcb96815 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -337,12 +337,6 @@ async def get_all_collections( else {"bool": {"must": query_parts}} ) - # Create a copy of the body for count query (without pagination and sorting) - count_body = body.copy() - if "search_after" in count_body: - del count_body["search_after"] - count_body["size"] = 0 - # Create async tasks for both search and count search_task = asyncio.create_task( self.client.search( diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 7f9be7238..cdc08f125 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -321,12 +321,6 @@ async def get_all_collections( else {"bool": {"must": query_parts}} ) - # Create a copy of the body for count query (without pagination and sorting) - count_body = body.copy() - if "search_after" in count_body: - del count_body["search_after"] - count_body["size"] = 0 - # Create async tasks for both search and count search_task = asyncio.create_task( self.client.search( From ebbc7fe5cf6063b51e830c775a5126897b5fd1fa Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 11 Oct 2025 15:05:32 +0800 Subject: [PATCH 14/22] update docstrings --- stac_fastapi/core/stac_fastapi/core/core.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index e2bdf4952..cc175b6ce 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -256,14 +256,17 @@ async def all_collections( """Read all collections from the database. Args: - datetime (Optional[str]): Filter collections by datetime range. 
limit (Optional[int]): Maximum number of collections to return. + bbox (Optional[BBox]): Bounding box to filter collections by spatial extent. + datetime (Optional[str]): Filter collections by datetime range. fields (Optional[List[str]]): Fields to include or exclude from the results. - sortby (Optional[str]): Sorting options for the results. + sortby (Optional[Union[str, List[str]]]): Sorting options for the results. filter_expr (Optional[str]): Structured filter expression in CQL2 JSON or CQL2-text format. - query (Optional[str]): Legacy query parameter (deprecated). filter_lang (Optional[str]): Must be 'cql2-json' or 'cql2-text' if specified, other values will result in an error. q (Optional[Union[str, List[str]]]): Free text search terms. + query (Optional[str]): Legacy query parameter (deprecated). + request (Request): FastAPI Request object. + token (Optional[str]): Pagination token for retrieving the next page of results. **kwargs: Keyword arguments from the request. Returns: From 1924950e3f62df5099451e142e80b71fe74733ca Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 11 Oct 2025 15:18:52 +0800 Subject: [PATCH 15/22] update base database logic --- .../core/stac_fastapi/core/base_database_logic.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py index c0a85498f..105fdf925 100644 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -22,7 +22,11 @@ async def get_all_collections( request: Any = None, sort: Optional[List[Dict[str, Any]]] = None, bbox: Optional[BBox] = None, - ) -> Tuple[List[Dict[str, Any]], Optional[str]]: + q: Optional[List[str]] = None, + filter: Optional[Dict[str, Any]] = None, + query: Optional[Dict[str, Dict[str, Any]]] = None, + datetime: Optional[str] = None, + ) -> Tuple[List[Dict[str, Any]], Optional[str], Optional[int]]: """Retrieve a list of collections from the database, supporting pagination. Args: @@ -31,9 +35,13 @@ async def get_all_collections( request (Any, optional): The FastAPI request object. Defaults to None. sort (Optional[List[Dict[str, Any]]], optional): Optional sort parameter. Defaults to None. bbox (Optional[BBox], optional): Bounding box to filter collections by spatial extent. Defaults to None. + q (Optional[List[str]], optional): Free text search terms. Defaults to None. + filter (Optional[Dict[str, Any]], optional): Structured query in CQL2 format. Defaults to None. + query (Optional[Dict[str, Dict[str, Any]]], optional): Query extension parameters. Defaults to None. + datetime (Optional[str], optional): Temporal filter. Defaults to None. Returns: - A tuple of (collections, next pagination token if any). + A tuple of (collections, next pagination token if any, optional count). 
""" pass From 783e395fe006747dd96ccfac8d244699c527812f Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 11 Oct 2025 20:10:43 +0800 Subject: [PATCH 16/22] created shared datetime fiter fn --- .../elasticsearch/database_logic.py | 69 ++----------------- .../stac_fastapi/opensearch/database_logic.py | 48 ++----------- .../sfeos_helpers/database/__init__.py | 2 + .../sfeos_helpers/database/query.py | 61 ++++++++++++++++ 4 files changed, 75 insertions(+), 105 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 9dcb96815..c49c246fc 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -30,13 +30,13 @@ from stac_fastapi.sfeos_helpers import filter as filter_module from stac_fastapi.sfeos_helpers.database import ( apply_collections_bbox_filter_shared, + apply_collections_datetime_filter_shared, apply_free_text_filter_shared, apply_intersects_filter_shared, create_index_templates_shared, delete_item_index_shared, get_queryables_mapping_shared, index_alias_by_collection_id, - index_by_collection_id, mk_actions, mk_item_id, populate_sort_shared, @@ -100,26 +100,6 @@ async def create_collection_index() -> None: await client.close() -async def create_item_index(collection_id: str): - """ - Create the index for Items. The settings of the index template will be used implicitly. - - Args: - collection_id (str): Collection identifier. - - Returns: - None - - """ - client = AsyncElasticsearchSettings().create_client - - await client.options(ignore_status=400).indices.create( - index=f"{index_by_collection_id(collection_id)}-000001", - body={"aliases": {index_alias_by_collection_id(collection_id): {}}}, - ) - await client.close() - - async def delete_item_index(collection_id: str): """Delete the index for items in a collection. 
@@ -322,12 +302,10 @@ async def get_all_collections( if bbox_filter: query_parts.append(bbox_filter) - # Combine all query parts with AND logic if there are multiple - datetime_filter = None - if datetime: - datetime_filter = self._apply_collection_datetime_filter(datetime) - if datetime_filter: - query_parts.append(datetime_filter) + # Apply datetime filter if provided + datetime_filter = apply_collections_datetime_filter_shared(datetime) + if datetime_filter: + query_parts.append(datetime_filter) # Combine all query parts with AND logic if query_parts: @@ -386,41 +364,6 @@ async def get_all_collections( return collections, next_token, matched - @staticmethod - def _apply_collection_datetime_filter( - datetime_str: Optional[str], - ) -> Optional[Dict[str, Any]]: - """Create a temporal filter for collections based on their extent.""" - if not datetime_str: - return None - - # Parse the datetime string into start and end - if "/" in datetime_str: - start, end = datetime_str.split("/") - # Replace open-ended ranges with concrete dates - if start == "..": - # For open-ended start, use a very early date - start = "1800-01-01T00:00:00Z" - if end == "..": - # For open-ended end, use a far future date - end = "2999-12-31T23:59:59Z" - else: - # If it's just a single date, use it for both start and end - start = end = datetime_str - - return { - "bool": { - "must": [ - # Check if any date in the array is less than or equal to the query end date - # This will match if the collection's start date is before or equal to the query end date - {"range": {"extent.temporal.interval": {"lte": end}}}, - # Check if any date in the array is greater than or equal to the query start date - # This will match if the collection's end date is after or equal to the query start date - {"range": {"extent.temporal.interval": {"gte": start}}}, - ] - } - } - async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """Retrieve a single item from the database. @@ -1388,7 +1331,7 @@ async def create_collection(self, collection: Collection, **kwargs: Any): None Notes: - A new index is created for the items in the Collection using the `create_item_index` function. + A new index is created for the items in the Collection if the index insertion strategy requires it. 
""" collection_id = collection["id"] diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index cdc08f125..c36e077a5 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -30,6 +30,7 @@ from stac_fastapi.sfeos_helpers import filter as filter_module from stac_fastapi.sfeos_helpers.database import ( apply_collections_bbox_filter_shared, + apply_collections_datetime_filter_shared, apply_free_text_filter_shared, apply_intersects_filter_shared, create_index_templates_shared, @@ -306,12 +307,10 @@ async def get_all_collections( if bbox_filter: query_parts.append(bbox_filter) - # Combine all query parts with AND logic if there are multiple - datetime_filter = None - if datetime: - datetime_filter = self._apply_collection_datetime_filter(datetime) - if datetime_filter: - query_parts.append(datetime_filter) + # Apply datetime filter if provided + datetime_filter = apply_collections_datetime_filter_shared(datetime) + if datetime_filter: + query_parts.append(datetime_filter) # Combine all query parts with AND logic if query_parts: @@ -456,41 +455,6 @@ def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str] search=search, free_text_queries=free_text_queries ) - @staticmethod - def _apply_collection_datetime_filter( - datetime_str: Optional[str], - ) -> Optional[Dict[str, Any]]: - """Create a temporal filter for collections based on their extent.""" - if not datetime_str: - return None - - # Parse the datetime string into start and end - if "/" in datetime_str: - start, end = datetime_str.split("/") - # Replace open-ended ranges with concrete dates - if start == "..": - # For open-ended start, use a very early date - start = "1800-01-01T00:00:00Z" - if end == "..": - # For open-ended end, use a far future date - end = "2999-12-31T23:59:59Z" - else: - # If it's just a single date, use it for both start and end - start = end = datetime_str - - return { - "bool": { - "must": [ - # Check if any date in the array is less than or equal to the query end date - # This will match if the collection's start date is before or equal to the query end date - {"range": {"extent.temporal.interval": {"lte": end}}}, - # Check if any date in the array is greater than or equal to the query start date - # This will match if the collection's end date is after or equal to the query start date - {"range": {"extent.temporal.interval": {"gte": start}}}, - ] - } - } - @staticmethod def apply_datetime_filter( search: Search, datetime: Optional[str] @@ -1358,7 +1322,7 @@ async def create_collection(self, collection: Collection, **kwargs: Any): ConflictError: If a Collection with the same id already exists in the database. Notes: - A new index is created for the items in the Collection using the `create_item_index` function. + A new index is created for the items in the Collection if the index insertion strategy requires it. 
""" collection_id = collection["id"] diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py index b1b0326f7..8dae3362e 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py @@ -43,6 +43,7 @@ from .mapping import get_queryables_mapping_shared from .query import ( apply_collections_bbox_filter_shared, + apply_collections_datetime_filter_shared, apply_free_text_filter_shared, apply_intersects_filter_shared, populate_sort_shared, @@ -61,6 +62,7 @@ "apply_free_text_filter_shared", "apply_intersects_filter_shared", "apply_collections_bbox_filter_shared", + "apply_collections_datetime_filter_shared", "populate_sort_shared", # Mapping operations "get_queryables_mapping_shared", diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py index 23727756c..72285a56f 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -68,6 +68,67 @@ def apply_intersects_filter_shared( } +def apply_collections_datetime_filter_shared( + datetime_str: Optional[str], +) -> Optional[Dict[str, Any]]: + """Create a temporal filter for collections based on their extent. + + Args: + datetime_str: The datetime parameter. Can be: + - A single datetime string (e.g., "2020-01-01T00:00:00Z") + - A datetime range with "/" separator (e.g., "2020-01-01T00:00:00Z/2021-01-01T00:00:00Z") + - Open-ended ranges using ".." (e.g., "../2021-01-01T00:00:00Z" or "2020-01-01T00:00:00Z/..") + - None if no datetime filter is provided + + Returns: + Optional[Dict[str, Any]]: A dictionary containing the temporal filter configuration + that can be used with Elasticsearch/OpenSearch queries, or None if datetime_str is None. + Example return value: + { + "bool": { + "must": [ + {"range": {"extent.temporal.interval": {"lte": "2021-01-01T00:00:00Z"}}}, + {"range": {"extent.temporal.interval": {"gte": "2020-01-01T00:00:00Z"}}} + ] + } + } + + Notes: + - This function is specifically for filtering collections by their temporal extent + - It queries the extent.temporal.interval field + - Open-ended ranges (..) 
are replaced with concrete dates (1800-01-01 for start, 2999-12-31 for end) + """ + if not datetime_str: + return None + + # Parse the datetime string into start and end + if "/" in datetime_str: + start, end = datetime_str.split("/") + # Replace open-ended ranges with concrete dates + if start == "..": + # For open-ended start, use a very early date + start = "1800-01-01T00:00:00Z" + if end == "..": + # For open-ended end, use a far future date + end = "2999-12-31T23:59:59Z" + else: + # If it's just a single date, use it for both start and end + start = end = datetime_str + + return { + "bool": { + "must": [ + # Check if any date in the array is less than or equal to the query end date + # This will match if the collection's start date is before or equal to the query end date + {"range": {"extent.temporal.interval": {"lte": end}}}, + # Check if any date in the array is greater than or equal to the query start date + # This will match if the collection's end date is after or equal to the query start date + {"range": {"extent.temporal.interval": {"gte": start}}}, + ] + } + } + + def apply_collections_bbox_filter_shared( bbox: Union[str, List[float], None] ) -> Optional[Dict[str, Dict]]: From 5935463dd56941341446f3e031c841c143e1c9a8 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 12 Oct 2025 00:53:01 +0800 Subject: [PATCH 17/22] move add bbox shape to fn --- sfeos_tools/sfeos_tools/cli.py | 58 ++----------- .../core/stac_fastapi/core/serializers.py | 40 ++------- .../sfeos_helpers/database/__init__.py | 3 +- .../sfeos_helpers/database/utils.py | 82 ++++++++++++++++++- 4 files changed, 94 insertions(+), 89 deletions(-) diff --git a/sfeos_tools/sfeos_tools/cli.py b/sfeos_tools/sfeos_tools/cli.py index 0da4f7140..827e81169 100644 --- a/sfeos_tools/sfeos_tools/cli.py +++ b/sfeos_tools/sfeos_tools/cli.py @@ -14,15 +14,15 @@ import click -from stac_fastapi.core.utilities import bbox2polygon +from stac_fastapi.sfeos_helpers.database import add_bbox_shape_to_collection from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -async def add_bbox_shape_to_collection(client, collection_doc, backend): - """Add bbox_shape field to a single collection document. +async def process_collection_bbox_shape(client, collection_doc, backend): + """Process a single collection document to add bbox_shape field. 
Args: client: Elasticsearch/OpenSearch client @@ -35,56 +35,12 @@ async def add_bbox_shape_to_collection(client, collection_doc, backend): collection = collection_doc["_source"] collection_id = collection.get("id", collection_doc["_id"]) - # Check if bbox_shape already exists - if "bbox_shape" in collection: - logger.info( - f"Collection '{collection_id}' already has bbox_shape field, skipping" - ) - return False - - # Check if collection has spatial extent - if "extent" not in collection or "spatial" not in collection["extent"]: - logger.warning(f"Collection '{collection_id}' has no spatial extent, skipping") - return False - - spatial_extent = collection["extent"]["spatial"] - if "bbox" not in spatial_extent or not spatial_extent["bbox"]: - logger.warning( - f"Collection '{collection_id}' has no bbox in spatial extent, skipping" - ) - return False - - # Get the first bbox (collections can have multiple bboxes, but we use the first one) - bbox = ( - spatial_extent["bbox"][0] - if isinstance(spatial_extent["bbox"][0], list) - else spatial_extent["bbox"] - ) + # Use the shared function to add bbox_shape + was_added = add_bbox_shape_to_collection(collection) - if len(bbox) < 4: - logger.warning( - f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4" - ) + if not was_added: return False - # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) - # For 2D polygon, we only need the x,y coordinates and discard altitude (z) values - minx, miny = bbox[0], bbox[1] - if len(bbox) == 4: - # 2D bbox: [minx, miny, maxx, maxy] - maxx, maxy = bbox[2], bbox[3] - else: - # 3D bbox: [minx, miny, minz, maxx, maxy, maxz] - # Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz) - maxx, maxy = bbox[3], bbox[4] - - # Convert bbox to GeoJSON polygon - bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy) - collection["bbox_shape"] = { - "type": "Polygon", - "coordinates": bbox_polygon_coords, - } - # Update the collection in the database if backend == "elasticsearch": await client.index( @@ -154,7 +110,7 @@ async def run_add_bbox_shape(backend): skipped_count = 0 for hit in response["hits"]["hits"]: - was_updated = await add_bbox_shape_to_collection(client, hit, backend) + was_updated = await process_collection_bbox_shape(client, hit, backend) if was_updated: updated_count += 1 else: diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py index d6a3b64ff..12639a5b6 100644 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -10,7 +10,7 @@ from stac_fastapi.core.datetime_utils import now_to_rfc3339_str from stac_fastapi.core.models.links import CollectionLinks -from stac_fastapi.core.utilities import bbox2polygon, get_bool_env +from stac_fastapi.core.utilities import get_bool_env from stac_fastapi.types import stac as stac_types from stac_fastapi.types.links import ItemLinks, resolve_links @@ -139,45 +139,15 @@ def stac_to_db( Returns: stac_types.Collection: The database-ready STAC Collection object. 
""" + from stac_fastapi.sfeos_helpers.database import add_bbox_shape_to_collection + collection = deepcopy(collection) collection["links"] = resolve_links( collection.get("links", []), str(request.base_url) ) - # Convert bbox to bbox_shape for geospatial queries - if "extent" in collection and "spatial" in collection["extent"]: - spatial_extent = collection["extent"]["spatial"] - if "bbox" in spatial_extent and spatial_extent["bbox"]: - # Get the first bbox (collections can have multiple bboxes, but we use the first one) - bbox = ( - spatial_extent["bbox"][0] - if isinstance(spatial_extent["bbox"][0], list) - else spatial_extent["bbox"] - ) - collection_id = collection.get("id", "unknown") - - if len(bbox) >= 4: - # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) - # For 2D polygon, we only need the x,y coordinates and discard altitude (z) values - minx, miny = bbox[0], bbox[1] - if len(bbox) == 4: - # 2D bbox: [minx, miny, maxx, maxy] - maxx, maxy = bbox[2], bbox[3] - else: - # 3D bbox: [minx, miny, minz, maxx, maxy, maxz] - # Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz) - maxx, maxy = bbox[3], bbox[4] - - # Convert bbox to GeoJSON polygon - bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy) - collection["bbox_shape"] = { - "type": "Polygon", - "coordinates": bbox_polygon_coords, - } - else: - logger.warning( - f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4" - ) + # Convert bbox to bbox_shape for geospatial queries (ES/OS specific) + add_bbox_shape_to_collection(collection) if get_bool_env("STAC_INDEX_ASSETS"): collection["assets"] = [ diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py index 8dae3362e..01dae07b8 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py @@ -48,7 +48,7 @@ apply_intersects_filter_shared, populate_sort_shared, ) -from .utils import get_bool_env, validate_refresh +from .utils import add_bbox_shape_to_collection, get_bool_env, validate_refresh __all__ = [ # Index operations @@ -72,6 +72,7 @@ # Utility functions "validate_refresh", "get_bool_env", + "add_bbox_shape_to_collection", # Datetime utilities "return_date", "extract_date", diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/utils.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/utils.py index 12085c378..eaa596fad 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/utils.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/utils.py @@ -5,9 +5,9 @@ """ import logging -from typing import Dict, List, Union +from typing import Any, Dict, List, Union -from stac_fastapi.core.utilities import get_bool_env +from stac_fastapi.core.utilities import bbox2polygon, get_bool_env from stac_fastapi.extensions.core.transaction.request import ( PatchAddReplaceTest, PatchOperation, @@ -15,6 +15,84 @@ ) from stac_fastapi.sfeos_helpers.models.patch import ElasticPath, ESCommandSet +logger = logging.getLogger(__name__) + + +def add_bbox_shape_to_collection(collection: Dict[str, Any]) -> bool: + """Add bbox_shape field to a collection document for spatial queries. 
+ + This function extracts the bounding box from a collection's spatial extent + and converts it to a GeoJSON polygon shape that can be used for geospatial + queries in Elasticsearch/OpenSearch. + + Args: + collection: Collection document dictionary to modify in-place. + + Returns: + bool: True if bbox_shape was added, False if it was skipped (already exists, + no spatial extent, or invalid bbox). + + Notes: + - Modifies the collection dictionary in-place by adding a 'bbox_shape' field + - Handles both 2D [minx, miny, maxx, maxy] and 3D [minx, miny, minz, maxx, maxy, maxz] bboxes + - Uses the first bbox if multiple are present in the collection + - Logs warnings for collections with invalid or missing bbox data + """ + collection_id = collection.get("id", "unknown") + + # Check if bbox_shape already exists + if "bbox_shape" in collection: + logger.debug( + f"Collection '{collection_id}' already has bbox_shape field, skipping" + ) + return False + + # Check if collection has spatial extent + if "extent" not in collection or "spatial" not in collection["extent"]: + logger.warning(f"Collection '{collection_id}' has no spatial extent, skipping") + return False + + spatial_extent = collection["extent"]["spatial"] + if "bbox" not in spatial_extent or not spatial_extent["bbox"]: + logger.warning( + f"Collection '{collection_id}' has no bbox in spatial extent, skipping" + ) + return False + + # Get the first bbox (collections can have multiple bboxes, but we use the first one) + bbox = ( + spatial_extent["bbox"][0] + if isinstance(spatial_extent["bbox"][0], list) + else spatial_extent["bbox"] + ) + + if len(bbox) < 4: + logger.warning( + f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4" + ) + return False + + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For 2D polygon, we only need the x,y coordinates and discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox: [minx, miny, maxx, maxy] + maxx, maxy = bbox[2], bbox[3] + else: + # 3D bbox: [minx, miny, minz, maxx, maxy, maxz] + # Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + + # Convert bbox to GeoJSON polygon + bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy) + collection["bbox_shape"] = { + "type": "Polygon", + "coordinates": bbox_polygon_coords, + } + + logger.debug(f"Collection '{collection_id}': Added bbox_shape field") + return True + def validate_refresh(value: Union[str, bool]) -> str: """ From 724fd4221af8b563c829cfdecbf98e5dafb307f3 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 12 Oct 2025 01:07:45 +0800 Subject: [PATCH 18/22] update license --- sfeos_tools/LICENSE | 2 +- sfeos_tools/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sfeos_tools/LICENSE b/sfeos_tools/LICENSE index b91f107d5..1c6074b87 100644 --- a/sfeos_tools/LICENSE +++ b/sfeos_tools/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2025 Jonathan Healy +Copyright (c) 2025 Jonathan Healy and CloudFerro S.A. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/sfeos_tools/setup.py b/sfeos_tools/setup.py index aff9a8fd9..d5cf44162 100644 --- a/sfeos_tools/setup.py +++ b/sfeos_tools/setup.py @@ -11,7 +11,7 @@ description="CLI tools for managing stac-fastapi-elasticsearch-opensearch deployments", long_description=long_description, long_description_content_type="text/markdown", - author="Jonathan Healy", + author="Jonathan Healy, CloudFerro S.A.", license="MIT", url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", packages=find_packages(), From 3aafba06e3739858f2148f457852e9500f65e9b7 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 12 Oct 2025 11:47:49 +0800 Subject: [PATCH 19/22] fix headings --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bd1bc063..6323533ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed -[v6.5.1] - 2025-09-30 +## [v6.5.1] - 2025-09-30 ### Fixed @@ -26,7 +26,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Issue where datetime param was not being passed from POST collections search logic to Elasticsearch [#483](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/483) - Collections search tests to ensure both GET /collections and GET/POST /collections-search endpoints are tested [#483](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/483) -[v6.5.0] - 2025-09-29 +## [v6.5.0] - 2025-09-29 ### Added From 0510fef99a844f3379054fc885151374202b3aaa Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 12 Oct 2025 12:25:28 +0800 Subject: [PATCH 20/22] move add bbox shape, check if extension enabled --- stac_fastapi/core/stac_fastapi/core/serializers.py | 5 ----- .../stac_fastapi/elasticsearch/database_logic.py | 13 +++++++++++++ .../stac_fastapi/opensearch/database_logic.py | 13 +++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py index 12639a5b6..973de18d8 100644 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -139,16 +139,11 @@ def stac_to_db( Returns: stac_types.Collection: The database-ready STAC Collection object. 
""" - from stac_fastapi.sfeos_helpers.database import add_bbox_shape_to_collection - collection = deepcopy(collection) collection["links"] = resolve_links( collection.get("links", []), str(request.base_url) ) - # Convert bbox to bbox_shape for geospatial queries (ES/OS specific) - add_bbox_shape_to_collection(collection) - if get_bool_env("STAC_INDEX_ASSETS"): collection["assets"] = [ {"es_key": k, **v} for k, v in collection.get("assets", {}).items() diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index c49c246fc..c3f6f8530 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -29,6 +29,7 @@ ) from stac_fastapi.sfeos_helpers import filter as filter_module from stac_fastapi.sfeos_helpers.database import ( + add_bbox_shape_to_collection, apply_collections_bbox_filter_shared, apply_collections_datetime_filter_shared, apply_free_text_filter_shared, @@ -1349,6 +1350,12 @@ async def create_collection(self, collection: Collection, **kwargs: Any): if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): raise ConflictError(f"Collection {collection_id} already exists") + if get_bool_env("ENABLE_COLLECTIONS_SEARCH") or get_bool_env( + "ENABLE_COLLECTIONS_SEARCH_ROUTE" + ): + # Convert bbox to bbox_shape for geospatial queries (ES/OS specific) + add_bbox_shape_to_collection(collection) + # Index the collection in the database await self.client.index( index=COLLECTIONS_INDEX, @@ -1452,6 +1459,12 @@ async def update_collection( await self.delete_collection(collection_id) else: + if get_bool_env("ENABLE_COLLECTIONS_SEARCH") or get_bool_env( + "ENABLE_COLLECTIONS_SEARCH_ROUTE" + ): + # Convert bbox to bbox_shape for geospatial queries (ES/OS specific) + add_bbox_shape_to_collection(collection) + # Update the existing collection await self.client.index( index=COLLECTIONS_INDEX, diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index c36e077a5..9d814ba92 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -29,6 +29,7 @@ from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings from stac_fastapi.sfeos_helpers import filter as filter_module from stac_fastapi.sfeos_helpers.database import ( + add_bbox_shape_to_collection, apply_collections_bbox_filter_shared, apply_collections_datetime_filter_shared, apply_free_text_filter_shared, @@ -1339,6 +1340,12 @@ async def create_collection(self, collection: Collection, **kwargs: Any): if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): raise ConflictError(f"Collection {collection_id} already exists") + if get_bool_env("ENABLE_COLLECTIONS_SEARCH") or get_bool_env( + "ENABLE_COLLECTIONS_SEARCH_ROUTE" + ): + # Convert bbox to bbox_shape for geospatial queries (ES/OS specific) + add_bbox_shape_to_collection(collection) + await self.client.index( index=COLLECTIONS_INDEX, id=collection_id, @@ -1430,6 +1437,12 @@ async def update_collection( await self.delete_collection(collection_id=collection_id, **kwargs) else: + if get_bool_env("ENABLE_COLLECTIONS_SEARCH") or get_bool_env( + "ENABLE_COLLECTIONS_SEARCH_ROUTE" + ): + # Convert bbox to bbox_shape for geospatial queries (ES/OS specific) + 
add_bbox_shape_to_collection(collection) + await self.client.index( index=COLLECTIONS_INDEX, id=collection_id, From cb1f29abf3091d44ebaf91013007ac661d1eaddb Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 12 Oct 2025 13:46:23 +0800 Subject: [PATCH 21/22] update latest news --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 711f768bf..cb47f83f3 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,8 @@ The following organizations have contributed time and/or funding to support the
-- 10/04/2025: The [CloudFerro](https://cloudferro.com/) logo has been added to the sponsors and supporters list above. Their sponsorship of the ongoing collections search extension work has been invaluable. This is in addition to the many other important changes and updates their developers have added to the project. +- **10/12/2025:** Collections search bbox functionality added! The collections search extension now supports bbox queries. Collections will need to be updated via the API or with the new [`SFEOS-tools`](#sfeos-tools-cli) CLI package to support geospatial discoverability. Thanks again to CloudFerro for their sponsorship of this work! +- **10/04/2025:** The [CloudFerro](https://cloudferro.com/) logo has been added to the sponsors and supporters list above. Their sponsorship of the ongoing collections search extension work has been invaluable. This is in addition to the many other important changes and updates their developers have added to the project.
 

From 567e17e785676e5f9acf94f7465b4aa7b3518f9 Mon Sep 17 00:00:00 2001
From: jonhealy1 
Date: Sun, 12 Oct 2025 13:50:57 +0800
Subject: [PATCH 22/22] improve readability

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index cb47f83f3..37dbdba7d 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,8 @@ The following organizations have contributed time and/or funding to support the
-- **10/12/2025:** Collections search bbox functionality added! The collections search extension now supports bbox queries. Collections will need to be updated via the API or with the new [`SFEOS-tools`](#sfeos-tools-cli) CLI package to support geospatial discoverability. Thanks again to CloudFerro for their sponsorship of this work! -- **10/04/2025:** The [CloudFerro](https://cloudferro.com/) logo has been added to the sponsors and supporters list above. Their sponsorship of the ongoing collections search extension work has been invaluable. This is in addition to the many other important changes and updates their developers have added to the project. +- **10/12/2025:** Collections search **bbox** functionality added! The collections search extension now supports bbox queries. Collections will need to be updated via the API or with the new **[SFEOS-tools](#sfeos-tools-cli)** CLI package to support geospatial discoverability. Thanks again to **CloudFerro** for their sponsorship of this work! +- **10/04/2025:** The **[CloudFerro](https://cloudferro.com/)** logo has been added to the sponsors and supporters list above. Their sponsorship of the ongoing collections search extension work has been invaluable. This is in addition to the many other important changes and updates their developers have added to the project.
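
---

How to test: a minimal end-to-end sketch of the bbox collections search added in this
series. This is not part of the patches themselves and rests on a few assumptions: the
compose stack is running with ENABLE_COLLECTIONS_SEARCH_ROUTE=true, the
Elasticsearch-backed app is listening on localhost:8080 as in compose.yml, the
transaction extension is enabled so POST /collections works, and httpx is installed.
The collection id and all coordinates are illustrative.

    import httpx

    base = "http://localhost:8080"

    # Register a collection covering roughly western Europe. With the
    # collections-search env flag set, create_collection() derives the
    # geo_shape-backed "bbox_shape" field from extent.spatial.bbox via
    # add_bbox_shape_to_collection(), making the collection spatially
    # discoverable.
    collection = {
        "type": "Collection",
        "stac_version": "1.0.0",
        "id": "demo-europe",
        "description": "Demo collection for bbox-based collections search",
        "license": "CC-BY-4.0",
        "extent": {
            "spatial": {"bbox": [[-10.0, 35.0, 30.0, 60.0]]},
            "temporal": {"interval": [["2020-01-01T00:00:00Z", None]]},
        },
        "links": [],
    }
    httpx.post(f"{base}/collections", json=collection).raise_for_status()

    # Spatial filter: only collections whose bbox_shape intersects the
    # query bbox are returned.
    hit = httpx.get(f"{base}/collections", params={"bbox": "-5,40,5,50"})
    miss = httpx.get(f"{base}/collections", params={"bbox": "100,-10,110,0"})

    # Temporal filter: open-ended ranges are accepted. Per
    # apply_collections_datetime_filter_shared(), ".." is replaced with a
    # concrete sentinel date before querying extent.temporal.interval.
    recent = httpx.get(
        f"{base}/collections",
        params={"bbox": "-5,40,5,50", "datetime": "2021-01-01T00:00:00Z/.."},
    )

Note that collections indexed before this series have no bbox_shape field and will not
match bbox queries until backfilled, either with the sfeos-tools CLI or with the
migration scripts added earlier in the series.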