From b585819d264c2b9ea6fe861f7382e81a088a6a60 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 19 Sep 2025 14:57:41 +0800 Subject: [PATCH 1/8] filter scratch --- stac_fastapi/core/stac_fastapi/core/core.py | 24 ++++- .../elasticsearch/database_logic.py | 101 +++++++++++++++++- .../tests/api/test_api_search_collections.py | 67 +++++++++++- 3 files changed, 185 insertions(+), 7 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 9d01deaf..ca6db253 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -228,6 +228,7 @@ async def all_collections( self, fields: Optional[List[str]] = None, sortby: Optional[str] = None, + filter_expr: Optional[str] = None, q: Optional[Union[str, List[str]]] = None, **kwargs, ) -> stac_types.Collections: @@ -236,12 +237,14 @@ async def all_collections( Args: fields (Optional[List[str]]): Fields to include or exclude from the results. sortby (Optional[str]): Sorting options for the results. - q (Optional[List[str]]): Free text search terms. + filter_expr (Optional[str]): Structured filter in CQL2 format. + q (Optional[Union[str, List[str]]]): Free text search terms. **kwargs: Keyword arguments from the request. Returns: A Collections object containing all the collections in the database and links to various resources. 
""" + print("filter: ", filter_expr) request = kwargs["request"] base_url = str(request.base_url) limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) @@ -276,8 +279,25 @@ async def all_collections( if q is not None: q_list = [q] if isinstance(q, str) else q + # Parse the filter parameter if provided + parsed_filter = None + if filter_expr is not None: + try: + import orjson + + parsed_filter = orjson.loads(filter_expr) + except Exception as e: + raise HTTPException( + status_code=400, detail=f"Invalid filter parameter: {e}" + ) + collections, next_token = await self.database.get_all_collections( - token=token, limit=limit, request=request, sort=sort, q=q_list + token=token, + limit=limit, + request=request, + sort=sort, + q=q_list, + filter=parsed_filter, ) # Apply field filtering if fields parameter was provided diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index c472039b..e39e9289 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -176,6 +176,7 @@ async def get_all_collections( request: Request, sort: Optional[List[Dict[str, Any]]] = None, q: Optional[List[str]] = None, + filter: Optional[Dict[str, Any]] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Retrieve a list of collections from Elasticsearch, supporting pagination. @@ -185,6 +186,7 @@ async def get_all_collections( request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. q (Optional[List[str]]): Free text search terms. + filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. Returns: A tuple of (collections, next pagination token if any). 
@@ -225,6 +227,9 @@ async def get_all_collections( if token: body["search_after"] = [token] + # Build the query part of the body + query_parts = [] + # Apply free text query if provided if q: # For collections, we want to search across all relevant fields @@ -251,10 +256,98 @@ async def get_all_collections( } ) - # Add the query to the body using bool query with should clauses - body["query"] = { - "bool": {"should": should_clauses, "minimum_should_match": 1} - } + # Add the free text query to the query parts + query_parts.append( + {"bool": {"should": should_clauses, "minimum_should_match": 1}} + ) + + # Apply structured filter if provided + if filter: + try: + # For simple direct query handling without using to_es + # This is a simplified approach that handles common filter patterns + if isinstance(filter, dict): + # Check if this is a CQL2 filter with op and args + if "op" in filter and "args" in filter: + op = filter.get("op") + args = filter.get("args") + + # Handle equality operator + if ( + op == "=" + and len(args) == 2 + and isinstance(args[0], dict) + and "property" in args[0] + ): + field = args[0]["property"] + value = args[1] + + # Handle different field types + if field == "id": + # Direct match on ID field + query_parts.append({"term": {"id": value}}) + elif field == "title": + # Match on title field + query_parts.append({"match": {"title": value}}) + elif field == "description": + # Match on description field + query_parts.append({"match": {"description": value}}) + else: + # For other fields, try a multi-match query + query_parts.append( + { + "multi_match": { + "query": value, + "fields": [field, f"{field}.*"], + "type": "best_fields", + } + } + ) + + # Handle regex operator + elif ( + op == "=~" + and len(args) == 2 + and isinstance(args[0], dict) + and "property" in args[0] + ): + field = args[0]["property"] + pattern = args[1].replace(".*", "*") + + # Use wildcard query for pattern matching + query_parts.append( + { + "wildcard": { + field: { + 
"value": pattern, + "case_insensitive": True, + } + } + } + ) + + # For other operators, use a match_all query as fallback + else: + query_parts.append({"match_all": {}}) + else: + # Not a valid CQL2 filter + query_parts.append({"match_all": {}}) + else: + # Not a dictionary + query_parts.append({"match_all": {}}) + except Exception as e: + logger = logging.getLogger(__name__) + logger.error(f"Error converting filter to Elasticsearch: {e}") + # If there's an error, add a query that matches nothing + query_parts.append({"bool": {"must_not": {"match_all": {}}}}) + raise + + # Combine all query parts with AND logic if there are multiple + if query_parts: + if len(query_parts) == 1: + body["query"] = query_parts[0] + else: + body["query"] = {"bool": {"must": query_parts}} # Execute the search response = await self.client.search( diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index de546079..d6e5368a 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -163,7 +163,7 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_tes # Use unique prefixes to avoid conflicts between tests test_prefix = f"q-get-{uuid.uuid4().hex[:8]}" - # Create collections with different content to test free text search + # Create collections with different content to test structured filter test_collections = [ { "id": f"{test_prefix}-sentinel", @@ -226,3 +226,68 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_tes # Should only find the landsat collection assert len(found_collections) == 1 assert found_collections[0]["id"] == f"{test_prefix}-modis" + + +@pytest.mark.asyncio +async def test_collections_filter_search(app_client, txn_client, load_test_data): + """Verify GET /collections honors the filter parameter for structured search.""" + # Create multiple collections with different content 
+ base_collection = load_test_data("test_collection.json") + + # Use unique prefixes to avoid conflicts between tests + test_prefix = f"filter-{uuid.uuid4().hex[:8]}" + + # Create collections with different content to test structured filter + test_collections = [ + { + "id": f"{test_prefix}-sentinel", + "title": "Sentinel-2 Collection", + "description": "Collection of Sentinel-2 data", + "summaries": {"platform": ["sentinel-2a", "sentinel-2b"]}, + }, + { + "id": f"{test_prefix}-landsat", + "title": "Landsat Collection", + "description": "Collection of Landsat data", + "summaries": {"platform": ["landsat-8", "landsat-9"]}, + }, + { + "id": f"{test_prefix}-modis", + "title": "MODIS Collection", + "description": "Collection of MODIS data", + "summaries": {"platform": ["terra", "aqua"]}, + }, + ] + + for i, coll in enumerate(test_collections): + test_collection = base_collection.copy() + test_collection["id"] = coll["id"] + test_collection["title"] = coll["title"] + test_collection["description"] = coll["description"] + test_collection["summaries"] = coll["summaries"] + await create_collection(txn_client, test_collection) + + # Test structured filter for collections with specific ID + import json + + # Create a simple filter for exact ID match - similar to what works in Postman + filter_expr = {"op": "=", "args": [{"property": "id"}, f"{test_prefix}-sentinel"]} + + # Convert to JSON string for URL parameter + filter_json = json.dumps(filter_expr) + + # Use the exact format that works in Postman + resp = await app_client.get( + f"/collections?filter={filter_json}", + ) + assert resp.status_code == 200 + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + found_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Should only find the sentinel collection + assert len(found_collections) == 1 + assert found_collections[0]["id"] == f"{test_prefix}-sentinel" From 
14a625d638e3105820ae073d3e5614840e758c6b Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 19 Sep 2025 15:07:13 +0800 Subject: [PATCH 2/8] filter lang --- stac_fastapi/core/stac_fastapi/core/core.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index ca6db253..0fdfe658 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -229,6 +229,8 @@ async def all_collections( fields: Optional[List[str]] = None, sortby: Optional[str] = None, filter_expr: Optional[str] = None, + query: Optional[str] = None, + filter_lang: Optional[str] = None, q: Optional[Union[str, List[str]]] = None, **kwargs, ) -> stac_types.Collections: @@ -237,7 +239,9 @@ async def all_collections( Args: fields (Optional[List[str]]): Fields to include or exclude from the results. sortby (Optional[str]): Sorting options for the results. - filter_expr (Optional[str]): Structured filter in CQL2 format. + filter_expr (Optional[str]): Structured filter expression in CQL2 JSON format. + query (Optional[str]): Legacy query parameter (deprecated). + filter_lang (Optional[str]): Must be 'cql2-json' if specified, other values will result in an error. q (Optional[Union[str, List[str]]]): Free text search terms. **kwargs: Keyword arguments from the request. @@ -285,7 +289,16 @@ async def all_collections( try: import orjson - parsed_filter = orjson.loads(filter_expr) + # Check if filter_lang is specified and not cql2-json + if filter_lang is not None and filter_lang != "cql2-json": + # Raise an error for unsupported filter languages + raise HTTPException( + status_code=400, + detail=f"Only 'cql2-json' filter language is supported for collections. 
Got '{filter_lang}'.", + ) + + # For GET requests, we only handle cql2-json + parsed_filter = orjson.loads(unquote_plus(filter_expr)) except Exception as e: raise HTTPException( status_code=400, detail=f"Invalid filter parameter: {e}" From e469a0ec8a5b4ac6397b309ab9a635f0c7c99d8a Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 26 Sep 2025 23:37:07 +0800 Subject: [PATCH 3/8] add filter extension to app --- CHANGELOG.md | 2 ++ .../elasticsearch/stac_fastapi/elasticsearch/app.py | 9 +++++---- stac_fastapi/opensearch/stac_fastapi/opensearch/app.py | 9 +++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01420b39..c1f47b44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- GET `/collections` collection search structured filtering extension with support for both CQL2 JSON and CQL2 text formats. [#471](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/471) + ### Changed ### Fixed diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 18b52147..e3292cbf 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -34,9 +34,10 @@ create_collection_index, create_index_templates, ) -from stac_fastapi.extensions.core import ( # CollectionSearchFilterExtension, +from stac_fastapi.extensions.core import ( AggregationExtension, CollectionSearchExtension, + CollectionSearchFilterExtension, FilterExtension, FreeTextExtension, SortExtension, @@ -123,9 +124,9 @@ # QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]), SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]), FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]), - # CollectionSearchFilterExtension( - # 
conformance_classes=[FilterConformanceClasses.COLLECTIONS] - # ), + CollectionSearchFilterExtension( + conformance_classes=[FilterConformanceClasses.COLLECTIONS] + ), FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]), ] diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index 34d55589..b842d929 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -28,9 +28,10 @@ from stac_fastapi.core.route_dependencies import get_route_dependencies from stac_fastapi.core.session import Session from stac_fastapi.core.utilities import get_bool_env -from stac_fastapi.extensions.core import ( # CollectionSearchFilterExtension, +from stac_fastapi.extensions.core import ( AggregationExtension, CollectionSearchExtension, + CollectionSearchFilterExtension, FilterExtension, FreeTextExtension, SortExtension, @@ -123,9 +124,9 @@ # QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]), SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]), FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]), - # CollectionSearchFilterExtension( - # conformance_classes=[FilterConformanceClasses.COLLECTIONS] - # ), + CollectionSearchFilterExtension( + conformance_classes=[FilterConformanceClasses.COLLECTIONS] + ), FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]), ] From 098872a0407eedcba3fcfe50b1d1bdef67b2585f Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 26 Sep 2025 23:39:44 +0800 Subject: [PATCH 4/8] update readme --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 78c02408..cf0c5ff1 100644 --- a/README.md +++ b/README.md @@ -131,9 +131,18 @@ SFEOS implements extended capabilities for the `/collections` endpoint, allowing - Searches across multiple text fields 
including title, description, and keywords - Supports partial word matching and relevance-based sorting +- **Structured Filtering**: Filter collections using CQL2 expressions + - JSON format: `/collections?filter={"op":"=","args":[{"property":"id"},"sentinel-2"]}&filter-lang=cql2-json` + - Text format: `/collections?filter=id='sentinel-2'&filter-lang=cql2-text` (note: string values must be quoted) + - Advanced text format: `/collections?filter=id LIKE '%sentinel%'&filter-lang=cql2-text` (supports LIKE, BETWEEN, etc.) + - Supports both CQL2 JSON and CQL2 text formats with various operators + - Enables precise filtering on any collection property + +> **Note on HTTP Methods**: All collection search extensions (sorting, field selection, free text search, and structured filtering) currently only support GET requests. POST requests with these parameters in the request body are not yet supported. + These extensions make it easier to build user interfaces that display and navigate through collections efficiently. -> **Configuration**: Collection search extensions can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled. +> **Configuration**: Collection search extensions (sorting, field selection, free text search, and structured filtering) can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled. > **Note**: Sorting is only available on fields that are indexed for sorting in Elasticsearch/OpenSearch. With the default mappings, you can sort on: > - `id` (keyword field) @@ -156,7 +165,7 @@ This project is organized into several packages, each with a specific purpose: - Shared logic and utilities that improve code reuse between backends - **stac_fastapi_elasticsearch**: Complete implementation of the STAC API using Elasticsearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`. 
-- + - **stac_fastapi_opensearch**: Complete implementation of the STAC API using OpenSearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`. ## Examples From dc31907dc41476c168d59c9f9f373edb851d7656 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 27 Sep 2025 00:02:55 +0800 Subject: [PATCH 5/8] support cql2-text --- stac_fastapi/core/stac_fastapi/core/core.py | 42 ++++++-- .../elasticsearch/database_logic.py | 97 +++---------------- .../stac_fastapi/opensearch/database_logic.py | 38 ++++++-- .../tests/api/test_api_search_collections.py | 48 +++++++-- 4 files changed, 117 insertions(+), 108 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 0fdfe658..f0952f8b 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -248,7 +248,6 @@ async def all_collections( Returns: A Collections object containing all the collections in the database and links to various resources. """ - print("filter: ", filter_expr) request = kwargs["request"] base_url = str(request.base_url) limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) @@ -287,18 +286,45 @@ async def all_collections( parsed_filter = None if filter_expr is not None: try: - import orjson - - # Check if filter_lang is specified and not cql2-json - if filter_lang is not None and filter_lang != "cql2-json": + # Check if filter_lang is specified and not one of the supported formats + if filter_lang is not None and filter_lang not in [ + "cql2-json", + "cql2-text", + ]: # Raise an error for unsupported filter languages raise HTTPException( status_code=400, - detail=f"Only 'cql2-json' filter language is supported for collections. Got '{filter_lang}'.", + detail=f"Input should be 'cql2-json' or 'cql2-text' for collections. 
Got '{filter_lang}'.", ) - # For GET requests, we only handle cql2-json - parsed_filter = orjson.loads(unquote_plus(filter_expr)) + # Handle different filter formats + try: + if filter_lang == "cql2-text" or filter_lang is None: + # For cql2-text or when no filter_lang is specified, try both formats + try: + # First try to parse as JSON + parsed_filter = orjson.loads(unquote_plus(filter_expr)) + except Exception: + # If that fails, use pygeofilter to convert CQL2-text to CQL2-JSON + try: + # Parse CQL2-text and convert to CQL2-JSON + text_filter = unquote_plus(filter_expr) + parsed_ast = parse_cql2_text(text_filter) + parsed_filter = to_cql2(parsed_ast) + except Exception as e: + # If parsing fails, provide a helpful error message + raise HTTPException( + status_code=400, + detail=f"Invalid CQL2-text filter: {e}. Please check your syntax.", + ) + else: + # For explicit cql2-json, parse as JSON + parsed_filter = orjson.loads(unquote_plus(filter_expr)) + except Exception as e: + # Catch any other parsing errors + raise HTTPException( + status_code=400, detail=f"Error parsing filter: {e}" + ) except Exception as e: raise HTTPException( status_code=400, detail=f"Invalid filter parameter: {e}" diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index e39e9289..b3907c8e 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -263,91 +263,20 @@ async def get_all_collections( # Apply structured filter if provided if filter: - try: - # For simple direct query handling without using to_es - # This is a simplified approach that handles common filter patterns - if isinstance(filter, dict): - # Check if this is a CQL2 filter with op and args - if "op" in filter and "args" in filter: - op = filter.get("op") - args = filter.get("args") - - # Handle equality operator 
- if ( - op == "=" - and len(args) == 2 - and isinstance(args[0], dict) - and "property" in args[0] - ): - field = args[0]["property"] - value = args[1] - - # Handle different field types - if field == "id": - # Direct match on ID field - query_parts.append({"term": {"id": value}}) - elif field == "title": - # Match on title field - query_parts.append({"match": {"title": value}}) - elif field == "description": - # Match on description field - query_parts.append({"match": {"description": value}}) - else: - # For other fields, try a multi-match query - query_parts.append( - { - "multi_match": { - "query": value, - "fields": [field, f"{field}.*"], - "type": "best_fields", - } - } - ) - - # Handle regex operator - elif ( - op == "=~" - and len(args) == 2 - and isinstance(args[0], dict) - and "property" in args[0] - ): - field = args[0]["property"] - pattern = args[1].replace(".*", "*") - - # Use wildcard query for pattern matching - query_parts.append( - { - "wildcard": { - field: { - "value": pattern, - "case_insensitive": True, - } - } - } - ) - - # For other operators, use a match_all query as fallback - else: - query_parts.append({"match_all": {}}) - else: - # Not a valid CQL2 filter - query_parts.append({"match_all": {}}) - else: - # Not a dictionary - query_parts.append({"match_all": {}}) - except Exception as e: - logger = logging.getLogger(__name__) - logger.error(f"Error converting filter to Elasticsearch: {e}") - # If there's an error, add a query that matches nothing - query_parts.append({"bool": {"must_not": {"match_all": {}}}}) - raise - - # Combine all query parts with AND logic if there are multiple + # Convert string filter to dict if needed + if isinstance(filter, str): + filter = orjson.loads(filter) + # Convert the filter to an Elasticsearch query using the filter module + es_query = filter_module.to_es(await self.get_queryables_mapping(), filter) + query_parts.append(es_query) + + # Combine all query parts with AND logic if query_parts: - if 
len(query_parts) == 1: - body["query"] = query_parts[0] - else: - body["query"] = {"bool": {"must": query_parts}} + body["query"] = ( + query_parts[0] + if len(query_parts) == 1 + else {"bool": {"must": query_parts}} + ) # Execute the search response = await self.client.search( diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index f4b8abd0..793f54e7 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -160,6 +160,7 @@ async def get_all_collections( request: Request, sort: Optional[List[Dict[str, Any]]] = None, q: Optional[List[str]] = None, + filter: Optional[Dict[str, Any]] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Retrieve a list of collections from Elasticsearch, supporting pagination. @@ -169,6 +170,7 @@ async def get_all_collections( request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. q (Optional[List[str]]): Free text search terms. + filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. Returns: A tuple of (collections, next pagination token if any). @@ -191,7 +193,7 @@ async def get_all_collections( raise HTTPException( status_code=400, detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. " - + "Text fields are not sortable by default in OpenSearch. " + + "Text fields are not sortable by default in Elasticsearch. " + "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. 
", ) formatted_sort.append({field: {"order": direction}}) @@ -209,6 +211,9 @@ async def get_all_collections( if token: body["search_after"] = [token] + # Build the query part of the body + query_parts = [] + # Apply free text query if provided if q: # For collections, we want to search across all relevant fields @@ -235,11 +240,29 @@ async def get_all_collections( } ) - # Add the query to the body using bool query with should clauses - body["query"] = { - "bool": {"should": should_clauses, "minimum_should_match": 1} - } + # Add the free text query to the query parts + query_parts.append( + {"bool": {"should": should_clauses, "minimum_should_match": 1}} + ) + # Apply structured filter if provided + if filter: + # Convert string filter to dict if needed + if isinstance(filter, str): + filter = orjson.loads(filter) + # Convert the filter to an Elasticsearch query using the filter module + es_query = filter_module.to_es(await self.get_queryables_mapping(), filter) + query_parts.append(es_query) + + # Combine all query parts with AND logic + if query_parts: + body["query"] = ( + query_parts[0] + if len(query_parts) == 1 + else {"bool": {"must": query_parts}} + ) + + # Execute the search response = await self.client.search( index=COLLECTIONS_INDEX, body=body, @@ -255,7 +278,6 @@ async def get_all_collections( next_token = None if len(hits) == limit: - # Ensure we have a valid sort value for next_token next_token_values = hits[-1].get("sort") if next_token_values: next_token = next_token_values[0] @@ -580,6 +602,10 @@ async def apply_cql2_filter( otherwise the original Search object. 
""" if _filter is not None: + if isinstance(_filter, str): + import json + + _filter = json.loads(_filter) es_query = filter_module.to_es(await self.get_queryables_mapping(), _filter) search = search.filter(es_query) diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index d6e5368a..4751c74f 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -267,27 +267,55 @@ async def test_collections_filter_search(app_client, txn_client, load_test_data) test_collection["summaries"] = coll["summaries"] await create_collection(txn_client, test_collection) - # Test structured filter for collections with specific ID + # Ensure collections are searchable + from ..conftest import refresh_indices + + await refresh_indices(txn_client) + + # Test 1: CQL2-JSON format - filter for one of our test collections import json - # Create a simple filter for exact ID match - similar to what works in Postman - filter_expr = {"op": "=", "args": [{"property": "id"}, f"{test_prefix}-sentinel"]} + # Use the ID of the first test collection for the filter + test_collection_id = test_collections[0]["id"] + + # Create a simple filter for exact ID match using CQL2-JSON + filter_expr = {"op": "=", "args": [{"property": "id"}, test_collection_id]} # Convert to JSON string for URL parameter filter_json = json.dumps(filter_expr) - # Use the exact format that works in Postman + # Use CQL2-JSON format with explicit filter-lang resp = await app_client.get( - f"/collections?filter={filter_json}", + f"/collections?filter={filter_json}&filter-lang=cql2-json", ) + assert resp.status_code == 200 resp_json = resp.json() - # Filter collections to only include the ones we created for this test + # Should find exactly one collection with the specified ID found_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + c for c in 
resp_json["collections"] if c["id"] == test_collection_id ] - # Should only find the sentinel collection - assert len(found_collections) == 1 - assert found_collections[0]["id"] == f"{test_prefix}-sentinel" + assert ( + len(found_collections) == 1 + ), f"Expected 1 collection with ID {test_collection_id}, found {len(found_collections)}" + assert found_collections[0]["id"] == test_collection_id + + # Test 2: CQL2-text format with LIKE operator for more advanced filtering + # Use a filter that will match the test collection ID we created + filter_text = f"id LIKE '%{test_collection_id.split('-')[-1]}%'" + + resp = await app_client.get( + f"/collections?filter={filter_text}&filter-lang=cql2-text", + ) + assert resp.status_code == 200 + resp_json = resp.json() + + # Should find the test collection we created + found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] + assert ( + len(found_collections) >= 1 + ), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter" From e5c4b72a202b309ff34036ede826fa035c382c24 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 27 Sep 2025 00:14:23 +0800 Subject: [PATCH 6/8] update changelog, clean up --- CHANGELOG.md | 2 +- stac_fastapi/core/stac_fastapi/core/core.py | 2 -- stac_fastapi/tests/api/test_api_search_collections.py | 9 ++------- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1f47b44..ceb8f6e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added -- GET `/collections` collection search structured filtering extension with support for both CQL2 JSON and CQL2 text formats. [#471](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/471) +- GET `/collections` collection search structured filter extension with support for both cql2-json and cql2-text formats. 
[#475](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/475) ### Changed diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index f0952f8b..e70f62a8 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -229,7 +229,6 @@ async def all_collections( fields: Optional[List[str]] = None, sortby: Optional[str] = None, filter_expr: Optional[str] = None, - query: Optional[str] = None, filter_lang: Optional[str] = None, q: Optional[Union[str, List[str]]] = None, **kwargs, @@ -240,7 +239,6 @@ async def all_collections( fields (Optional[List[str]]): Fields to include or exclude from the results. sortby (Optional[str]): Sorting options for the results. filter_expr (Optional[str]): Structured filter expression in CQL2 JSON format. - query (Optional[str]): Legacy query parameter (deprecated). filter_lang (Optional[str]): Must be 'cql2-json' if specified, other values will result in an error. q (Optional[Union[str, List[str]]]): Free text search terms. **kwargs: Keyword arguments from the request. 
diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 4751c74f..343af86e 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -1,8 +1,9 @@ +import json import uuid import pytest -from ..conftest import create_collection +from ..conftest import create_collection, refresh_indices @pytest.mark.asyncio @@ -267,14 +268,8 @@ async def test_collections_filter_search(app_client, txn_client, load_test_data) test_collection["summaries"] = coll["summaries"] await create_collection(txn_client, test_collection) - # Ensure collections are searchable - from ..conftest import refresh_indices - await refresh_indices(txn_client) - # Test 1: CQL2-JSON format - filter for one of our test collections - import json - # Use the ID of the first test collection for the filter test_collection_id = test_collections[0]["id"] From 1f726c7af26c4c3b6c1b5e673033418a5ca35f6a Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 27 Sep 2025 00:21:11 +0800 Subject: [PATCH 7/8] more clean up --- .../stac_fastapi/opensearch/database_logic.py | 12 ++++-------- .../tests/api/test_api_search_collections.py | 1 - 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 793f54e7..e94dee25 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -162,7 +162,7 @@ async def get_all_collections( q: Optional[List[str]] = None, filter: Optional[Dict[str, Any]] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: - """Retrieve a list of collections from Elasticsearch, supporting pagination. + """Retrieve a list of collections from Opensearch, supporting pagination. Args: token (Optional[str]): The pagination token. 
@@ -193,7 +193,7 @@ async def get_all_collections( raise HTTPException( status_code=400, detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. " - + "Text fields are not sortable by default in Elasticsearch. " + + "Text fields are not sortable by default in Opensearch. " + "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. ", ) formatted_sort.append({field: {"order": direction}}) @@ -250,7 +250,7 @@ async def get_all_collections( # Convert string filter to dict if needed if isinstance(filter, str): filter = orjson.loads(filter) - # Convert the filter to an Elasticsearch query using the filter module + # Convert the filter to an OpenSearch query using the filter module es_query = filter_module.to_es(await self.get_queryables_mapping(), filter) query_parts.append(es_query) @@ -298,7 +298,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: NotFoundError: If the specified Item does not exist in the Collection. Notes: - The Item is retrieved from the Elasticsearch database using the `client.get` method, + The Item is retrieved from the OpenSearch database using the `client.get` method, with the index for the Collection as the target index and the combined `mk_item_id` as the document id. """ try: @@ -602,10 +602,6 @@ async def apply_cql2_filter( otherwise the original Search object. 
""" if _filter is not None: - if isinstance(_filter, str): - import json - - _filter = json.loads(_filter) es_query = filter_module.to_es(await self.get_queryables_mapping(), _filter) search = search.filter(es_query) diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 343af86e..85a393fc 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -164,7 +164,6 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_tes # Use unique prefixes to avoid conflicts between tests test_prefix = f"q-get-{uuid.uuid4().hex[:8]}" - # Create collections with different content to test structured filter test_collections = [ { "id": f"{test_prefix}-sentinel", From 1ccf95c0d8b36715a2a333118079ad4f88732d95 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 27 Sep 2025 13:17:21 +0800 Subject: [PATCH 8/8] fix doc string --- stac_fastapi/core/stac_fastapi/core/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index e70f62a8..7c6fdf2f 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -238,8 +238,8 @@ async def all_collections( Args: fields (Optional[List[str]]): Fields to include or exclude from the results. sortby (Optional[str]): Sorting options for the results. - filter_expr (Optional[str]): Structured filter expression in CQL2 JSON format. - filter_lang (Optional[str]): Must be 'cql2-json' if specified, other values will result in an error. + filter_expr (Optional[str]): Structured filter expression in CQL2 JSON or CQL2-text format. + filter_lang (Optional[str]): Must be 'cql2-json' or 'cql2-text' if specified, other values will result in an error. q (Optional[Union[str, List[str]]]): Free text search terms. 
**kwargs: Keyword arguments from the request.