diff --git a/CHANGELOG.md b/CHANGELOG.md index 01420b39..ceb8f6e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- GET `/collections` collection search structured filter extension with support for both cql2-json and cql2-text formats. [#475](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/475) + ### Changed ### Fixed diff --git a/README.md b/README.md index 78c02408..cf0c5ff1 100644 --- a/README.md +++ b/README.md @@ -131,9 +131,18 @@ SFEOS implements extended capabilities for the `/collections` endpoint, allowing - Searches across multiple text fields including title, description, and keywords - Supports partial word matching and relevance-based sorting +- **Structured Filtering**: Filter collections using CQL2 expressions + - JSON format: `/collections?filter={"op":"=","args":[{"property":"id"},"sentinel-2"]}&filter-lang=cql2-json` + - Text format: `/collections?filter=id='sentinel-2'&filter-lang=cql2-text` (note: string values must be quoted) + - Advanced text format: `/collections?filter=id LIKE '%sentinel%'&filter-lang=cql2-text` (supports LIKE, BETWEEN, etc.) + - Supports both CQL2 JSON and CQL2 text formats with various operators + - Enables precise filtering on any collection property + +> **Note on HTTP Methods**: All collection search extensions (sorting, field selection, free text search, and structured filtering) currently only support GET requests. POST requests with these parameters in the request body are not yet supported. + These extensions make it easier to build user interfaces that display and navigate through collections efficiently. -> **Configuration**: Collection search extensions can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled. 
+> **Configuration**: Collection search extensions (sorting, field selection, free text search, and structured filtering) can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled. > **Note**: Sorting is only available on fields that are indexed for sorting in Elasticsearch/OpenSearch. With the default mappings, you can sort on: > - `id` (keyword field) @@ -156,7 +165,7 @@ This project is organized into several packages, each with a specific purpose: - Shared logic and utilities that improve code reuse between backends - **stac_fastapi_elasticsearch**: Complete implementation of the STAC API using Elasticsearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`. -- + - **stac_fastapi_opensearch**: Complete implementation of the STAC API using OpenSearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`. ## Examples diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 9d01deaf..7c6fdf2f 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -228,6 +228,8 @@ async def all_collections( self, fields: Optional[List[str]] = None, sortby: Optional[str] = None, + filter_expr: Optional[str] = None, + filter_lang: Optional[str] = None, q: Optional[Union[str, List[str]]] = None, **kwargs, ) -> stac_types.Collections: @@ -236,7 +238,9 @@ async def all_collections( Args: fields (Optional[List[str]]): Fields to include or exclude from the results. sortby (Optional[str]): Sorting options for the results. - q (Optional[List[str]]): Free text search terms. + filter_expr (Optional[str]): Structured filter expression in CQL2 JSON or CQL2-text format. + filter_lang (Optional[str]): Must be 'cql2-json' or 'cql2-text' if specified, other values will result in an error. 
+ q (Optional[Union[str, List[str]]]): Free text search terms. **kwargs: Keyword arguments from the request. Returns: @@ -276,8 +280,61 @@ async def all_collections( if q is not None: q_list = [q] if isinstance(q, str) else q + # Parse the filter parameter if provided + parsed_filter = None + if filter_expr is not None: + try: + # Check if filter_lang is specified and not one of the supported formats + if filter_lang is not None and filter_lang not in [ + "cql2-json", + "cql2-text", + ]: + # Raise an error for unsupported filter languages + raise HTTPException( + status_code=400, + detail=f"Input should be 'cql2-json' or 'cql2-text' for collections. Got '{filter_lang}'.", + ) + + # Handle different filter formats + try: + if filter_lang == "cql2-text" or filter_lang is None: + # For cql2-text or when no filter_lang is specified, try both formats + try: + # First try to parse as JSON + parsed_filter = orjson.loads(unquote_plus(filter_expr)) + except Exception: + # If that fails, use pygeofilter to convert CQL2-text to CQL2-JSON + try: + # Parse CQL2-text and convert to CQL2-JSON + text_filter = unquote_plus(filter_expr) + parsed_ast = parse_cql2_text(text_filter) + parsed_filter = to_cql2(parsed_ast) + except Exception as e: + # If parsing fails, provide a helpful error message + raise HTTPException( + status_code=400, + detail=f"Invalid CQL2-text filter: {e}. 
Please check your syntax.", + ) + else: + # For explicit cql2-json, parse as JSON + parsed_filter = orjson.loads(unquote_plus(filter_expr)) + except Exception as e: + # Catch any other parsing errors + raise HTTPException( + status_code=400, detail=f"Error parsing filter: {e}" + ) + except Exception as e: + raise HTTPException( + status_code=400, detail=f"Invalid filter parameter: {e}" + ) + collections, next_token = await self.database.get_all_collections( - token=token, limit=limit, request=request, sort=sort, q=q_list + token=token, + limit=limit, + request=request, + sort=sort, + q=q_list, + filter=parsed_filter, ) # Apply field filtering if fields parameter was provided diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 18b52147..e3292cbf 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -34,9 +34,10 @@ create_collection_index, create_index_templates, ) -from stac_fastapi.extensions.core import ( # CollectionSearchFilterExtension, +from stac_fastapi.extensions.core import ( AggregationExtension, CollectionSearchExtension, + CollectionSearchFilterExtension, FilterExtension, FreeTextExtension, SortExtension, @@ -123,9 +124,9 @@ # QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]), SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]), FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]), - # CollectionSearchFilterExtension( - # conformance_classes=[FilterConformanceClasses.COLLECTIONS] - # ), + CollectionSearchFilterExtension( + conformance_classes=[FilterConformanceClasses.COLLECTIONS] + ), FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]), ] diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py 
b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index c472039b..b3907c8e 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -176,6 +176,7 @@ async def get_all_collections( request: Request, sort: Optional[List[Dict[str, Any]]] = None, q: Optional[List[str]] = None, + filter: Optional[Dict[str, Any]] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Retrieve a list of collections from Elasticsearch, supporting pagination. @@ -185,6 +186,7 @@ async def get_all_collections( request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. q (Optional[List[str]]): Free text search terms. + filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. Returns: A tuple of (collections, next pagination token if any). @@ -225,6 +227,9 @@ async def get_all_collections( if token: body["search_after"] = [token] + # Build the query part of the body + query_parts = [] + # Apply free text query if provided if q: # For collections, we want to search across all relevant fields @@ -251,10 +256,27 @@ async def get_all_collections( } ) - # Add the query to the body using bool query with should clauses - body["query"] = { - "bool": {"should": should_clauses, "minimum_should_match": 1} - } + # Add the free text query to the query parts + query_parts.append( + {"bool": {"should": should_clauses, "minimum_should_match": 1}} + ) + + # Apply structured filter if provided + if filter: + # Convert string filter to dict if needed + if isinstance(filter, str): + filter = orjson.loads(filter) + # Convert the filter to an Elasticsearch query using the filter module + es_query = filter_module.to_es(await self.get_queryables_mapping(), filter) + query_parts.append(es_query) + + # Combine all query parts with AND logic + if query_parts: + body["query"] = ( + query_parts[0] + if 
len(query_parts) == 1 + else {"bool": {"must": query_parts}} + ) # Execute the search response = await self.client.search( diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index 34d55589..b842d929 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -28,9 +28,10 @@ from stac_fastapi.core.route_dependencies import get_route_dependencies from stac_fastapi.core.session import Session from stac_fastapi.core.utilities import get_bool_env -from stac_fastapi.extensions.core import ( # CollectionSearchFilterExtension, +from stac_fastapi.extensions.core import ( AggregationExtension, CollectionSearchExtension, + CollectionSearchFilterExtension, FilterExtension, FreeTextExtension, SortExtension, @@ -123,9 +124,9 @@ # QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]), SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]), FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]), - # CollectionSearchFilterExtension( - # conformance_classes=[FilterConformanceClasses.COLLECTIONS] - # ), + CollectionSearchFilterExtension( + conformance_classes=[FilterConformanceClasses.COLLECTIONS] + ), FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]), ] diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index f4b8abd0..e94dee25 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -160,8 +160,9 @@ async def get_all_collections( request: Request, sort: Optional[List[Dict[str, Any]]] = None, q: Optional[List[str]] = None, + filter: Optional[Dict[str, Any]] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: - """Retrieve a list of collections from 
Elasticsearch, supporting pagination. + """Retrieve a list of collections from OpenSearch, supporting pagination. Args: token (Optional[str]): The pagination token. @@ -169,6 +170,7 @@ async def get_all_collections( request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. q (Optional[List[str]]): Free text search terms. + filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. Returns: A tuple of (collections, next pagination token if any). @@ -191,7 +193,7 @@ async def get_all_collections( raise HTTPException( status_code=400, detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. " - + "Text fields are not sortable by default in OpenSearch. " + + "Text fields are not sortable by default in Opensearch. " + "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. ", ) formatted_sort.append({field: {"order": direction}}) @@ -209,6 +211,9 @@ async def get_all_collections( if token: body["search_after"] = [token] + # Build the query part of the body + query_parts = [] + # Apply free text query if provided if q: # For collections, we want to search across all relevant fields @@ -235,11 +240,29 @@ async def get_all_collections( } ) - # Add the query to the body using bool query with should clauses - body["query"] = { - "bool": {"should": should_clauses, "minimum_should_match": 1} - } + # Add the free text query to the query parts + query_parts.append( + {"bool": {"should": should_clauses, "minimum_should_match": 1}} + ) + + # Apply structured filter if provided + if filter: + # Convert string filter to dict if needed + if isinstance(filter, str): + filter = orjson.loads(filter) + # Convert the filter to an OpenSearch query using the filter module + es_query = filter_module.to_es(await self.get_queryables_mapping(), filter) + query_parts.append(es_query) + + # Combine all query parts with AND logic + if 
query_parts: + body["query"] = ( + query_parts[0] + if len(query_parts) == 1 + else {"bool": {"must": query_parts}} + ) + # Execute the search response = await self.client.search( index=COLLECTIONS_INDEX, body=body, @@ -255,7 +278,6 @@ async def get_all_collections( next_token = None if len(hits) == limit: - # Ensure we have a valid sort value for next_token next_token_values = hits[-1].get("sort") if next_token_values: next_token = next_token_values[0] @@ -276,7 +298,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: NotFoundError: If the specified Item does not exist in the Collection. Notes: - The Item is retrieved from the Elasticsearch database using the `client.get` method, + The Item is retrieved from the OpenSearch database using the `client.get` method, with the index for the Collection as the target index and the combined `mk_item_id` as the document id. """ try: diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index de546079..85a393fc 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -1,8 +1,9 @@ +import json import uuid import pytest -from ..conftest import create_collection +from ..conftest import create_collection, refresh_indices @pytest.mark.asyncio @@ -163,7 +164,6 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_tes # Use unique prefixes to avoid conflicts between tests test_prefix = f"q-get-{uuid.uuid4().hex[:8]}" - # Create collections with different content to test free text search test_collections = [ { "id": f"{test_prefix}-sentinel", @@ -226,3 +226,90 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_tes # Should only find the landsat collection assert len(found_collections) == 1 assert found_collections[0]["id"] == f"{test_prefix}-modis" + + +@pytest.mark.asyncio +async def 
test_collections_filter_search(app_client, txn_client, load_test_data): + """Verify GET /collections honors the filter parameter for structured search.""" + # Create multiple collections with different content + base_collection = load_test_data("test_collection.json") + + # Use unique prefixes to avoid conflicts between tests + test_prefix = f"filter-{uuid.uuid4().hex[:8]}" + + # Create collections with different content to test structured filter + test_collections = [ + { + "id": f"{test_prefix}-sentinel", + "title": "Sentinel-2 Collection", + "description": "Collection of Sentinel-2 data", + "summaries": {"platform": ["sentinel-2a", "sentinel-2b"]}, + }, + { + "id": f"{test_prefix}-landsat", + "title": "Landsat Collection", + "description": "Collection of Landsat data", + "summaries": {"platform": ["landsat-8", "landsat-9"]}, + }, + { + "id": f"{test_prefix}-modis", + "title": "MODIS Collection", + "description": "Collection of MODIS data", + "summaries": {"platform": ["terra", "aqua"]}, + }, + ] + + for i, coll in enumerate(test_collections): + test_collection = base_collection.copy() + test_collection["id"] = coll["id"] + test_collection["title"] = coll["title"] + test_collection["description"] = coll["description"] + test_collection["summaries"] = coll["summaries"] + await create_collection(txn_client, test_collection) + + await refresh_indices(txn_client) + + # Use the ID of the first test collection for the filter + test_collection_id = test_collections[0]["id"] + + # Create a simple filter for exact ID match using CQL2-JSON + filter_expr = {"op": "=", "args": [{"property": "id"}, test_collection_id]} + + # Convert to JSON string for URL parameter + filter_json = json.dumps(filter_expr) + + # Use CQL2-JSON format with explicit filter-lang + resp = await app_client.get( + f"/collections?filter={filter_json}&filter-lang=cql2-json", + ) + + assert resp.status_code == 200 + resp_json = resp.json() + + # Should find exactly one collection with the specified ID + 
found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] + + assert ( + len(found_collections) == 1 + ), f"Expected 1 collection with ID {test_collection_id}, found {len(found_collections)}" + assert found_collections[0]["id"] == test_collection_id + + # Test 2: CQL2-text format with LIKE operator for more advanced filtering + # Use a filter that will match the test collection ID we created + filter_text = f"id LIKE '%{test_collection_id.split('-')[-1]}%'" + + resp = await app_client.get( + f"/collections?filter={filter_text}&filter-lang=cql2-text", + ) + assert resp.status_code == 200 + resp_json = resp.json() + + # Should find the test collection we created + found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] + assert ( + len(found_collections) >= 1 + ), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter"