stac-utils · jonhealy1 · Sep 27, 2025 · Sep 19, 2025 · Sep 19, 2025 · Sep 26, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ### Added
 
+- GET `/collections` collection search now supports the structured filter extension, accepting filters in both cql2-json and cql2-text formats. [#475](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/475)
+
 ### Changed
 
 ### Fixed

diff --git a/README.md b/README.md
@@ -131,9 +131,18 @@ SFEOS implements extended capabilities for the `/collections` endpoint, allowing
   - Searches across multiple text fields including title, description, and keywords
   - Supports partial word matching and relevance-based sorting
 
+- **Structured Filtering**: Filter collections using CQL2 expressions
+  - JSON format: `/collections?filter={"op":"=","args":[{"property":"id"},"sentinel-2"]}&filter-lang=cql2-json`
+  - Text format: `/collections?filter=id='sentinel-2'&filter-lang=cql2-text` (note: string values must be quoted)
+  - Advanced text format: `/collections?filter=id LIKE '%sentinel%'&filter-lang=cql2-text` (supports LIKE, BETWEEN, etc.)
+  - Supports both CQL2 JSON and CQL2 text formats with various operators
+  - Enables precise filtering on any collection property
+
+> **Note on HTTP Methods**: All collection search extensions (sorting, field selection, free text search, and structured filtering) currently support only GET requests. POST requests that pass these parameters in the request body are not yet supported.
+
 These extensions make it easier to build user interfaces that display and navigate through collections efficiently.
 
-> **Configuration**: Collection search extensions can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled.
+> **Configuration**: Collection search extensions (sorting, field selection, free text search, and structured filtering) can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled.
 
 > **Note**: Sorting is only available on fields that are indexed for sorting in Elasticsearch/OpenSearch. With the default mappings, you can sort on:
 > - `id` (keyword field)
@@ -156,7 +165,7 @@ This project is organized into several packages, each with a specific purpose:
   - Shared logic and utilities that improve code reuse between backends
 
 - **stac_fastapi_elasticsearch**: Complete implementation of the STAC API using Elasticsearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`.
-- 
+
 - **stac_fastapi_opensearch**: Complete implementation of the STAC API using OpenSearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`.
 
 ## Examples

diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py
@@ -228,6 +228,8 @@ async def all_collections(
         self,
         fields: Optional[List[str]] = None,
         sortby: Optional[str] = None,
+        filter_expr: Optional[str] = None,
+        filter_lang: Optional[str] = None,
         q: Optional[Union[str, List[str]]] = None,
         **kwargs,
     ) -> stac_types.Collections:
@@ -236,7 +238,9 @@ async def all_collections(
         Args:
             fields (Optional[List[str]]): Fields to include or exclude from the results.
             sortby (Optional[str]): Sorting options for the results.
-            q (Optional[List[str]]): Free text search terms.
+            filter_expr (Optional[str]): Structured filter expression in CQL2-JSON or CQL2-text format.
+            filter_lang (Optional[str]): Must be 'cql2-json' or 'cql2-text' if specified; any other value results in an error.
+            q (Optional[Union[str, List[str]]]): Free text search terms.
             **kwargs: Keyword arguments from the request.
 
         Returns:
@@ -276,8 +280,61 @@ async def all_collections(
         if q is not None:
             q_list = [q] if isinstance(q, str) else q
 
+        # Parse the filter parameter if provided
+        parsed_filter = None
+        if filter_expr is not None:
+            try:
+                # Check if filter_lang is specified and not one of the supported formats
+                if filter_lang is not None and filter_lang not in [
+                    "cql2-json",
+                    "cql2-text",
+                ]:
+                    # Raise an error for unsupported filter languages
+                    raise HTTPException(
+                        status_code=400,
+                        detail=f"Input should be 'cql2-json' or 'cql2-text' for collections. Got '{filter_lang}'.",
+                    )
+
+                # Handle different filter formats
+                try:
+                    if filter_lang == "cql2-text" or filter_lang is None:
+                        # For cql2-text or when no filter_lang is specified, try both formats
+                        try:
+                            # First try to parse as JSON
+                            parsed_filter = orjson.loads(unquote_plus(filter_expr))
+                        except Exception:
+                            # If that fails, use pygeofilter to convert CQL2-text to CQL2-JSON
+                            try:
+                                # Parse CQL2-text and convert to CQL2-JSON
+                                text_filter = unquote_plus(filter_expr)
+                                parsed_ast = parse_cql2_text(text_filter)
+                                parsed_filter = to_cql2(parsed_ast)
+                            except Exception as e:
+                                # If parsing fails, provide a helpful error message
+                                raise HTTPException(
+                                    status_code=400,
+                                    detail=f"Invalid CQL2-text filter: {e}. Please check your syntax.",
+                                )
+                    else:
+                        # For explicit cql2-json, parse as JSON
+                        parsed_filter = orjson.loads(unquote_plus(filter_expr))
+                except Exception as e:
+                    # Catch any other parsing errors
+                    raise HTTPException(
+                        status_code=400, detail=f"Error parsing filter: {e}"
+                    )
+            except Exception as e:
+                raise HTTPException(
+                    status_code=400, detail=f"Invalid filter parameter: {e}"
+                )
+
         collections, next_token = await self.database.get_all_collections(
-            token=token, limit=limit, request=request, sort=sort, q=q_list
+            token=token,
+            limit=limit,
+            request=request,
+            sort=sort,
+            q=q_list,
+            filter=parsed_filter,
         )
 
         # Apply field filtering if fields parameter was provided

diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py
@@ -34,9 +34,10 @@
     create_collection_index,
     create_index_templates,
 )
-from stac_fastapi.extensions.core import (  # CollectionSearchFilterExtension,
+from stac_fastapi.extensions.core import (
     AggregationExtension,
     CollectionSearchExtension,
+    CollectionSearchFilterExtension,
     FilterExtension,
     FreeTextExtension,
     SortExtension,
@@ -123,9 +124,9 @@
         # QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]),
         SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]),
         FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]),
-        # CollectionSearchFilterExtension(
-        #     conformance_classes=[FilterConformanceClasses.COLLECTIONS]
-        # ),
+        CollectionSearchFilterExtension(
+            conformance_classes=[FilterConformanceClasses.COLLECTIONS]
+        ),
         FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
     ]
 

diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py
@@ -176,6 +176,7 @@ async def get_all_collections(
         request: Request,
         sort: Optional[List[Dict[str, Any]]] = None,
         q: Optional[List[str]] = None,
+        filter: Optional[Dict[str, Any]] = None,
     ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
         """Retrieve a list of collections from Elasticsearch, supporting pagination.
 
@@ -185,6 +186,7 @@ async def get_all_collections(
             request (Request): The FastAPI request object.
             sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
             q (Optional[List[str]]): Free text search terms.
+            filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
 
         Returns:
             A tuple of (collections, next pagination token if any).
@@ -225,6 +227,9 @@ async def get_all_collections(
         if token:
             body["search_after"] = [token]
 
+        # Build the query part of the body
+        query_parts = []
+
         # Apply free text query if provided
         if q:
             # For collections, we want to search across all relevant fields
@@ -251,10 +256,27 @@ async def get_all_collections(
                         }
                     )
 
-            # Add the query to the body using bool query with should clauses
-            body["query"] = {
-                "bool": {"should": should_clauses, "minimum_should_match": 1}
-            }
+            # Add the free text query to the query parts
+            query_parts.append(
+                {"bool": {"should": should_clauses, "minimum_should_match": 1}}
+            )
+
+        # Apply structured filter if provided
+        if filter:
+            # Convert string filter to dict if needed
+            if isinstance(filter, str):
+                filter = orjson.loads(filter)
+            # Convert the filter to an Elasticsearch query using the filter module
+            es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
+            query_parts.append(es_query)
+
+        # Combine all query parts with AND logic
+        if query_parts:
+            body["query"] = (
+                query_parts[0]
+                if len(query_parts) == 1
+                else {"bool": {"must": query_parts}}
+            )
 
         # Execute the search
         response = await self.client.search(

diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py
@@ -28,9 +28,10 @@
 from stac_fastapi.core.route_dependencies import get_route_dependencies
 from stac_fastapi.core.session import Session
 from stac_fastapi.core.utilities import get_bool_env
-from stac_fastapi.extensions.core import (  # CollectionSearchFilterExtension,
+from stac_fastapi.extensions.core import (
     AggregationExtension,
     CollectionSearchExtension,
+    CollectionSearchFilterExtension,
     FilterExtension,
     FreeTextExtension,
     SortExtension,
@@ -123,9 +124,9 @@
         # QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]),
         SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]),
         FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]),
-        # CollectionSearchFilterExtension(
-        #     conformance_classes=[FilterConformanceClasses.COLLECTIONS]
-        # ),
+        CollectionSearchFilterExtension(
+            conformance_classes=[FilterConformanceClasses.COLLECTIONS]
+        ),
         FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
     ]
 

diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
@@ -160,15 +160,17 @@ async def get_all_collections(
         request: Request,
         sort: Optional[List[Dict[str, Any]]] = None,
         q: Optional[List[str]] = None,
+        filter: Optional[Dict[str, Any]] = None,
     ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
-        """Retrieve a list of collections from Elasticsearch, supporting pagination.
+        """Retrieve a list of collections from OpenSearch, supporting pagination.
 
         Args:
             token (Optional[str]): The pagination token.
             limit (int): The number of results to return.
             request (Request): The FastAPI request object.
             sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
             q (Optional[List[str]]): Free text search terms.
+            filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
 
         Returns:
             A tuple of (collections, next pagination token if any).
@@ -191,7 +193,7 @@ async def get_all_collections(
                         raise HTTPException(
                             status_code=400,
                             detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. "
-                            + "Text fields are not sortable by default in OpenSearch. "
+                            + "Text fields are not sortable by default in Opensearch. "
                             + "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. ",
                         )
                     formatted_sort.append({field: {"order": direction}})
@@ -209,6 +211,9 @@ async def get_all_collections(
         if token:
             body["search_after"] = [token]
 
+        # Build the query part of the body
+        query_parts = []
+
         # Apply free text query if provided
         if q:
             # For collections, we want to search across all relevant fields
@@ -235,11 +240,29 @@ async def get_all_collections(
                         }
                     )
 
-            # Add the query to the body using bool query with should clauses
-            body["query"] = {
-                "bool": {"should": should_clauses, "minimum_should_match": 1}
-            }
+            # Add the free text query to the query parts
+            query_parts.append(
+                {"bool": {"should": should_clauses, "minimum_should_match": 1}}
+            )
+
+        # Apply structured filter if provided
+        if filter:
+            # Convert string filter to dict if needed
+            if isinstance(filter, str):
+                filter = orjson.loads(filter)
+            # Convert the filter to an OpenSearch query using the filter module
+            es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
+            query_parts.append(es_query)
+
+        # Combine all query parts with AND logic
+        if query_parts:
+            body["query"] = (
+                query_parts[0]
+                if len(query_parts) == 1
+                else {"bool": {"must": query_parts}}
+            )
 
+        # Execute the search
         response = await self.client.search(
             index=COLLECTIONS_INDEX,
             body=body,
@@ -255,7 +278,6 @@ async def get_all_collections(
 
         next_token = None
         if len(hits) == limit:
-            # Ensure we have a valid sort value for next_token
             next_token_values = hits[-1].get("sort")
             if next_token_values:
                 next_token = next_token_values[0]
@@ -276,7 +298,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
             NotFoundError: If the specified Item does not exist in the Collection.
 
         Notes:
-            The Item is retrieved from the Elasticsearch database using the `client.get` method,
+            The Item is retrieved from the OpenSearch database using the `client.get` method,
             with the index for the Collection as the target index and the combined `mk_item_id` as the document id.
         """
         try: