Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Added

- Structured filter extension for GET `/collections` collection search, with support for both `cql2-json` and `cql2-text` filter formats. [#475](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/475)

### Changed

### Fixed
Expand Down
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,18 @@ SFEOS implements extended capabilities for the `/collections` endpoint, allowing
- Searches across multiple text fields including title, description, and keywords
- Supports partial word matching and relevance-based sorting

- **Structured Filtering**: Filter collections using CQL2 expressions
- JSON format: `/collections?filter={"op":"=","args":[{"property":"id"},"sentinel-2"]}&filter-lang=cql2-json`
- Text format: `/collections?filter=id='sentinel-2'&filter-lang=cql2-text` (note: string values must be quoted)
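- Advanced text format: `/collections?filter=id LIKE '%sentinel%'&filter-lang=cql2-text` (supports LIKE, BETWEEN, etc.; shown unencoded for readability — URL-encode spaces, quotes, and `%` in the filter value when sending a real request)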
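- Supports both CQL2 JSON and CQL2 text formats; if `filter-lang` is omitted, the filter is first parsed as JSON and then, if that fails, as CQL2 text. Any other `filter-lang` value is rejected with a 400 error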
- Enables precise filtering on any collection property

> **Note on HTTP Methods**: All collection search extensions (sorting, field selection, free text search, and structured filtering) currently only support GET requests. POST requests with these parameters in the request body are not yet supported.

These extensions make it easier to build user interfaces that display and navigate through collections efficiently.

> **Configuration**: Collection search extensions can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled.
> **Configuration**: Collection search extensions (sorting, field selection, free text search, and structured filtering) can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled.

> **Note**: Sorting is only available on fields that are indexed for sorting in Elasticsearch/OpenSearch. With the default mappings, you can sort on:
> - `id` (keyword field)
Expand All @@ -156,7 +165,7 @@ This project is organized into several packages, each with a specific purpose:
- Shared logic and utilities that improve code reuse between backends

- **stac_fastapi_elasticsearch**: Complete implementation of the STAC API using Elasticsearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`.
-

- **stac_fastapi_opensearch**: Complete implementation of the STAC API using OpenSearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`.

## Examples
Expand Down
61 changes: 59 additions & 2 deletions stac_fastapi/core/stac_fastapi/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ async def all_collections(
self,
fields: Optional[List[str]] = None,
sortby: Optional[str] = None,
filter_expr: Optional[str] = None,
filter_lang: Optional[str] = None,
q: Optional[Union[str, List[str]]] = None,
**kwargs,
) -> stac_types.Collections:
Expand All @@ -236,7 +238,9 @@ async def all_collections(
Args:
fields (Optional[List[str]]): Fields to include or exclude from the results.
sortby (Optional[str]): Sorting options for the results.
q (Optional[List[str]]): Free text search terms.
filter_expr (Optional[str]): Structured filter expression in CQL2 JSON or CQL2-text format.
filter_lang (Optional[str]): Must be 'cql2-json' or 'cql2-text' if specified, other values will result in an error.
q (Optional[Union[str, List[str]]]): Free text search terms.
**kwargs: Keyword arguments from the request.

Returns:
Expand Down Expand Up @@ -276,8 +280,61 @@ async def all_collections(
if q is not None:
q_list = [q] if isinstance(q, str) else q

# Parse the filter parameter if provided
parsed_filter = None
if filter_expr is not None:
try:
# Check if filter_lang is specified and not one of the supported formats
if filter_lang is not None and filter_lang not in [
"cql2-json",
"cql2-text",
]:
# Raise an error for unsupported filter languages
raise HTTPException(
status_code=400,
detail=f"Input should be 'cql2-json' or 'cql2-text' for collections. Got '{filter_lang}'.",
)

# Handle different filter formats
try:
if filter_lang == "cql2-text" or filter_lang is None:
# For cql2-text or when no filter_lang is specified, try both formats
try:
# First try to parse as JSON
parsed_filter = orjson.loads(unquote_plus(filter_expr))
except Exception:
# If that fails, use pygeofilter to convert CQL2-text to CQL2-JSON
try:
# Parse CQL2-text and convert to CQL2-JSON
text_filter = unquote_plus(filter_expr)
parsed_ast = parse_cql2_text(text_filter)
parsed_filter = to_cql2(parsed_ast)
except Exception as e:
# If parsing fails, provide a helpful error message
raise HTTPException(
status_code=400,
detail=f"Invalid CQL2-text filter: {e}. Please check your syntax.",
)
else:
# For explicit cql2-json, parse as JSON
parsed_filter = orjson.loads(unquote_plus(filter_expr))
except Exception as e:
# Catch any other parsing errors
raise HTTPException(
status_code=400, detail=f"Error parsing filter: {e}"
)
except Exception as e:
raise HTTPException(
status_code=400, detail=f"Invalid filter parameter: {e}"
)

collections, next_token = await self.database.get_all_collections(
token=token, limit=limit, request=request, sort=sort, q=q_list
token=token,
limit=limit,
request=request,
sort=sort,
q=q_list,
filter=parsed_filter,
)

# Apply field filtering if fields parameter was provided
Expand Down
9 changes: 5 additions & 4 deletions stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@
create_collection_index,
create_index_templates,
)
from stac_fastapi.extensions.core import ( # CollectionSearchFilterExtension,
from stac_fastapi.extensions.core import (
AggregationExtension,
CollectionSearchExtension,
CollectionSearchFilterExtension,
FilterExtension,
FreeTextExtension,
SortExtension,
Expand Down Expand Up @@ -123,9 +124,9 @@
# QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]),
SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]),
FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]),
# CollectionSearchFilterExtension(
# conformance_classes=[FilterConformanceClasses.COLLECTIONS]
# ),
CollectionSearchFilterExtension(
conformance_classes=[FilterConformanceClasses.COLLECTIONS]
),
FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ async def get_all_collections(
request: Request,
sort: Optional[List[Dict[str, Any]]] = None,
q: Optional[List[str]] = None,
filter: Optional[Dict[str, Any]] = None,
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
"""Retrieve a list of collections from Elasticsearch, supporting pagination.

Expand All @@ -185,6 +186,7 @@ async def get_all_collections(
request (Request): The FastAPI request object.
sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
q (Optional[List[str]]): Free text search terms.
filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.

Returns:
A tuple of (collections, next pagination token if any).
Expand Down Expand Up @@ -225,6 +227,9 @@ async def get_all_collections(
if token:
body["search_after"] = [token]

# Build the query part of the body
query_parts = []

# Apply free text query if provided
if q:
# For collections, we want to search across all relevant fields
Expand All @@ -251,10 +256,27 @@ async def get_all_collections(
}
)

# Add the query to the body using bool query with should clauses
body["query"] = {
"bool": {"should": should_clauses, "minimum_should_match": 1}
}
# Add the free text query to the query parts
query_parts.append(
{"bool": {"should": should_clauses, "minimum_should_match": 1}}
)

# Apply structured filter if provided
if filter:
# Convert string filter to dict if needed
if isinstance(filter, str):
filter = orjson.loads(filter)
# Convert the filter to an Elasticsearch query using the filter module
es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
query_parts.append(es_query)

# Combine all query parts with AND logic
if query_parts:
body["query"] = (
query_parts[0]
if len(query_parts) == 1
else {"bool": {"must": query_parts}}
)

# Execute the search
response = await self.client.search(
Expand Down
9 changes: 5 additions & 4 deletions stac_fastapi/opensearch/stac_fastapi/opensearch/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
from stac_fastapi.core.route_dependencies import get_route_dependencies
from stac_fastapi.core.session import Session
from stac_fastapi.core.utilities import get_bool_env
from stac_fastapi.extensions.core import ( # CollectionSearchFilterExtension,
from stac_fastapi.extensions.core import (
AggregationExtension,
CollectionSearchExtension,
CollectionSearchFilterExtension,
FilterExtension,
FreeTextExtension,
SortExtension,
Expand Down Expand Up @@ -123,9 +124,9 @@
# QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]),
SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]),
FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]),
# CollectionSearchFilterExtension(
# conformance_classes=[FilterConformanceClasses.COLLECTIONS]
# ),
CollectionSearchFilterExtension(
conformance_classes=[FilterConformanceClasses.COLLECTIONS]
),
FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
]

Expand Down
38 changes: 30 additions & 8 deletions stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,17 @@ async def get_all_collections(
request: Request,
sort: Optional[List[Dict[str, Any]]] = None,
q: Optional[List[str]] = None,
filter: Optional[Dict[str, Any]] = None,
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
"""Retrieve a list of collections from Elasticsearch, supporting pagination.
"""Retrieve a list of collections from Opensearch, supporting pagination.

Args:
token (Optional[str]): The pagination token.
limit (int): The number of results to return.
request (Request): The FastAPI request object.
sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
q (Optional[List[str]]): Free text search terms.
filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.

Returns:
A tuple of (collections, next pagination token if any).
Expand All @@ -191,7 +193,7 @@ async def get_all_collections(
raise HTTPException(
status_code=400,
detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. "
+ "Text fields are not sortable by default in OpenSearch. "
+ "Text fields are not sortable by default in Opensearch. "
+ "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. ",
)
formatted_sort.append({field: {"order": direction}})
Expand All @@ -209,6 +211,9 @@ async def get_all_collections(
if token:
body["search_after"] = [token]

# Build the query part of the body
query_parts = []

# Apply free text query if provided
if q:
# For collections, we want to search across all relevant fields
Expand All @@ -235,11 +240,29 @@ async def get_all_collections(
}
)

# Add the query to the body using bool query with should clauses
body["query"] = {
"bool": {"should": should_clauses, "minimum_should_match": 1}
}
# Add the free text query to the query parts
query_parts.append(
{"bool": {"should": should_clauses, "minimum_should_match": 1}}
)

# Apply structured filter if provided
if filter:
# Convert string filter to dict if needed
if isinstance(filter, str):
filter = orjson.loads(filter)
# Convert the filter to an Opensearch query using the filter module
es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
query_parts.append(es_query)

# Combine all query parts with AND logic
if query_parts:
body["query"] = (
query_parts[0]
if len(query_parts) == 1
else {"bool": {"must": query_parts}}
)

# Execute the search
response = await self.client.search(
index=COLLECTIONS_INDEX,
body=body,
Expand All @@ -255,7 +278,6 @@ async def get_all_collections(

next_token = None
if len(hits) == limit:
# Ensure we have a valid sort value for next_token
next_token_values = hits[-1].get("sort")
if next_token_values:
next_token = next_token_values[0]
Expand All @@ -276,7 +298,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
NotFoundError: If the specified Item does not exist in the Collection.

Notes:
The Item is retrieved from the Elasticsearch database using the `client.get` method,
The Item is retrieved from the Opensearch database using the `client.get` method,
with the index for the Collection as the target index and the combined `mk_item_id` as the document id.
"""
try:
Expand Down
Loading