diff --git a/CHANGELOG.md b/CHANGELOG.md index 8396708a..d0794819 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- GET `/collections` collection search free text extension ex. `/collections?q=sentinel`. [#470](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/470) - Added `USE_DATETIME` environment variable to configure datetime search behavior in SFEOS. [#452](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/452) - GET `/collections` collection search sort extension ex. `/collections?sortby=+id`. [#456](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/456) - GET `/collections` collection search fields extension ex. `/collections?fields=id,title`. [#465](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/465) diff --git a/README.md b/README.md index c1be16ae..78c02408 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,11 @@ SFEOS implements extended capabilities for the `/collections` endpoint, allowing - Example: `/collections?fields=id,title,description` - This helps reduce payload size when only certain fields are needed +- **Free Text Search**: Search across collection text fields using the `q` parameter + - Example: `/collections?q=landsat` + - Searches across multiple text fields including title, description, and keywords + - Supports partial word matching and relevance-based sorting + These extensions make it easier to build user interfaces that display and navigate through collections efficiently. > **Configuration**: Collection search extensions can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled. 
diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 4f35ed41..9d01deaf 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -225,13 +225,18 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage: return landing_page async def all_collections( - self, fields: Optional[List[str]] = None, sortby: Optional[str] = None, **kwargs + self, + fields: Optional[List[str]] = None, + sortby: Optional[str] = None, + q: Optional[Union[str, List[str]]] = None, + **kwargs, ) -> stac_types.Collections: """Read all collections from the database. Args: fields (Optional[List[str]]): Fields to include or exclude from the results. sortby (Optional[str]): Sorting options for the results. + q (Optional[Union[str, List[str]]]): Free text search term(s); a single string is treated as a one-element list. **kwargs: Keyword arguments from the request. Returns: @@ -266,8 +271,13 @@ async def all_collections( if parsed_sort: sort = parsed_sort + # Convert q to a list if it's a string + q_list = None + if q is not None: + q_list = [q] if isinstance(q, str) else q + collections, next_token = await self.database.get_all_collections( - token=token, limit=limit, request=request, sort=sort + token=token, limit=limit, request=request, sort=sort, q=q_list ) # Apply field filtering if fields parameter was provided diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 25c865f8..18b52147 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -45,8 +45,7 @@ ) from stac_fastapi.extensions.core.fields import FieldsConformanceClasses from stac_fastapi.extensions.core.filter import FilterConformanceClasses - -# from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses +from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses from
stac_fastapi.extensions.core.query import QueryConformanceClasses from stac_fastapi.extensions.core.sort import SortConformanceClasses from stac_fastapi.extensions.third_party import BulkTransactionExtension @@ -127,7 +126,7 @@ # CollectionSearchFilterExtension( # conformance_classes=[FilterConformanceClasses.COLLECTIONS] # ), - # FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]), + FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]), ] # Initialize collection search with its extensions diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 35cd8d9e..c472039b 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -175,6 +175,7 @@ async def get_all_collections( limit: int, request: Request, sort: Optional[List[Dict[str, Any]]] = None, + q: Optional[List[str]] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Retrieve a list of collections from Elasticsearch, supporting pagination. @@ -183,6 +184,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. + q (Optional[List[str]]): Free text search terms. Returns: A tuple of (collections, next pagination token if any). 
@@ -223,6 +225,38 @@ async def get_all_collections( if token: body["search_after"] = [token] + # Apply free text query if provided + if q: + # For collections, we want to search across all relevant fields + should_clauses = [] + + # For each search term + for term in q: + # Create one case-insensitive wildcard (substring) clause per field for each term + for field in [ + "id", + "title", + "description", + "keywords", + "summaries.platform", + "summaries.constellation", + "providers.name", + "providers.url", + ]: + should_clauses.append( + { + "wildcard": { + field: {"value": f"*{term}*", "case_insensitive": True} + } + } + ) + + # Add the query to the body using bool query with should clauses + body["query"] = { + "bool": {"should": should_clauses, "minimum_should_match": 1} + } + + # Execute the search response = await self.client.search( index=COLLECTIONS_INDEX, body=body, diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index 362eb9bf..34d55589 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -39,8 +39,7 @@ ) from stac_fastapi.extensions.core.fields import FieldsConformanceClasses from stac_fastapi.extensions.core.filter import FilterConformanceClasses - -# from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses +from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses from stac_fastapi.extensions.core.query import QueryConformanceClasses from stac_fastapi.extensions.core.sort import SortConformanceClasses from stac_fastapi.extensions.third_party import BulkTransactionExtension @@ -127,7 +126,7 @@ # CollectionSearchFilterExtension( # conformance_classes=[FilterConformanceClasses.COLLECTIONS] # ), - # FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]), + FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]), ] # Initialize collection search with
its extensions diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 94a95b32..f4b8abd0 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -159,6 +159,7 @@ async def get_all_collections( limit: int, request: Request, sort: Optional[List[Dict[str, Any]]] = None, + q: Optional[List[str]] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Retrieve a list of collections from Elasticsearch, supporting pagination. @@ -167,6 +168,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. + q (Optional[List[str]]): Free text search terms. Returns: A tuple of (collections, next pagination token if any). @@ -207,6 +209,37 @@ async def get_all_collections( if token: body["search_after"] = [token] + # Apply free text query if provided + if q: + # For collections, we want to search across all relevant fields + should_clauses = [] + + # For each search term + for term in q: + # Create one case-insensitive wildcard (substring) clause per field for each term + for field in [ + "id", + "title", + "description", + "keywords", + "summaries.platform", + "summaries.constellation", + "providers.name", + "providers.url", + ]: + should_clauses.append( + { + "wildcard": { + field: {"value": f"*{term}*", "case_insensitive": True} + } + } + ) + + # Add the query to the body using bool query with should clauses + body["query"] = { + "bool": {"should": should_clauses, "minimum_should_match": 1} + } + response = await self.client.search( index=COLLECTIONS_INDEX, body=body, diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index ffd84831..de546079 100644 ---
a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -152,3 +152,77 @@ async def test_collections_fields(app_client, txn_client, load_test_data): assert "title" in collection assert "description" not in collection assert "links" in collection + + +@pytest.mark.asyncio +async def test_collections_free_text_search_get(app_client, txn_client, load_test_data): + """Verify GET /collections honors the q parameter for free text search.""" + # Create multiple collections with different content + base_collection = load_test_data("test_collection.json") + + # Use unique prefixes to avoid conflicts between tests + test_prefix = f"q-get-{uuid.uuid4().hex[:8]}" + + # Create collections with different content to test free text search + test_collections = [ + { + "id": f"{test_prefix}-sentinel", + "title": "Sentinel-2 Collection", + "description": "Collection of Sentinel-2 data", + "summaries": {"platform": ["sentinel-2a", "sentinel-2b"]}, + }, + { + "id": f"{test_prefix}-landsat", + "title": "Landsat Collection", + "description": "Collection of Landsat data", + "summaries": {"platform": ["landsat-8", "landsat-9"]}, + }, + { + "id": f"{test_prefix}-modis", + "title": "MODIS Collection", + "description": "Collection of MODIS data", + "summaries": {"platform": ["terra", "aqua"]}, + }, + ] + + for i, coll in enumerate(test_collections): + test_collection = base_collection.copy() + test_collection["id"] = coll["id"] + test_collection["title"] = coll["title"] + test_collection["description"] = coll["description"] + test_collection["summaries"] = coll["summaries"] + await create_collection(txn_client, test_collection) + + # Test free text search for "sentinel" + resp = await app_client.get( + "/collections", + params=[("q", "sentinel")], + ) + assert resp.status_code == 200 + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + found_collections = [ + c for c in 
resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Should only find the sentinel collection + assert len(found_collections) == 1 + assert found_collections[0]["id"] == f"{test_prefix}-sentinel" + + # Test free text search for "modis" + resp = await app_client.get( + "/collections", + params=[("q", "modis")], + ) + assert resp.status_code == 200 + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + found_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Should only find the modis collection + assert len(found_collections) == 1 + assert found_collections[0]["id"] == f"{test_prefix}-modis"