diff --git a/CHANGELOG.md b/CHANGELOG.md index 6323533ec..87806ba60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed +- Removed the `ENV_MAX_LIMIT` environment variable; maximum limits are now configured with the `STAC_GLOBAL_ITEM_MAX_LIMIT` and `STAC_GLOBAL_COLLECTION_MAX_LIMIT` environment variables. [#482](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/482) +- Changed the default and maximum pagination limits for collections/items endpoints: `STAC_ITEM_LIMIT` is replaced by `STAC_DEFAULT_ITEM_LIMIT` (default `10`) and `STAC_DEFAULT_COLLECTION_LIMIT` (default `300`). [#482](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/482) + ### Fixed ## [v6.5.1] - 2025-09-30 diff --git a/README.md b/README.md index 37dbdba7d..737fb8128 100644 --- a/README.md +++ b/README.md @@ -323,9 +323,11 @@ You can customize additional settings in your `.env` file: | `ENABLE_COLLECTIONS_SEARCH` | Enable collection search extensions (sort, fields, free text search, structured filtering, and datetime filtering) on the core `/collections` endpoint. | `true` | Optional | | `ENABLE_COLLECTIONS_SEARCH_ROUTE` | Enable the custom `/collections-search` endpoint (both GET and POST methods). When disabled, the custom endpoint will not be available, but collection search extensions will still be available on the core `/collections` endpoint if `ENABLE_COLLECTIONS_SEARCH` is true. | `false` | Optional | | `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. This is useful for deployments where mutating the catalog via the API should be prevented. If set to `true`, the POST `/collections` route for search will be unavailable in the API. | `true` | Optional | -| `STAC_ITEM_LIMIT` | Sets the environment variable for result limiting to SFEOS for the number of returned items and STAC collections. | `10` | Optional | +| `STAC_GLOBAL_COLLECTION_MAX_LIMIT` | Configures the maximum number of STAC collections that can be returned in a single search request. 
| N/A | Optional | +| `STAC_DEFAULT_COLLECTION_LIMIT` | Configures the default number of STAC collections returned when no limit parameter is specified in the request. | `300` | Optional | +| `STAC_GLOBAL_ITEM_MAX_LIMIT` | Configures the maximum number of STAC items that can be returned in a single search request. | N/A | Optional | +| `STAC_DEFAULT_ITEM_LIMIT` | Configures the default number of STAC items returned when no limit parameter is specified in the request. | `10` | Optional | | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | -| `ENV_MAX_LIMIT` | Configures the environment variable in SFEOS to override the default `MAX_LIMIT`, which controls the limit parameter for returned items and STAC collections. | `10,000` | Optional | | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. 
| `true` | Optional | > [!NOTE] diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index cc175b6ce..f4a9058e0 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -274,34 +274,31 @@ async def all_collections( """ base_url = str(request.base_url) - # Get the global limit from environment variable - global_limit = None - env_limit = os.getenv("STAC_ITEM_LIMIT") - if env_limit: - try: - global_limit = int(env_limit) - except ValueError: - # Handle invalid integer in environment variable - pass - - # Apply global limit if it exists - if global_limit is not None: - # If a limit was provided, use the smaller of the two - if limit is not None: - limit = min(limit, global_limit) - else: - limit = global_limit + global_max_limit = ( + int(os.getenv("STAC_GLOBAL_COLLECTION_MAX_LIMIT")) + if os.getenv("STAC_GLOBAL_COLLECTION_MAX_LIMIT") + else None + ) + query_limit = request.query_params.get("limit") + default_limit = int(os.getenv("STAC_DEFAULT_COLLECTION_LIMIT", 300)) + + body_limit = None + try: + if request.method == "POST" and request.body(): + body_data = await request.json() + body_limit = body_data.get("limit") + except Exception: + pass + + if body_limit is not None: + limit = int(body_limit) + elif query_limit: + limit = int(query_limit) else: - # No global limit, use provided limit or default - if limit is None: - query_limit = request.query_params.get("limit") - if query_limit: - try: - limit = int(query_limit) - except ValueError: - limit = 10 - else: - limit = 10 + limit = default_limit + + if global_max_limit is not None: + limit = min(limit, global_max_limit) # Get token from query params only if not already provided (for GET requests) if token is None: @@ -575,7 +572,7 @@ async def item_collection( request (Request): FastAPI Request object. bbox (Optional[BBox]): Optional bounding box filter. 
datetime (Optional[str]): Optional datetime or interval filter. - limit (Optional[int]): Optional page size. Defaults to env ``STAC_ITEM_LIMIT`` when unset. + limit (Optional[int]): Optional page size. Defaults to env `STAC_DEFAULT_ITEM_LIMIT` when unset. sortby (Optional[str]): Optional sort specification. Accepts repeated values like ``sortby=-properties.datetime`` or ``sortby=+id``. Bare fields (e.g. ``sortby=id``) imply ascending order. @@ -666,15 +663,12 @@ async def get_search( q (Optional[List[str]]): Free text query to filter the results. intersects (Optional[str]): GeoJSON geometry to search in. kwargs: Additional parameters to be passed to the API. - Returns: ItemCollection: Collection of `Item` objects representing the search results. Raises: HTTPException: If any error occurs while searching the catalog. """ - limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) - base_args = { "collections": collections, "ids": ids, @@ -749,6 +743,34 @@ async def post_search( Raises: HTTPException: If there is an error with the cql2_json filter. 
""" + global_max_limit = ( + int(os.getenv("STAC_GLOBAL_ITEM_MAX_LIMIT")) + if os.getenv("STAC_GLOBAL_ITEM_MAX_LIMIT") + else None + ) + query_limit = request.query_params.get("limit") + default_limit = int(os.getenv("STAC_DEFAULT_ITEM_LIMIT", 10)) + + body_limit = None + try: + if request.method == "POST" and request.body(): + body_data = await request.json() + body_limit = body_data.get("limit") + except Exception: + pass + + if body_limit is not None: + limit = int(body_limit) + elif query_limit: + limit = int(query_limit) + else: + limit = default_limit + + if global_max_limit: + limit = min(limit, global_max_limit) + + search_request.limit = limit + base_url = str(request.base_url) search = self.database.make_search() @@ -825,7 +847,6 @@ async def post_search( if hasattr(search_request, "sortby") and getattr(search_request, "sortby"): sort = self.database.populate_sort(getattr(search_request, "sortby")) - limit = 10 if search_request.limit: limit = search_request.limit diff --git a/stac_fastapi/core/stac_fastapi/core/utilities.py b/stac_fastapi/core/stac_fastapi/core/utilities.py index c54348af0..de6536567 100644 --- a/stac_fastapi/core/stac_fastapi/core/utilities.py +++ b/stac_fastapi/core/stac_fastapi/core/utilities.py @@ -10,15 +10,7 @@ from stac_fastapi.types.stac import Item - -def get_max_limit(): - """ - Retrieve a MAX_LIMIT value from an environment variable. - - Returns: - int: The int value parsed from the environment variable. 
- """ - return int(os.getenv("ENV_MAX_LIMIT", 10000)) +MAX_LIMIT = 10000 def get_bool_env(name: str, default: Union[bool, str] = False) -> bool: diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index c3f6f8530..35b6ae31a 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -17,7 +17,7 @@ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit +from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon, get_bool_env from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings from stac_fastapi.elasticsearch.config import ( ElasticsearchSettings as SyncElasticsearchSettings, @@ -762,7 +762,7 @@ async def execute_search( index_param = ITEM_INDICES query = add_collections_to_body(collection_ids, query) - max_result_window = get_max_limit() + max_result_window = MAX_LIMIT size_limit = min(limit + 1, max_result_window) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 9d814ba92..05aac1763 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -17,7 +17,7 @@ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit +from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon, get_bool_env from stac_fastapi.extensions.core.transaction.request import ( PartialCollection, PartialItem, @@ -775,7 
+775,7 @@ async def execute_search( search_body["sort"] = sort if sort else DEFAULT_SORT - max_result_window = get_max_limit() + max_result_window = MAX_LIMIT size_limit = min(limit + 1, max_result_window) diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index 6fdc2fb60..0b0733825 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -1475,70 +1475,102 @@ def create_items(date_prefix: str, start_day: int, count: int) -> dict: @pytest.mark.asyncio -async def test_collections_limit_env_variable(app_client, txn_client, load_test_data): - limit = "5" - os.environ["STAC_ITEM_LIMIT"] = limit - item = load_test_data("test_collection.json") +async def test_global_collection_max_limit_set(app_client, txn_client, load_test_data): + """Test with global collection max limit set, expect cap the limit""" + os.environ["STAC_GLOBAL_COLLECTION_MAX_LIMIT"] = "5" for i in range(10): - test_collection = item.copy() - test_collection["id"] = f"test-collection-env-{i}" - test_collection["title"] = f"Test Collection Env {i}" + test_collection = load_test_data("test_collection.json") + test_collection_id = f"test-collection-global-{i}" + test_collection["id"] = test_collection_id + await create_collection(txn_client, test_collection) + + resp = await app_client.get("/collections?limit=10") + assert resp.status_code == 200 + resp_json = resp.json() + assert len(resp_json["collections"]) == 5 + + del os.environ["STAC_GLOBAL_COLLECTION_MAX_LIMIT"] + + +@pytest.mark.asyncio +async def test_default_collection_limit(app_client, txn_client, load_test_data): + """Test default collection limit set, should use default when no limit provided""" + os.environ["STAC_DEFAULT_COLLECTION_LIMIT"] = "5" + + for i in range(10): + test_collection = load_test_data("test_collection.json") + test_collection_id = f"test-collection-default-{i}" + test_collection["id"] = test_collection_id await create_collection(txn_client, test_collection) 
resp = await app_client.get("/collections") assert resp.status_code == 200 resp_json = resp.json() - assert int(limit) == len(resp_json["collections"]) + assert len(resp_json["collections"]) == 5 + + del os.environ["STAC_DEFAULT_COLLECTION_LIMIT"] @pytest.mark.asyncio -async def test_search_collection_limit_env_variable( - app_client, txn_client, load_test_data -): - limit = "5" - os.environ["STAC_ITEM_LIMIT"] = limit +async def test_no_global_item_max_limit_set(app_client, txn_client, load_test_data): + """Test with no global max limit set for items""" + + if "STAC_GLOBAL_ITEM_MAX_LIMIT" in os.environ: + del os.environ["STAC_GLOBAL_ITEM_MAX_LIMIT"] test_collection = load_test_data("test_collection.json") - test_collection_id = "test-collection-search-limit" + test_collection_id = "test-collection-no-global-limit" test_collection["id"] = test_collection_id await create_collection(txn_client, test_collection) item = load_test_data("test_item.json") item["collection"] = test_collection_id - for i in range(10): + for i in range(20): test_item = item.copy() - test_item["id"] = f"test-item-search-{i}" + test_item["id"] = f"test-item-{i}" await create_item(txn_client, test_item) - resp = await app_client.get("/search", params={"collections": [test_collection_id]}) + resp = await app_client.get(f"/collections/{test_collection_id}/items?limit=20") + assert resp.status_code == 200 + resp_json = resp.json() + assert len(resp_json["features"]) == 20 + + resp = await app_client.get(f"/search?collections={test_collection_id}&limit=20") assert resp.status_code == 200 resp_json = resp.json() - assert int(limit) == len(resp_json["features"]) + assert len(resp_json["features"]) == 20 + resp = await app_client.post( + "/search", json={"collections": [test_collection_id], "limit": 20} + ) + assert resp.status_code == 200 + resp_json = resp.json() + assert len(resp_json["features"]) == 20 -async def test_search_max_item_limit( - app_client, load_test_data, txn_client, monkeypatch -): 
- limit = "10" - monkeypatch.setenv("ENV_MAX_LIMIT", limit) - test_collection = load_test_data("test_collection.json") - await create_collection(txn_client, test_collection) +@pytest.mark.asyncio +async def test_no_global_collection_max_limit_set( + app_client, txn_client, load_test_data +): + """Test with no global max limit set for collections""" - item = load_test_data("test_item.json") + if "STAC_GLOBAL_COLLECTION_MAX_LIMIT" in os.environ: + del os.environ["STAC_GLOBAL_COLLECTION_MAX_LIMIT"] + test_collections = [] for i in range(20): - test_item = item.copy() - test_item["id"] = f"test-item-collection-{i}" - await create_item(txn_client, test_item) - - resp = await app_client.get("/search", params={"limit": 20}) + test_collection = load_test_data("test_collection.json") + test_collection_id = f"test-collection-no-global-limit-{i}" + test_collection["id"] = test_collection_id + await create_collection(txn_client, test_collection) + test_collections.append(test_collection_id) + resp = await app_client.get("/collections?limit=20") assert resp.status_code == 200 resp_json = resp.json() - assert int(limit) == len(resp_json["features"]) + assert len(resp_json["collections"]) == 20 @pytest.mark.asyncio diff --git a/stac_fastapi/tests/api/test_api_item_collection.py b/stac_fastapi/tests/api/test_api_item_collection.py index 2b1aa8e79..f0d07a744 100644 --- a/stac_fastapi/tests/api/test_api_item_collection.py +++ b/stac_fastapi/tests/api/test_api_item_collection.py @@ -10,29 +10,74 @@ @pytest.mark.asyncio -async def test_item_collection_limit_env_variable( +async def test_global_item_max_limit_set(app_client, txn_client, load_test_data): + """Test with global max limit set for items, expect cap the ?limit parameter""" + os.environ["STAC_GLOBAL_ITEM_MAX_LIMIT"] = "5" + + test_collection = load_test_data("test_collection.json") + test_collection_id = "test-collection-for-items" + test_collection["id"] = test_collection_id + await create_collection(txn_client, 
test_collection) + + item = load_test_data("test_item.json") + item["collection"] = test_collection_id + + for i in range(10): + test_item = item.copy() + test_item["id"] = f"test-item-{i}" + await create_item(txn_client, test_item) + + resp = await app_client.get(f"/collections/{test_collection_id}/items?limit=10") + assert resp.status_code == 200 + resp_json = resp.json() + assert len(resp_json["features"]) == 5 + + resp = await app_client.get(f"/search?collections={test_collection_id}&limit=10") + assert resp.status_code == 200 + resp_json = resp.json() + assert len(resp_json["features"]) == 5 + + del os.environ["STAC_GLOBAL_ITEM_MAX_LIMIT"] + + +@pytest.mark.asyncio +async def test_default_item_limit_without_limit_parameter_set( app_client, txn_client, load_test_data ): - limit = "5" - os.environ["STAC_ITEM_LIMIT"] = limit + """Test default item limit set, should use default when no limit provided""" + os.environ["STAC_DEFAULT_ITEM_LIMIT"] = "10" test_collection = load_test_data("test_collection.json") - test_collection_id = "test-collection-items-limit" + test_collection_id = "test-collection-items" test_collection["id"] = test_collection_id await create_collection(txn_client, test_collection) item = load_test_data("test_item.json") item["collection"] = test_collection_id - for i in range(10): + for i in range(15): test_item = item.copy() - test_item["id"] = f"test-item-collection-{i}" + test_item["id"] = f"test-item-{i}" await create_item(txn_client, test_item) resp = await app_client.get(f"/collections/{test_collection_id}/items") assert resp.status_code == 200 resp_json = resp.json() - assert int(limit) == len(resp_json["features"]) + assert len(resp_json["features"]) == 10 + + resp = await app_client.get(f"/search?collections={test_collection_id}") + assert resp.status_code == 200 + resp_json = resp.json() + assert len(resp_json["features"]) == 10 + + # Also test POST search to compare + search_body = {"collections": [test_collection_id]} + resp = await 
app_client.post("/search", json=search_body) + assert resp.status_code == 200 + resp_json = resp.json() + assert len(resp_json["features"]) == 10 + + del os.environ["STAC_DEFAULT_ITEM_LIMIT"] @pytest.mark.asyncio