From 6380f7c0f069c4bfe07a57b1c1b4784bcd30864d Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 13 Apr 2026 15:47:36 +0200 Subject: [PATCH 01/11] Centralize pagination validation test --- tests/dependencies/pagination_test.py | 29 ++++++++++++++++++++++++++ tests/routers/openml/task_list_test.py | 22 ------------------- 2 files changed, 29 insertions(+), 22 deletions(-) create mode 100644 tests/dependencies/pagination_test.py diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py new file mode 100644 index 0000000..3e4fa1d --- /dev/null +++ b/tests/dependencies/pagination_test.py @@ -0,0 +1,29 @@ +from typing import Any + +import pytest +from pydantic import ValidationError + +from routers.dependencies import Pagination + + +def test_pagination_defaults() -> None: + """Pagination has expected defaults when no values are provided.""" + pagination = Pagination() + assert pagination.offset == 0 + assert pagination.limit == 100 + + +@pytest.mark.parametrize( + ("kwargs", "expected_field"), + [ + ({"limit": "abc", "offset": 0}, "limit"), + ({"limit": 5, "offset": "xyz"}, "offset"), + ], + ids=["bad_limit_type", "bad_offset_type"], +) +def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None: + """Non-integer values for limit or offset raise a ValidationError.""" + with pytest.raises(ValidationError) as exc_info: + Pagination(**kwargs) + errors = exc_info.value.errors() + assert any(error["loc"] == (expected_field,) for error in errors) diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py index 67d8539..ee9eb09 100644 --- a/tests/routers/openml/task_list_test.py +++ b/tests/routers/openml/task_list_test.py @@ -117,28 +117,6 @@ async def test_list_tasks_negative_pagination_safely_clamped( assert error["type"] == NoResultsError.uri -@pytest.mark.parametrize( - ("pagination_override", "expected_field"), - [ - ({"limit": "abc", "offset": 0}, "limit"), # Invalid type - ({"limit": 5, "offset": "xyz"}, "offset"), # Invalid type - ], - ids=["bad_limit_type", "bad_offset_type"], -) -async def test_list_tasks_invalid_pagination_type( - pagination_override: dict[str, Any], expected_field: str, py_api: httpx.AsyncClient -) -> None: - """Invalid pagination types return 422 Unprocessable Entity.""" - response = await py_api.post( - "/tasks/list", - json={"pagination": pagination_override}, - ) - assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY - # Verify that the error points to the correct field - error = response.json()["errors"][0] - assert error["loc"][-2:] == ["pagination", expected_field] - assert error["type"] in {"type_error.integer", "int_parsing", "int_type"} - @pytest.mark.parametrize( "value", From ba22cc9e98a324ea631b4a18c9a90e5c59841df3 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 13 Apr 2026 15:52:26 +0200 Subject: [PATCH 02/11] Provide tighter input validation for Pagination --- src/routers/dependencies.py | 6 +++--- tests/dependencies/pagination_test.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py index a770a45..99a5a6c 100644 --- a/src/routers/dependencies.py +++ b/src/routers/dependencies.py @@ -3,7 +3,7 @@ from fastapi import Depends from loguru import logger -from pydantic import BaseModel +from pydantic import BaseModel, Field from sqlalchemy.ext.asyncio import AsyncConnection from core.errors import AuthenticationFailedError, AuthenticationRequiredError @@ -58,5 +58,5 @@ def fetch_user_or_raise( class Pagination(BaseModel): - offset: int = 0 - limit: int = 100 + offset: int = Field(default=0, ge=0) + limit: int = Field(default=100, gt=0, le=1000) diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py index 3e4fa1d..335efc2 100644 --- a/tests/dependencies/pagination_test.py +++ b/tests/dependencies/pagination_test.py @@ -17,7 +17,10 @@ def test_pagination_defaults() -> None: ("kwargs", "expected_field"), [ ({"limit": "abc", "offset": 0}, "limit"), + ({"limit": -5, "offset": 0}, "limit"), + ({"limit": 2000, "offset": 0}, "limit"), ({"limit": 5, "offset": "xyz"}, "offset"), + ({"limit": 5, "offset": -5}, "offset"), ], ids=["bad_limit_type", "bad_offset_type"], ) From b7e64f6568788d35af78833404e645e43db2883b Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 13 Apr 2026 15:54:44 +0200 Subject: [PATCH 03/11] Add a test with non-default parameters --- tests/dependencies/pagination_test.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py index 335efc2..7b3d679 100644 --- a/tests/dependencies/pagination_test.py +++ b/tests/dependencies/pagination_test.py @@ -13,6 +13,13 @@ def test_pagination_defaults() -> None: assert pagination.limit == 100 +def test_pagination_valid_values() -> None: + """Non-default values within bounds are accepted.""" + pagination = Pagination(limit=500, offset=10) + assert pagination.limit == 500 + assert pagination.offset == 10 + + @pytest.mark.parametrize( ("kwargs", "expected_field"), [ @@ -22,7 +29,7 @@ def test_pagination_defaults() -> None: ({"limit": 5, "offset": "xyz"}, "offset"), ({"limit": 5, "offset": -5}, "offset"), ], - ids=["bad_limit_type", "bad_offset_type"], + ids=["bad_limit_type", "negative_limit", "limit_too_large", "bad_offset_type", "negative_offset"], ) def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None: """Non-integer values for limit or offset raise a ValidationError.""" From e57492e063a2377bfe68f7b4ec392601e1128922 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 14 Apr 2026 12:00:53 +0200 Subject: [PATCH 04/11] Remove 'valid values' test - should be covered by API tests --- tests/dependencies/pagination_test.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py index 7b3d679..db048da 100644 --- a/tests/dependencies/pagination_test.py +++ b/tests/dependencies/pagination_test.py @@ -13,13 +13,6 @@ def test_pagination_defaults() -> None: assert pagination.limit == 100 -def test_pagination_valid_values() -> None: - """Non-default values within bounds are accepted.""" - pagination = Pagination(limit=500, offset=10) - assert pagination.limit == 500 - assert pagination.offset == 10 - - @pytest.mark.parametrize( ("kwargs", "expected_field"), [ @@ -29,7 +22,13 @@ def test_pagination_valid_values() -> None: ({"limit": 5, "offset": "xyz"}, "offset"), ({"limit": 5, "offset": -5}, "offset"), ], - ids=["bad_limit_type", "negative_limit", "limit_too_large", "bad_offset_type", "negative_offset"], + ids=[ + "bad_limit_type", + "negative_limit", + "limit_too_large", + "bad_offset_type", + "negative_offset", + ], ) def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None: """Non-integer values for limit or offset raise a ValidationError.""" From c8868d80af32c50cd9c350ebaecaa625cbd09854 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:10:23 +0000 Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/routers/openml/task_list_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py index ee9eb09..70482a5 100644 --- a/tests/routers/openml/task_list_test.py +++ b/tests/routers/openml/task_list_test.py @@ -117,7 +117,6 @@ async def test_list_tasks_negative_pagination_safely_clamped( assert error["type"] == NoResultsError.uri - @pytest.mark.parametrize( "value", ["1...2", "abc"], From e7ed37c5a2af8e9598a1a13245ca0f86a718744e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 14 Apr 2026 12:33:49 +0200 Subject: [PATCH 06/11] remove test since input validation is now tested centrally --- tests/routers/openml/task_list_test.py | 43 -------------------------- 1 file changed, 43 deletions(-) diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py index 70482a5..78eb5ec 100644 --- a/tests/routers/openml/task_list_test.py +++ b/tests/routers/openml/task_list_test.py @@ -74,49 +74,6 @@ async def test_list_tasks_api_happy_path(py_api: httpx.AsyncClient) -> None: assert "OpenML100" in task["tag"] -@pytest.mark.parametrize( - ("limit", "offset", "expected_status", "expected_max_results"), - [ - (-10, 0, HTTPStatus.NOT_FOUND, 0), # negative limit clamped to 0 -> No results - (5, -10, HTTPStatus.OK, 5), # negative offset clamped to 0 -> First 5 results - ], - ids=["negative_limit", "negative_offset"], -) -async def test_list_tasks_negative_pagination_safely_clamped( - limit: int, - offset: int, - expected_status: int, - expected_max_results: int, - py_api: httpx.AsyncClient, -) -> None: - """Negative pagination values are safely clamped to 0 instead of causing 500 errors. - - A limit clamped to 0 raises NoResultsError, which the API maps to HTTP 404. - An offset clamped to 0 simply returns the first page of results (200 OK). - - Note: This remains an HTTP-level (py_api) test to ensure end-to-end safety is - preserved. - """ - response = await py_api.post( - "/tasks/list", - json={"pagination": {"limit": limit, "offset": offset}}, - ) - assert response.status_code == expected_status - if expected_status == HTTPStatus.OK: - body = response.json() - assert len(body) <= expected_max_results - # Compare to a baseline with offset=0 to prove it was correctly clamped - baseline = await py_api.post( - "/tasks/list", - json={"pagination": {"limit": limit, "offset": 0}}, - ) - assert baseline.status_code == HTTPStatus.OK - assert [t["task_id"] for t in body] == [t["task_id"] for t in baseline.json()] - else: - error = response.json() - assert error["type"] == NoResultsError.uri - - @pytest.mark.parametrize( "value", ["1...2", "abc"], From 9d5bf7b3215e57cebba4cd326485b6e1f97e63e0 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 14 Apr 2026 12:35:06 +0200 Subject: [PATCH 07/11] Add noqa directive for pagination limit --- tests/dependencies/pagination_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py index db048da..0de2d69 100644 --- a/tests/dependencies/pagination_test.py +++ b/tests/dependencies/pagination_test.py @@ -10,7 +10,7 @@ def test_pagination_defaults() -> None: """Pagination has expected defaults when no values are provided.""" pagination = Pagination() assert pagination.offset == 0 - assert pagination.limit == 100 + assert pagination.limit == 100 # noqa: PLR2004 @pytest.mark.parametrize( From 536d319fb923ba11591629d4dd80b78535786661 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 14 Apr 2026 13:59:28 +0200 Subject: [PATCH 08/11] Update dataset list test to reflect pagination constraints --- src/routers/dependencies.py | 6 +++++- .../openml/datasets_list_datasets_test.py | 20 +++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py index 99a5a6c..dba12d8 100644 --- a/src/routers/dependencies.py +++ b/src/routers/dependencies.py @@ -57,6 +57,10 @@ def fetch_user_or_raise( return user +LIMIT_DEFAULT = 100 +LIMIT_MAX = 1000 + + class Pagination(BaseModel): offset: int = Field(default=0, ge=0) - limit: int = Field(default=100, gt=0, le=1000) + limit: int = Field(default=LIMIT_DEFAULT, gt=0, le=LIMIT_MAX) diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index be08927..5bbecc7 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -11,7 +11,7 @@ from core.errors import NoResultsError from database.users import User -from routers.dependencies import Pagination +from routers.dependencies import LIMIT_DEFAULT, LIMIT_MAX, Pagination from routers.openml.datasets import DatasetStatusFilter, list_datasets from tests import constants from tests.users import ADMIN_USER, DATASET_130_OWNER, SOME_USER, ApiKey @@ -57,12 +57,6 @@ async def test_list_data_identical( api_key = kwargs.pop("api_key") api_key_query = f"?api_key={api_key}" if api_key else "" - # Pagination parameters are nested in the new query style - # The old style has no `limit` by default, so we mimic this with a high default - new_style = kwargs | {"pagination": {"limit": limit or 1_000_000}} - if offset is not None: - new_style["pagination"]["offset"] = offset - # old style `/data/filter` encodes all filters as a path query = [ [filter_, value if not isinstance(value, list) else ",".join(str(v) for v in value)] @@ -74,13 +68,21 @@ async def test_list_data_identical( uri += f"/{'/'.join([str(v) for q in query for v in q])}" uri += api_key_query + # new style just takes the values directly in a JSON body, + # except that the limit and offset parameters are under a pagination field. + if limit is not None: + kwargs.setdefault("pagination", {})["limit"] = limit + if offset is not None: + kwargs.setdefault("pagination", {})["offset"] = offset + py_response, php_response = await asyncio.gather( - py_api.post(f"/datasets/list{api_key_query}", json=new_style), + py_api.post(f"/datasets/list{api_key_query}", json=kwargs), php_api.get(uri), ) # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results) # and the error response format, so we can't compare error responses directly. + # Validation errors shouldn't occur since the search space doesn't include invalid values php_is_error = php_response.status_code == HTTPStatus.PRECONDITION_FAILED py_is_error = py_response.status_code == HTTPStatus.NOT_FOUND @@ -105,6 +107,8 @@ async def test_list_data_identical( # PHP API has a double nested dictionary that never has other entries php_json = php_response.json()["data"]["dataset"] + # The default limit changed from unbound to 100. + php_json = php_json[:LIMIT_DEFAULT] assert len(py_json) == len(php_json) assert py_json == php_json return None From 23664ba50b0b8499bd39283c0496ccfe899c7f6b Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 14 Apr 2026 14:01:38 +0200 Subject: [PATCH 09/11] Update test to reflect new pagination constraints --- tests/routers/openml/migration/tasks_migration_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py index a11f1a5..ea3226b 100644 --- a/tests/routers/openml/migration/tasks_migration_test.py +++ b/tests/routers/openml/migration/tasks_migration_test.py @@ -11,6 +11,7 @@ nested_remove_single_element_list, nested_remove_values, ) +from routers.dependencies import LIMIT_MAX @pytest.mark.parametrize( @@ -141,8 +142,7 @@ async def test_list_tasks_equal( - PHP error status is 412 PRECONDITION_FAILED; Python uses 404 NOT_FOUND. """ php_path = _build_php_task_list_path(php_params) - # Use a very large limit on Python side to match PHP's unbounded default result count - py_body = {**py_extra, "pagination": {"limit": 1_000_000, "offset": 0}} + py_body = {**py_extra, "pagination": {"limit": LIMIT_MAX, "offset": 0}} py_response, php_response = await asyncio.gather( py_api.post("/tasks/list", json=py_body), php_api.get(php_path), @@ -163,6 +163,7 @@ async def test_list_tasks_equal( php_tasks: list[dict[str, Any]] = ( php_tasks_raw if isinstance(php_tasks_raw, list) else [php_tasks_raw] ) + php_tasks = php_tasks[:LIMIT_MAX] py_tasks: list[dict[str, Any]] = [_normalize_py_task(t) for t in py_response.json()] php_ids = {int(t["task_id"]) for t in php_tasks} From 8b6832c108f45fb6814861a667f8cd100cc3a201 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 12:02:41 +0000 Subject: [PATCH 10/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/routers/openml/datasets_list_datasets_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index 5bbecc7..bdf3c6e 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -11,7 +11,7 @@ from core.errors import NoResultsError from database.users import User -from routers.dependencies import LIMIT_DEFAULT, LIMIT_MAX, Pagination +from routers.dependencies import LIMIT_DEFAULT, Pagination from routers.openml.datasets import DatasetStatusFilter, list_datasets from tests import constants from tests.users import ADMIN_USER, DATASET_130_OWNER, SOME_USER, ApiKey From d20d4cc913ba69a32516c7f070fb49a1aa306147 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 14 Apr 2026 14:08:04 +0200 Subject: [PATCH 11/11] Only truncate list when limit isn't set --- tests/routers/openml/datasets_list_datasets_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index bdf3c6e..7460fb5 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -108,7 +108,8 @@ async def test_list_data_identical( # PHP API has a double nested dictionary that never has other entries php_json = php_response.json()["data"]["dataset"] # The default limit changed from unbound to 100. - php_json = php_json[:LIMIT_DEFAULT] + if limit is None: + php_json = php_json[:LIMIT_DEFAULT] assert len(py_json) == len(php_json) assert py_json == php_json return None