From 6380f7c0f069c4bfe07a57b1c1b4784bcd30864d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 13 Apr 2026 15:47:36 +0200
Subject: [PATCH 01/11] Centralize pagination validation test
---
tests/dependencies/pagination_test.py | 29 ++++++++++++++++++++++++++
tests/routers/openml/task_list_test.py | 22 -------------------
2 files changed, 29 insertions(+), 22 deletions(-)
create mode 100644 tests/dependencies/pagination_test.py
diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
new file mode 100644
index 0000000..3e4fa1d
--- /dev/null
+++ b/tests/dependencies/pagination_test.py
@@ -0,0 +1,29 @@
+from typing import Any
+
+import pytest
+from pydantic import ValidationError
+
+from routers.dependencies import Pagination
+
+
+def test_pagination_defaults() -> None:
+ """Pagination has expected defaults when no values are provided."""
+ pagination = Pagination()
+ assert pagination.offset == 0
+ assert pagination.limit == 100
+
+
+@pytest.mark.parametrize(
+ ("kwargs", "expected_field"),
+ [
+ ({"limit": "abc", "offset": 0}, "limit"),
+ ({"limit": 5, "offset": "xyz"}, "offset"),
+ ],
+ ids=["bad_limit_type", "bad_offset_type"],
+)
+def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None:
+ """Non-integer values for limit or offset raise a ValidationError."""
+ with pytest.raises(ValidationError) as exc_info:
+ Pagination(**kwargs)
+ errors = exc_info.value.errors()
+ assert any(error["loc"] == (expected_field,) for error in errors)
diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py
index 67d8539..ee9eb09 100644
--- a/tests/routers/openml/task_list_test.py
+++ b/tests/routers/openml/task_list_test.py
@@ -117,28 +117,6 @@ async def test_list_tasks_negative_pagination_safely_clamped(
assert error["type"] == NoResultsError.uri
-@pytest.mark.parametrize(
- ("pagination_override", "expected_field"),
- [
- ({"limit": "abc", "offset": 0}, "limit"), # Invalid type
- ({"limit": 5, "offset": "xyz"}, "offset"), # Invalid type
- ],
- ids=["bad_limit_type", "bad_offset_type"],
-)
-async def test_list_tasks_invalid_pagination_type(
- pagination_override: dict[str, Any], expected_field: str, py_api: httpx.AsyncClient
-) -> None:
- """Invalid pagination types return 422 Unprocessable Entity."""
- response = await py_api.post(
- "/tasks/list",
- json={"pagination": pagination_override},
- )
- assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
- # Verify that the error points to the correct field
- error = response.json()["errors"][0]
- assert error["loc"][-2:] == ["pagination", expected_field]
- assert error["type"] in {"type_error.integer", "int_parsing", "int_type"}
-
@pytest.mark.parametrize(
"value",
From ba22cc9e98a324ea631b4a18c9a90e5c59841df3 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 13 Apr 2026 15:52:26 +0200
Subject: [PATCH 02/11] Provide tighter input validation for Pagination
---
src/routers/dependencies.py | 6 +++---
tests/dependencies/pagination_test.py | 3 +++
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py
index a770a45..99a5a6c 100644
--- a/src/routers/dependencies.py
+++ b/src/routers/dependencies.py
@@ -3,7 +3,7 @@
from fastapi import Depends
from loguru import logger
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncConnection
from core.errors import AuthenticationFailedError, AuthenticationRequiredError
@@ -58,5 +58,5 @@ def fetch_user_or_raise(
class Pagination(BaseModel):
- offset: int = 0
- limit: int = 100
+ offset: int = Field(default=0, ge=0)
+ limit: int = Field(default=100, gt=0, le=1000)
diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
index 3e4fa1d..335efc2 100644
--- a/tests/dependencies/pagination_test.py
+++ b/tests/dependencies/pagination_test.py
@@ -17,7 +17,10 @@ def test_pagination_defaults() -> None:
("kwargs", "expected_field"),
[
({"limit": "abc", "offset": 0}, "limit"),
+ ({"limit": -5, "offset": 0}, "limit"),
+ ({"limit": 2000, "offset": 0}, "limit"),
({"limit": 5, "offset": "xyz"}, "offset"),
+ ({"limit": 5, "offset": -5}, "offset"),
],
ids=["bad_limit_type", "bad_offset_type"],
)
From b7e64f6568788d35af78833404e645e43db2883b Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 13 Apr 2026 15:54:44 +0200
Subject: [PATCH 03/11] Add a test with non-default parameters
---
tests/dependencies/pagination_test.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
index 335efc2..7b3d679 100644
--- a/tests/dependencies/pagination_test.py
+++ b/tests/dependencies/pagination_test.py
@@ -13,6 +13,13 @@ def test_pagination_defaults() -> None:
assert pagination.limit == 100
+def test_pagination_valid_values() -> None:
+ """Non-default values within bounds are accepted."""
+ pagination = Pagination(limit=500, offset=10)
+ assert pagination.limit == 500
+ assert pagination.offset == 10
+
+
@pytest.mark.parametrize(
("kwargs", "expected_field"),
[
@@ -22,7 +29,7 @@ def test_pagination_defaults() -> None:
({"limit": 5, "offset": "xyz"}, "offset"),
({"limit": 5, "offset": -5}, "offset"),
],
- ids=["bad_limit_type", "bad_offset_type"],
+ ids=["bad_limit_type", "negative_limit", "limit_too_large", "bad_offset_type", "negative_offset"],
)
def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None:
"""Non-integer values for limit or offset raise a ValidationError."""
From e57492e063a2377bfe68f7b4ec392601e1128922 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 14 Apr 2026 12:00:53 +0200
Subject: [PATCH 04/11] Remove 'valid values' test - should be covered by API
tests
---
tests/dependencies/pagination_test.py | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
index 7b3d679..db048da 100644
--- a/tests/dependencies/pagination_test.py
+++ b/tests/dependencies/pagination_test.py
@@ -13,13 +13,6 @@ def test_pagination_defaults() -> None:
assert pagination.limit == 100
-def test_pagination_valid_values() -> None:
- """Non-default values within bounds are accepted."""
- pagination = Pagination(limit=500, offset=10)
- assert pagination.limit == 500
- assert pagination.offset == 10
-
-
@pytest.mark.parametrize(
("kwargs", "expected_field"),
[
@@ -29,7 +22,13 @@ def test_pagination_valid_values() -> None:
({"limit": 5, "offset": "xyz"}, "offset"),
({"limit": 5, "offset": -5}, "offset"),
],
- ids=["bad_limit_type", "negative_limit", "limit_too_large", "bad_offset_type", "negative_offset"],
+ ids=[
+ "bad_limit_type",
+ "negative_limit",
+ "limit_too_large",
+ "bad_offset_type",
+ "negative_offset",
+ ],
)
def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None:
"""Non-integer values for limit or offset raise a ValidationError."""
From c8868d80af32c50cd9c350ebaecaa625cbd09854 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
<66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 10:10:23 +0000
Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---
tests/routers/openml/task_list_test.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py
index ee9eb09..70482a5 100644
--- a/tests/routers/openml/task_list_test.py
+++ b/tests/routers/openml/task_list_test.py
@@ -117,7 +117,6 @@ async def test_list_tasks_negative_pagination_safely_clamped(
assert error["type"] == NoResultsError.uri
-
@pytest.mark.parametrize(
"value",
["1...2", "abc"],
From e7ed37c5a2af8e9598a1a13245ca0f86a718744e Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 14 Apr 2026 12:33:49 +0200
Subject: [PATCH 06/11] remove test since input validation is now tested
centrally
---
tests/routers/openml/task_list_test.py | 43 --------------------------
1 file changed, 43 deletions(-)
diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py
index 70482a5..78eb5ec 100644
--- a/tests/routers/openml/task_list_test.py
+++ b/tests/routers/openml/task_list_test.py
@@ -74,49 +74,6 @@ async def test_list_tasks_api_happy_path(py_api: httpx.AsyncClient) -> None:
assert "OpenML100" in task["tag"]
-@pytest.mark.parametrize(
- ("limit", "offset", "expected_status", "expected_max_results"),
- [
- (-10, 0, HTTPStatus.NOT_FOUND, 0), # negative limit clamped to 0 -> No results
- (5, -10, HTTPStatus.OK, 5), # negative offset clamped to 0 -> First 5 results
- ],
- ids=["negative_limit", "negative_offset"],
-)
-async def test_list_tasks_negative_pagination_safely_clamped(
- limit: int,
- offset: int,
- expected_status: int,
- expected_max_results: int,
- py_api: httpx.AsyncClient,
-) -> None:
- """Negative pagination values are safely clamped to 0 instead of causing 500 errors.
-
- A limit clamped to 0 raises NoResultsError, which the API maps to HTTP 404.
- An offset clamped to 0 simply returns the first page of results (200 OK).
-
- Note: This remains an HTTP-level (py_api) test to ensure end-to-end safety is
- preserved.
- """
- response = await py_api.post(
- "/tasks/list",
- json={"pagination": {"limit": limit, "offset": offset}},
- )
- assert response.status_code == expected_status
- if expected_status == HTTPStatus.OK:
- body = response.json()
- assert len(body) <= expected_max_results
- # Compare to a baseline with offset=0 to prove it was correctly clamped
- baseline = await py_api.post(
- "/tasks/list",
- json={"pagination": {"limit": limit, "offset": 0}},
- )
- assert baseline.status_code == HTTPStatus.OK
- assert [t["task_id"] for t in body] == [t["task_id"] for t in baseline.json()]
- else:
- error = response.json()
- assert error["type"] == NoResultsError.uri
-
-
@pytest.mark.parametrize(
"value",
["1...2", "abc"],
From 9d5bf7b3215e57cebba4cd326485b6e1f97e63e0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 14 Apr 2026 12:35:06 +0200
Subject: [PATCH 07/11] Add noqa directive for pagination limit
---
tests/dependencies/pagination_test.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
index db048da..0de2d69 100644
--- a/tests/dependencies/pagination_test.py
+++ b/tests/dependencies/pagination_test.py
@@ -10,7 +10,7 @@ def test_pagination_defaults() -> None:
"""Pagination has expected defaults when no values are provided."""
pagination = Pagination()
assert pagination.offset == 0
- assert pagination.limit == 100
+ assert pagination.limit == 100 # noqa: PLR2004
@pytest.mark.parametrize(
From 536d319fb923ba11591629d4dd80b78535786661 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 14 Apr 2026 13:59:28 +0200
Subject: [PATCH 08/11] Update dataset list test to reflect pagination
constraints
---
src/routers/dependencies.py | 6 +++++-
.../openml/datasets_list_datasets_test.py | 20 +++++++++++--------
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py
index 99a5a6c..dba12d8 100644
--- a/src/routers/dependencies.py
+++ b/src/routers/dependencies.py
@@ -57,6 +57,10 @@ def fetch_user_or_raise(
return user
+LIMIT_DEFAULT = 100
+LIMIT_MAX = 1000
+
+
class Pagination(BaseModel):
offset: int = Field(default=0, ge=0)
- limit: int = Field(default=100, gt=0, le=1000)
+ limit: int = Field(default=LIMIT_DEFAULT, gt=0, le=LIMIT_MAX)
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index be08927..5bbecc7 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -11,7 +11,7 @@
from core.errors import NoResultsError
from database.users import User
-from routers.dependencies import Pagination
+from routers.dependencies import LIMIT_DEFAULT, LIMIT_MAX, Pagination
from routers.openml.datasets import DatasetStatusFilter, list_datasets
from tests import constants
from tests.users import ADMIN_USER, DATASET_130_OWNER, SOME_USER, ApiKey
@@ -57,12 +57,6 @@ async def test_list_data_identical(
api_key = kwargs.pop("api_key")
api_key_query = f"?api_key={api_key}" if api_key else ""
- # Pagination parameters are nested in the new query style
- # The old style has no `limit` by default, so we mimic this with a high default
- new_style = kwargs | {"pagination": {"limit": limit or 1_000_000}}
- if offset is not None:
- new_style["pagination"]["offset"] = offset
-
# old style `/data/filter` encodes all filters as a path
query = [
[filter_, value if not isinstance(value, list) else ",".join(str(v) for v in value)]
@@ -74,13 +68,21 @@ async def test_list_data_identical(
uri += f"/{'/'.join([str(v) for q in query for v in q])}"
uri += api_key_query
+ # new style just takes the values directly in a JSON body,
+ # except that the limit and offset parameters are under a pagination field.
+ if limit is not None:
+ kwargs.setdefault("pagination", {})["limit"] = limit
+ if offset is not None:
+ kwargs.setdefault("pagination", {})["offset"] = offset
+
py_response, php_response = await asyncio.gather(
- py_api.post(f"/datasets/list{api_key_query}", json=new_style),
+ py_api.post(f"/datasets/list{api_key_query}", json=kwargs),
php_api.get(uri),
)
# Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results)
# and the error response format, so we can't compare error responses directly.
+ # Validation errors shouldn't occur since the search space doesn't include invalid values
php_is_error = php_response.status_code == HTTPStatus.PRECONDITION_FAILED
py_is_error = py_response.status_code == HTTPStatus.NOT_FOUND
@@ -105,6 +107,8 @@ async def test_list_data_identical(
# PHP API has a double nested dictionary that never has other entries
php_json = php_response.json()["data"]["dataset"]
+ # The default limit changed from unbound to 100.
+ php_json = php_json[:LIMIT_DEFAULT]
assert len(py_json) == len(php_json)
assert py_json == php_json
return None
From 23664ba50b0b8499bd39283c0496ccfe899c7f6b Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 14 Apr 2026 14:01:38 +0200
Subject: [PATCH 09/11] Update test to reflect new pagination constraints
---
tests/routers/openml/migration/tasks_migration_test.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py
index a11f1a5..ea3226b 100644
--- a/tests/routers/openml/migration/tasks_migration_test.py
+++ b/tests/routers/openml/migration/tasks_migration_test.py
@@ -11,6 +11,7 @@
nested_remove_single_element_list,
nested_remove_values,
)
+from routers.dependencies import LIMIT_MAX
@pytest.mark.parametrize(
@@ -141,8 +142,7 @@ async def test_list_tasks_equal(
- PHP error status is 412 PRECONDITION_FAILED; Python uses 404 NOT_FOUND.
"""
php_path = _build_php_task_list_path(php_params)
- # Use a very large limit on Python side to match PHP's unbounded default result count
- py_body = {**py_extra, "pagination": {"limit": 1_000_000, "offset": 0}}
+ py_body = {**py_extra, "pagination": {"limit": LIMIT_MAX, "offset": 0}}
py_response, php_response = await asyncio.gather(
py_api.post("/tasks/list", json=py_body),
php_api.get(php_path),
@@ -163,6 +163,7 @@ async def test_list_tasks_equal(
php_tasks: list[dict[str, Any]] = (
php_tasks_raw if isinstance(php_tasks_raw, list) else [php_tasks_raw]
)
+ php_tasks = php_tasks[:LIMIT_MAX]
py_tasks: list[dict[str, Any]] = [_normalize_py_task(t) for t in py_response.json()]
php_ids = {int(t["task_id"]) for t in php_tasks}
From 8b6832c108f45fb6814861a667f8cd100cc3a201 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
<66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 12:02:41 +0000
Subject: [PATCH 10/11] [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---
tests/routers/openml/datasets_list_datasets_test.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index 5bbecc7..bdf3c6e 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -11,7 +11,7 @@
from core.errors import NoResultsError
from database.users import User
-from routers.dependencies import LIMIT_DEFAULT, LIMIT_MAX, Pagination
+from routers.dependencies import LIMIT_DEFAULT, Pagination
from routers.openml.datasets import DatasetStatusFilter, list_datasets
from tests import constants
from tests.users import ADMIN_USER, DATASET_130_OWNER, SOME_USER, ApiKey
From d20d4cc913ba69a32516c7f070fb49a1aa306147 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 14 Apr 2026 14:08:04 +0200
Subject: [PATCH 11/11] Only truncate list when limit isn't set
---
tests/routers/openml/datasets_list_datasets_test.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index bdf3c6e..7460fb5 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -108,7 +108,8 @@ async def test_list_data_identical(
# PHP API has a double nested dictionary that never has other entries
php_json = php_response.json()["data"]["dataset"]
# The default limit changed from unbound to 100.
- php_json = php_json[:LIMIT_DEFAULT]
+ if limit is None:
+ php_json = php_json[:LIMIT_DEFAULT]
assert len(py_json) == len(php_json)
assert py_json == php_json
return None