From 6380f7c0f069c4bfe07a57b1c1b4784bcd30864d Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Mon, 13 Apr 2026 15:47:36 +0200
Subject: [PATCH 01/11] Centralize pagination validation test

---
 tests/dependencies/pagination_test.py  | 29 ++++++++++++++++++++++++++
 tests/routers/openml/task_list_test.py | 22 -------------------
 2 files changed, 29 insertions(+), 22 deletions(-)
 create mode 100644 tests/dependencies/pagination_test.py

diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
new file mode 100644
index 0000000..3e4fa1d
--- /dev/null
+++ b/tests/dependencies/pagination_test.py
@@ -0,0 +1,29 @@
+from typing import Any
+
+import pytest
+from pydantic import ValidationError
+
+from routers.dependencies import Pagination
+
+
+def test_pagination_defaults() -> None:
+    """Pagination has expected defaults when no values are provided."""
+    pagination = Pagination()
+    assert pagination.offset == 0
+    assert pagination.limit == 100
+
+
+@pytest.mark.parametrize(
+    ("kwargs", "expected_field"),
+    [
+        ({"limit": "abc", "offset": 0}, "limit"),
+        ({"limit": 5, "offset": "xyz"}, "offset"),
+    ],
+    ids=["bad_limit_type", "bad_offset_type"],
+)
+def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None:
+    """Non-integer values for limit or offset raise a ValidationError."""
+    with pytest.raises(ValidationError) as exc_info:
+        Pagination(**kwargs)
+    errors = exc_info.value.errors()
+    assert any(error["loc"] == (expected_field,) for error in errors)
diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py
index 67d8539..ee9eb09 100644
--- a/tests/routers/openml/task_list_test.py
+++ b/tests/routers/openml/task_list_test.py
@@ -117,28 +117,6 @@ async def test_list_tasks_negative_pagination_safely_clamped(
         assert error["type"] == NoResultsError.uri
 
 
-@pytest.mark.parametrize(
-    ("pagination_override", "expected_field"),
-    [
-        ({"limit": "abc", "offset": 0}, "limit"),  # Invalid type
-        ({"limit": 5, "offset": "xyz"}, "offset"),  # Invalid type
-    ],
-    ids=["bad_limit_type", "bad_offset_type"],
-)
-async def test_list_tasks_invalid_pagination_type(
-    pagination_override: dict[str, Any], expected_field: str, py_api: httpx.AsyncClient
-) -> None:
-    """Invalid pagination types return 422 Unprocessable Entity."""
-    response = await py_api.post(
-        "/tasks/list",
-        json={"pagination": pagination_override},
-    )
-    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
-    # Verify that the error points to the correct field
-    error = response.json()["errors"][0]
-    assert error["loc"][-2:] == ["pagination", expected_field]
-    assert error["type"] in {"type_error.integer", "int_parsing", "int_type"}
-
 
 @pytest.mark.parametrize(
     "value",

From ba22cc9e98a324ea631b4a18c9a90e5c59841df3 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Mon, 13 Apr 2026 15:52:26 +0200
Subject: [PATCH 02/11] Provide tighter input validation for Pagination

---
 src/routers/dependencies.py           | 6 +++---
 tests/dependencies/pagination_test.py | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py
index a770a45..99a5a6c 100644
--- a/src/routers/dependencies.py
+++ b/src/routers/dependencies.py
@@ -3,7 +3,7 @@
 
 from fastapi import Depends
 from loguru import logger
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from sqlalchemy.ext.asyncio import AsyncConnection
 
 from core.errors import AuthenticationFailedError, AuthenticationRequiredError
@@ -58,5 +58,5 @@ def fetch_user_or_raise(
 
 
 class Pagination(BaseModel):
-    offset: int = 0
-    limit: int = 100
+    offset: int = Field(default=0, ge=0)
+    limit: int = Field(default=100, gt=0, le=1000)
diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
index 3e4fa1d..335efc2 100644
--- a/tests/dependencies/pagination_test.py
+++ b/tests/dependencies/pagination_test.py
@@ -17,7 +17,10 @@ def test_pagination_defaults() -> None:
     ("kwargs", "expected_field"),
     [
         ({"limit": "abc", "offset": 0}, "limit"),
+        ({"limit": -5, "offset": 0}, "limit"),
+        ({"limit": 2000, "offset": 0}, "limit"),
         ({"limit": 5, "offset": "xyz"}, "offset"),
+        ({"limit": 5, "offset": -5}, "offset"),
     ],
     ids=["bad_limit_type", "bad_offset_type"],
 )

From b7e64f6568788d35af78833404e645e43db2883b Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Mon, 13 Apr 2026 15:54:44 +0200
Subject: [PATCH 03/11] Add a test with non-default parameters

---
 tests/dependencies/pagination_test.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
index 335efc2..7b3d679 100644
--- a/tests/dependencies/pagination_test.py
+++ b/tests/dependencies/pagination_test.py
@@ -13,6 +13,13 @@ def test_pagination_defaults() -> None:
     assert pagination.limit == 100
 
 
+def test_pagination_valid_values() -> None:
+    """Non-default values within bounds are accepted."""
+    pagination = Pagination(limit=500, offset=10)
+    assert pagination.limit == 500
+    assert pagination.offset == 10
+
+
 @pytest.mark.parametrize(
     ("kwargs", "expected_field"),
     [
@@ -22,7 +29,7 @@ def test_pagination_defaults() -> None:
         ({"limit": 5, "offset": "xyz"}, "offset"),
         ({"limit": 5, "offset": -5}, "offset"),
     ],
-    ids=["bad_limit_type", "bad_offset_type"],
+    ids=["bad_limit_type", "negative_limit", "limit_too_large", "bad_offset_type", "negative_offset"],
 )
 def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None:
     """Non-integer values for limit or offset raise a ValidationError."""

From e57492e063a2377bfe68f7b4ec392601e1128922 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Tue, 14 Apr 2026 12:00:53 +0200
Subject: [PATCH 04/11] Remove 'valid values' test - should be covered by API
 tests

---
 tests/dependencies/pagination_test.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
index 7b3d679..db048da 100644
--- a/tests/dependencies/pagination_test.py
+++ b/tests/dependencies/pagination_test.py
@@ -13,13 +13,6 @@ def test_pagination_defaults() -> None:
     assert pagination.limit == 100
 
 
-def test_pagination_valid_values() -> None:
-    """Non-default values within bounds are accepted."""
-    pagination = Pagination(limit=500, offset=10)
-    assert pagination.limit == 500
-    assert pagination.offset == 10
-
-
 @pytest.mark.parametrize(
     ("kwargs", "expected_field"),
     [
@@ -29,7 +22,13 @@ def test_pagination_valid_values() -> None:
         ({"limit": 5, "offset": "xyz"}, "offset"),
         ({"limit": 5, "offset": -5}, "offset"),
     ],
-    ids=["bad_limit_type", "negative_limit", "limit_too_large", "bad_offset_type", "negative_offset"],
+    ids=[
+        "bad_limit_type",
+        "negative_limit",
+        "limit_too_large",
+        "bad_offset_type",
+        "negative_offset",
+    ],
 )
 def test_pagination_invalid_type(kwargs: dict[str, Any], expected_field: str) -> None:
     """Non-integer values for limit or offset raise a ValidationError."""

From c8868d80af32c50cd9c350ebaecaa625cbd09854 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 10:10:23 +0000
Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/routers/openml/task_list_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py
index ee9eb09..70482a5 100644
--- a/tests/routers/openml/task_list_test.py
+++ b/tests/routers/openml/task_list_test.py
@@ -117,7 +117,6 @@ async def test_list_tasks_negative_pagination_safely_clamped(
         assert error["type"] == NoResultsError.uri
 
 
-
 @pytest.mark.parametrize(
     "value",
     ["1...2", "abc"],

From e7ed37c5a2af8e9598a1a13245ca0f86a718744e Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Tue, 14 Apr 2026 12:33:49 +0200
Subject: [PATCH 06/11] remove test since input validation is now tested
 centrally

---
 tests/routers/openml/task_list_test.py | 43 --------------------------
 1 file changed, 43 deletions(-)

diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py
index 70482a5..78eb5ec 100644
--- a/tests/routers/openml/task_list_test.py
+++ b/tests/routers/openml/task_list_test.py
@@ -74,49 +74,6 @@ async def test_list_tasks_api_happy_path(py_api: httpx.AsyncClient) -> None:
     assert "OpenML100" in task["tag"]
 
 
-@pytest.mark.parametrize(
-    ("limit", "offset", "expected_status", "expected_max_results"),
-    [
-        (-10, 0, HTTPStatus.NOT_FOUND, 0),  # negative limit clamped to 0 -> No results
-        (5, -10, HTTPStatus.OK, 5),  # negative offset clamped to 0 -> First 5 results
-    ],
-    ids=["negative_limit", "negative_offset"],
-)
-async def test_list_tasks_negative_pagination_safely_clamped(
-    limit: int,
-    offset: int,
-    expected_status: int,
-    expected_max_results: int,
-    py_api: httpx.AsyncClient,
-) -> None:
-    """Negative pagination values are safely clamped to 0 instead of causing 500 errors.
-
-    A limit clamped to 0 raises NoResultsError, which the API maps to HTTP 404.
-    An offset clamped to 0 simply returns the first page of results (200 OK).
-
-    Note: This remains an HTTP-level (py_api) test to ensure end-to-end safety is
-    preserved.
-    """
-    response = await py_api.post(
-        "/tasks/list",
-        json={"pagination": {"limit": limit, "offset": offset}},
-    )
-    assert response.status_code == expected_status
-    if expected_status == HTTPStatus.OK:
-        body = response.json()
-        assert len(body) <= expected_max_results
-        # Compare to a baseline with offset=0 to prove it was correctly clamped
-        baseline = await py_api.post(
-            "/tasks/list",
-            json={"pagination": {"limit": limit, "offset": 0}},
-        )
-        assert baseline.status_code == HTTPStatus.OK
-        assert [t["task_id"] for t in body] == [t["task_id"] for t in baseline.json()]
-    else:
-        error = response.json()
-        assert error["type"] == NoResultsError.uri
-
-
 @pytest.mark.parametrize(
     "value",
     ["1...2", "abc"],

From 9d5bf7b3215e57cebba4cd326485b6e1f97e63e0 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Tue, 14 Apr 2026 12:35:06 +0200
Subject: [PATCH 07/11] Add noqa directive for pagination limit

---
 tests/dependencies/pagination_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/dependencies/pagination_test.py b/tests/dependencies/pagination_test.py
index db048da..0de2d69 100644
--- a/tests/dependencies/pagination_test.py
+++ b/tests/dependencies/pagination_test.py
@@ -10,7 +10,7 @@ def test_pagination_defaults() -> None:
     """Pagination has expected defaults when no values are provided."""
     pagination = Pagination()
     assert pagination.offset == 0
-    assert pagination.limit == 100
+    assert pagination.limit == 100  # noqa: PLR2004
 
 
 @pytest.mark.parametrize(

From 536d319fb923ba11591629d4dd80b78535786661 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Tue, 14 Apr 2026 13:59:28 +0200
Subject: [PATCH 08/11] Update dataset list test to reflect pagination
 constraints

---
 src/routers/dependencies.py                   |  6 +++++-
 .../openml/datasets_list_datasets_test.py     | 20 +++++++++++--------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py
index 99a5a6c..dba12d8 100644
--- a/src/routers/dependencies.py
+++ b/src/routers/dependencies.py
@@ -57,6 +57,10 @@ def fetch_user_or_raise(
     return user
 
 
+LIMIT_DEFAULT = 100
+LIMIT_MAX = 1000
+
+
 class Pagination(BaseModel):
     offset: int = Field(default=0, ge=0)
-    limit: int = Field(default=100, gt=0, le=1000)
+    limit: int = Field(default=LIMIT_DEFAULT, gt=0, le=LIMIT_MAX)
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index be08927..5bbecc7 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -11,7 +11,7 @@
 
 from core.errors import NoResultsError
 from database.users import User
-from routers.dependencies import Pagination
+from routers.dependencies import LIMIT_DEFAULT, LIMIT_MAX, Pagination
 from routers.openml.datasets import DatasetStatusFilter, list_datasets
 from tests import constants
 from tests.users import ADMIN_USER, DATASET_130_OWNER, SOME_USER, ApiKey
@@ -57,12 +57,6 @@ async def test_list_data_identical(
     api_key = kwargs.pop("api_key")
     api_key_query = f"?api_key={api_key}" if api_key else ""
 
-    # Pagination parameters are nested in the new query style
-    # The old style has no `limit` by default, so we mimic this with a high default
-    new_style = kwargs | {"pagination": {"limit": limit or 1_000_000}}
-    if offset is not None:
-        new_style["pagination"]["offset"] = offset
-
     # old style `/data/filter` encodes all filters as a path
     query = [
         [filter_, value if not isinstance(value, list) else ",".join(str(v) for v in value)]
@@ -74,13 +68,21 @@ async def test_list_data_identical(
         uri += f"/{'/'.join([str(v) for q in query for v in q])}"
     uri += api_key_query
 
+    # new style just takes the values directly in a JSON body,
+    # except that the limit and offset parameters are under a pagination field.
+    if limit is not None:
+        kwargs.setdefault("pagination", {})["limit"] = limit
+    if offset is not None:
+        kwargs.setdefault("pagination", {})["offset"] = offset
+
     py_response, php_response = await asyncio.gather(
-        py_api.post(f"/datasets/list{api_key_query}", json=new_style),
+        py_api.post(f"/datasets/list{api_key_query}", json=kwargs),
         php_api.get(uri),
     )
 
     # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results)
     # and the error response format, so we can't compare error responses directly.
+    # Validation errors shouldn't occur since the search space doesn't include invalid values
     php_is_error = php_response.status_code == HTTPStatus.PRECONDITION_FAILED
     py_is_error = py_response.status_code == HTTPStatus.NOT_FOUND
 
@@ -105,6 +107,8 @@ async def test_list_data_identical(
 
     # PHP API has a double nested dictionary that never has other entries
     php_json = php_response.json()["data"]["dataset"]
+    # The default limit changed from unbound to 100.
+    php_json = php_json[:LIMIT_DEFAULT]
     assert len(py_json) == len(php_json)
     assert py_json == php_json
     return None

From 23664ba50b0b8499bd39283c0496ccfe899c7f6b Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Tue, 14 Apr 2026 14:01:38 +0200
Subject: [PATCH 09/11] Update test to reflect new pagination constraints

---
 tests/routers/openml/migration/tasks_migration_test.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py
index a11f1a5..ea3226b 100644
--- a/tests/routers/openml/migration/tasks_migration_test.py
+++ b/tests/routers/openml/migration/tasks_migration_test.py
@@ -11,6 +11,7 @@
     nested_remove_single_element_list,
     nested_remove_values,
 )
+from routers.dependencies import LIMIT_MAX
 
 
 @pytest.mark.parametrize(
@@ -141,8 +142,7 @@ async def test_list_tasks_equal(
     - PHP error status is 412 PRECONDITION_FAILED; Python uses 404 NOT_FOUND.
     """
     php_path = _build_php_task_list_path(php_params)
-    # Use a very large limit on Python side to match PHP's unbounded default result count
-    py_body = {**py_extra, "pagination": {"limit": 1_000_000, "offset": 0}}
+    py_body = {**py_extra, "pagination": {"limit": LIMIT_MAX, "offset": 0}}
     py_response, php_response = await asyncio.gather(
         py_api.post("/tasks/list", json=py_body),
         php_api.get(php_path),
@@ -163,6 +163,7 @@ async def test_list_tasks_equal(
     php_tasks: list[dict[str, Any]] = (
         php_tasks_raw if isinstance(php_tasks_raw, list) else [php_tasks_raw]
     )
+    php_tasks = php_tasks[:LIMIT_MAX]
     py_tasks: list[dict[str, Any]] = [_normalize_py_task(t) for t in py_response.json()]
 
     php_ids = {int(t["task_id"]) for t in php_tasks}

From 8b6832c108f45fb6814861a667f8cd100cc3a201 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 12:02:41 +0000
Subject: [PATCH 10/11] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/routers/openml/datasets_list_datasets_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index 5bbecc7..bdf3c6e 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -11,7 +11,7 @@
 
 from core.errors import NoResultsError
 from database.users import User
-from routers.dependencies import LIMIT_DEFAULT, LIMIT_MAX, Pagination
+from routers.dependencies import LIMIT_DEFAULT, Pagination
 from routers.openml.datasets import DatasetStatusFilter, list_datasets
 from tests import constants
 from tests.users import ADMIN_USER, DATASET_130_OWNER, SOME_USER, ApiKey

From d20d4cc913ba69a32516c7f070fb49a1aa306147 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Tue, 14 Apr 2026 14:08:04 +0200
Subject: [PATCH 11/11] Only truncate list when limit isn't set

---
 tests/routers/openml/datasets_list_datasets_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index bdf3c6e..7460fb5 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -108,7 +108,8 @@ async def test_list_data_identical(
     # PHP API has a double nested dictionary that never has other entries
     php_json = php_response.json()["data"]["dataset"]
     # The default limit changed from unbound to 100.
-    php_json = php_json[:LIMIT_DEFAULT]
+    if limit is None:
+        php_json = php_json[:LIMIT_DEFAULT]
     assert len(py_json) == len(php_json)
     assert py_json == php_json
     return None