From 7c205aa4936373aab571c2399c5362cf146b7758 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Thu, 26 Mar 2026 16:24:49 +0100
Subject: [PATCH 1/7] Update tests to reduce the number of times the py_api
 fixture is used

---
 tests/routers/openml/dataset_tag_test.py      | 106 ++---
 .../openml/datasets_list_datasets_test.py     | 426 +++++++++---------
 tests/routers/openml/datasets_test.py         | 359 ++++++++-------
 3 files changed, 462 insertions(+), 429 deletions(-)

diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py
index a9444c88..41746f83 100644
--- a/tests/routers/openml/dataset_tag_test.py
+++ b/tests/routers/openml/dataset_tag_test.py
@@ -4,10 +4,12 @@
 import pytest
 from sqlalchemy.ext.asyncio import AsyncConnection
 
-from core.errors import AuthenticationFailedError, TagAlreadyExistsError
+from core.errors import TagAlreadyExistsError
 from database.datasets import get_tags_for
+from database.users import User
+from routers.openml.datasets import tag_dataset
 from tests import constants
-from tests.users import ApiKey
+from tests.users import ADMIN_USER, OWNER_USER, SOME_USER, ApiKey
 
 
 @pytest.mark.parametrize(
@@ -22,73 +24,71 @@ async def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: httpx.Async
         json={"data_id": next(iter(constants.PRIVATE_DATASET_ID)), "tag": "test"},
     )
     assert response.status_code == HTTPStatus.UNAUTHORIZED
-    assert response.headers["content-type"] == "application/problem+json"
-    error = response.json()
-    assert error["type"] == AuthenticationFailedError.uri
-    assert error["code"] == "103"
+
+
+@pytest.mark.parametrize(
+    "tag",
+    ["", "h@", " a", "a" * 65],
+    ids=["too short", "@", "space", "too long"],
+)
+async def test_dataset_tag_invalid_tag_is_rejected(
+    # Constraints for the tag are handled by FastAPI
+    tag: str,
+    py_api: httpx.AsyncClient,
+) -> None:
+    new = await py_api.post(
+        f"/datasets/tag?api_key={ApiKey.ADMIN}",
+        json={"data_id": 1, "tag": tag},
+    )
+
+    assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
+    assert new.json()["detail"][0]["loc"] == ["body", "tag"]
+
+
+# ── Direct call tests: tag_dataset ──
 
 
 @pytest.mark.mut
 @pytest.mark.parametrize(
-    "key",
-    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
+    "user",
+    [ADMIN_USER, SOME_USER, OWNER_USER],
     ids=["administrator", "non-owner", "owner"],
 )
-async def test_dataset_tag(
-    key: ApiKey, expdb_test: AsyncConnection, py_api: httpx.AsyncClient
-) -> None:
+async def test_dataset_tag(user: User, expdb_test: AsyncConnection) -> None:
     dataset_id, tag = next(iter(constants.PRIVATE_DATASET_ID)), "test"
-    response = await py_api.post(
-        f"/datasets/tag?api_key={key}",
-        json={"data_id": dataset_id, "tag": tag},
+    result = await tag_dataset(
+        data_id=dataset_id,
+        tag=tag,
+        user=user,
+        expdb_db=expdb_test,
     )
-    assert response.status_code == HTTPStatus.OK
-    assert response.json() == {"data_tag": {"id": str(dataset_id), "tag": [tag]}}
+    assert result == {"data_tag": {"id": str(dataset_id), "tag": [tag]}}
 
     tags = await get_tags_for(id_=dataset_id, connection=expdb_test)
     assert tag in tags
 
 
 @pytest.mark.mut
-async def test_dataset_tag_returns_existing_tags(py_api: httpx.AsyncClient) -> None:
-    dataset_id, tag = 1, "test"
-    response = await py_api.post(
-        f"/datasets/tag?api_key={ApiKey.ADMIN}",
-        json={"data_id": dataset_id, "tag": tag},
+async def test_dataset_tag_returns_existing_tags(expdb_test: AsyncConnection) -> None:
+    dataset_id, tag = 1, "test"  # Dataset 1 already is tagged with 'study_14'
+    result = await tag_dataset(
+        data_id=dataset_id,
+        tag=tag,
+        user=ADMIN_USER,
+        expdb_db=expdb_test,
     )
-    assert response.status_code == HTTPStatus.OK
-    assert response.json() == {"data_tag": {"id": str(dataset_id), "tag": ["study_14", tag]}}
+    assert result == {"data_tag": {"id": str(dataset_id), "tag": ["study_14", tag]}}
 
 
 @pytest.mark.mut
-async def test_dataset_tag_fails_if_tag_exists(py_api: httpx.AsyncClient) -> None:
+async def test_dataset_tag_fails_if_tag_exists(expdb_test: AsyncConnection) -> None:
     dataset_id, tag = 1, "study_14"  # Dataset 1 already is tagged with 'study_14'
-    response = await py_api.post(
-        f"/datasets/tag?api_key={ApiKey.ADMIN}",
-        json={"data_id": dataset_id, "tag": tag},
-    )
-    assert response.status_code == HTTPStatus.CONFLICT
-    assert response.headers["content-type"] == "application/problem+json"
-    error = response.json()
-    assert error["type"] == TagAlreadyExistsError.uri
-    assert error["code"] == "473"
-    assert str(dataset_id) in error["detail"]
-    assert tag in error["detail"]
-
-
-@pytest.mark.parametrize(
-    "tag",
-    ["", "h@", " a", "a" * 65],
-    ids=["too short", "@", "space", "too long"],
-)
-async def test_dataset_tag_invalid_tag_is_rejected(
-    tag: str,
-    py_api: httpx.AsyncClient,
-) -> None:
-    new = await py_api.post(
-        f"/datasets/tag?api_key={ApiKey.ADMIN}",
-        json={"data_id": 1, "tag": tag},
-    )
-
-    assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
-    assert new.json()["detail"][0]["loc"] == ["body", "tag"]
+    with pytest.raises(TagAlreadyExistsError) as e:
+        await tag_dataset(
+            data_id=dataset_id,
+            tag=tag,
+            user=ADMIN_USER,
+            expdb_db=expdb_test,
+        )
+    assert str(dataset_id) in e.value.detail
+    assert tag in e.value.detail
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index e619c468..ded608fc 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -7,99 +7,193 @@
 import pytest
 from hypothesis import given
 from hypothesis import strategies as st
+from sqlalchemy.ext.asyncio import AsyncConnection
 
 from core.errors import NoResultsError
+from database.users import User
+from routers.dependencies import Pagination
+from routers.openml.datasets import DatasetStatusFilter, list_datasets
 from tests import constants
-from tests.users import ApiKey
+from tests.users import ADMIN_USER, DATASET_130_OWNER, OWNER_USER, SOME_USER, ApiKey
 
 
-def _assert_empty_result(
-    response: httpx.Response,
-) -> None:
-    assert response.status_code == HTTPStatus.NOT_FOUND
-    assert response.headers["content-type"] == "application/problem+json"
-    error = response.json()
-    assert error["type"] == NoResultsError.uri
-    assert error["code"] == "372"
-
-
-async def test_list(py_api: httpx.AsyncClient) -> None:
+async def test_list_route(py_api: httpx.AsyncClient) -> None:
     response = await py_api.get("/datasets/list/")
     assert response.status_code == HTTPStatus.OK
     assert len(response.json()) >= 1
 
 
+@pytest.mark.slow
+@hypothesis.settings(  # type: ignore[untyped-decorator]  # 108
+    max_examples=500,  # This number needs to be better motivated
+    suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture],
+    deadline=None,
+)
+@given(  # type: ignore[untyped-decorator]  # 108
+    number_missing_values=st.sampled_from([None, "2", "2..10000"]),
+    number_features=st.sampled_from([None, "5", "2..100"]),
+    number_classes=st.sampled_from([None, "5", "2..100"]),
+    number_instances=st.sampled_from([None, "150", "2..100"]),
+    limit=st.sampled_from([None, 1, 100, 1000]),
+    offset=st.sampled_from([None, 1, 100, 1000]),
+    status=st.sampled_from([None, "active", "deactivated", "in_preparation"]),
+    data_id=st.sampled_from([None, [61], [61, 130]]),
+    data_name=st.sampled_from([None, "abalone", "iris", "NotPresentInTheDatabase"]),
+    data_version=st.sampled_from([None, 2, 4]),
+    tag=st.sampled_from([None, "study_14", "study_not_in_db"]),
+    # We don't test ADMIN user, as we fixed a bug which treated them as a regular user
+    api_key=st.sampled_from([None, ApiKey.SOME_USER, ApiKey.OWNER_USER]),
+)
+async def test_list_data_identical(
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+    **kwargs: dict[str, Any],
+) -> Any:  # noqa: ANN401
+    limit, offset = kwargs["limit"], kwargs["offset"]
+    if (limit and not offset) or (offset and not limit):
+        # Behavior change: in new API these may be used independently, not in old.
+        return hypothesis.reject()
+
+    api_key = kwargs.pop("api_key")
+    api_key_query = f"?api_key={api_key}" if api_key else ""
+
+    # Pagination parameters are nested in the new query style
+    # The old style has no `limit` by default, so we mimic this with a high default
+    new_style = kwargs | {"pagination": {"limit": limit or 1_000_000}}
+    if offset is not None:
+        new_style["pagination"]["offset"] = offset
+
+    # old style `/data/list` encodes all filters as a path
+    query = [
+        [filter_, value if not isinstance(value, list) else ",".join(str(v) for v in value)]
+        for filter_, value in kwargs.items()
+        if value is not None
+    ]
+    uri = "/data/list"
+    if query:
+        uri += f"/{'/'.join([str(v) for q in query for v in q])}"
+    uri += api_key_query
+
+    new, original = await asyncio.gather(
+        py_api.post(f"/datasets/list{api_key_query}", json=new_style),
+        php_api.get(uri),
+    )
+
+    # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results)
+    # and the error response format, so we can't compare error responses directly.
+    php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED
+    py_is_error = new.status_code == HTTPStatus.NOT_FOUND
+
+    if php_is_error or py_is_error:
+        # Both should be errors in the same cases
+        assert php_is_error == py_is_error, (
+            f"PHP status={original.status_code}, Python status={new.status_code}"
+        )
+        # Verify Python API returns RFC 9457 format
+        assert new.headers["content-type"] == "application/problem+json"
+        error = new.json()
+        assert error["type"] == NoResultsError.uri
+        assert error["code"] == "372"
+        assert original.json()["error"]["message"] == "No results"
+        assert error["detail"] == "No datasets match the search criteria."
+        return None
+    new_json = new.json()
+    # Qualities in new response are typed
+    for dataset in new_json:
+        for quality in dataset["quality"]:
+            quality["value"] = str(quality["value"])
+
+    # PHP API has a double nested dictionary that never has other entries
+    php_json = original.json()["data"]["dataset"]
+    assert len(php_json) == len(new_json)
+    assert php_json == new_json
+    return None
+
+
+# ── Direct call tests: list_datasets ──
+
+
 @pytest.mark.parametrize(
     ("status", "amount"),
     [
-        ("active", constants.NUMBER_OF_PUBLIC_ACTIVE_DATASETS),
-        ("deactivated", constants.NUMBER_OF_DEACTIVATED_DATASETS),
-        ("in_preparation", constants.NUMBER_OF_DATASETS_IN_PREPARATION),
-        ("all", constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
+        (DatasetStatusFilter.ACTIVE, constants.NUMBER_OF_PUBLIC_ACTIVE_DATASETS),
+        (DatasetStatusFilter.DEACTIVATED, constants.NUMBER_OF_DEACTIVATED_DATASETS),
+        (DatasetStatusFilter.IN_PREPARATION, constants.NUMBER_OF_DATASETS_IN_PREPARATION),
+        (
+            DatasetStatusFilter.ALL,
+            constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS,
+        ),
     ],
 )
-async def test_list_filter_active(status: str, amount: int, py_api: httpx.AsyncClient) -> None:
-    response = await py_api.post(
-        "/datasets/list",
-        json={"status": status, "pagination": {"limit": constants.NUMBER_OF_DATASETS}},
+async def test_list_filter_active(
+    status: DatasetStatusFilter, amount: int, expdb_test: AsyncConnection
+) -> None:
+    result = await list_datasets(
+        pagination=Pagination(limit=constants.NUMBER_OF_DATASETS),
+        status=status,
+        user=None,
+        expdb_db=expdb_test,
     )
-    assert response.status_code == HTTPStatus.OK, response.json()
-    assert len(response.json()) == amount
+    assert len(result) == amount
 
 
 @pytest.mark.parametrize(
-    ("api_key", "amount"),
+    ("user", "amount"),
     [
-        (ApiKey.ADMIN, constants.NUMBER_OF_DATASETS),
-        (ApiKey.DATASET_130_OWNER, constants.NUMBER_OF_DATASETS),
-        (ApiKey.SOME_USER, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
+        (ADMIN_USER, constants.NUMBER_OF_DATASETS),
+        (DATASET_130_OWNER, constants.NUMBER_OF_DATASETS),
+        (SOME_USER, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
         (None, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
     ],
 )
 async def test_list_accounts_privacy(
-    api_key: ApiKey | None, amount: int, py_api: httpx.AsyncClient
+    user: User | None, amount: int, expdb_test: AsyncConnection
 ) -> None:
-    key = f"?api_key={api_key}" if api_key else ""
-    response = await py_api.post(
-        f"/datasets/list{key}",
-        json={"status": "all", "pagination": {"limit": 1000}},
+    result = await list_datasets(
+        pagination=Pagination(limit=1000),
+        status=DatasetStatusFilter.ALL,
+        user=user,
+        expdb_db=expdb_test,
     )
-    assert response.status_code == HTTPStatus.OK, response.json()
-    assert len(response.json()) == amount
+    assert len(result) == amount
 
 
 @pytest.mark.parametrize(
     ("name", "count"),
     [("abalone", 1), ("iris", 2)],
 )
-async def test_list_data_name_present(name: str, count: int, py_api: httpx.AsyncClient) -> None:
-    # The second iris dataset is private, so we need to authenticate.
-    response = await py_api.post(
-        f"/datasets/list?api_key={ApiKey.ADMIN}",
-        json={"status": "all", "data_name": name},
+async def test_list_data_name_present(name: str, count: int, expdb_test: AsyncConnection) -> None:
+    # The second iris dataset is private, so we need an admin user.
+    result = await list_datasets(
+        pagination=Pagination(),
+        status=DatasetStatusFilter.ALL,
+        data_name=name,
+        user=ADMIN_USER,
+        expdb_db=expdb_test,
     )
-    assert response.status_code == HTTPStatus.OK
-    datasets = response.json()
-    assert len(datasets) == count
-    assert all(dataset["name"] == name for dataset in datasets)
+    assert len(result) == count
+    assert all(dataset["name"] == name for dataset in result)
 
 
 @pytest.mark.parametrize(
     "name",
     ["ir", "long_name_without_overlap"],
 )
-async def test_list_data_name_absent(name: str, py_api: httpx.AsyncClient) -> None:
-    response = await py_api.post(
-        f"/datasets/list?api_key={ApiKey.ADMIN}",
-        json={"status": "all", "data_name": name},
-    )
-    _assert_empty_result(response)
+async def test_list_data_name_absent(name: str, expdb_test: AsyncConnection) -> None:
+    with pytest.raises(NoResultsError):
+        await list_datasets(
+            pagination=Pagination(),
+            status=DatasetStatusFilter.ALL,
+            data_name=name,
+            user=ADMIN_USER,
+            expdb_db=expdb_test,
+        )
 
 
 @pytest.mark.parametrize("limit", [None, 5, 10, 200])
-@pytest.mark.parametrize("offset", [None, 0, 5, 129, 140, 200])
+@pytest.mark.parametrize("offset", [None, 0, 5, 129, 140])
 async def test_list_pagination(
-    limit: int | None, offset: int | None, py_api: httpx.AsyncClient
+    limit: int | None, offset: int | None, expdb_test: AsyncConnection
 ) -> None:
     # dataset ids are contiguous until 131, then there are 161, 162, and 163.
     extra_datasets = [161, 162, 163]
@@ -113,17 +207,19 @@ async def test_list_pagination(
     end = start + (100 if limit is None else limit)
     expected_ids = all_ids[start:end]
 
-    offset_body = {} if offset is None else {"offset": offset}
-    limit_body = {} if limit is None else {"limit": limit}
-    filters = {"status": "all", "pagination": offset_body | limit_body}
-    response = await py_api.post("/datasets/list", json=filters)
+    pagination = Pagination(offset=offset or 0, limit=limit or 100)
 
-    if offset in [140, 200]:
-        _assert_empty_result(response)
+    try:
+        result = await list_datasets(
+            pagination=pagination,
+            status=DatasetStatusFilter.ALL,
+            user=None,
+            expdb_db=expdb_test,
+        )
+    except NoResultsError:
+        assert offset == 140, "Result was expected but NoResultsError was raised."
         return
-
-    assert response.status_code == HTTPStatus.OK
-    reported_ids = {dataset["did"] for dataset in response.json()}
+    reported_ids = {dataset["did"] for dataset in result}
     assert reported_ids == set(expected_ids)
 
 
@@ -131,85 +227,96 @@ async def test_list_pagination(
     ("version", "count"),
     [(1, 100), (2, 7), (5, 1)],
 )
-async def test_list_data_version(version: int, count: int, py_api: httpx.AsyncClient) -> None:
-    response = await py_api.post(
-        f"/datasets/list?api_key={ApiKey.ADMIN}",
-        json={"status": "all", "data_version": version},
+async def test_list_data_version(version: int, count: int, expdb_test: AsyncConnection) -> None:
+    result = await list_datasets(
+        pagination=Pagination(),
+        status=DatasetStatusFilter.ALL,
+        data_version=version,
+        user=ADMIN_USER,
+        expdb_db=expdb_test,
     )
-    assert response.status_code == HTTPStatus.OK
-    datasets = response.json()
-    assert len(datasets) == count
-    assert {dataset["version"] for dataset in datasets} == {version}
+    assert len(result) == count
+    assert {dataset["version"] for dataset in result} == {version}
 
 
-async def test_list_data_version_no_result(py_api: httpx.AsyncClient) -> None:
+async def test_list_data_version_no_result(expdb_test: AsyncConnection) -> None:
     version_with_no_datasets = 42
-    response = await py_api.post(
-        f"/datasets/list?api_key={ApiKey.ADMIN}",
-        json={"status": "all", "data_version": version_with_no_datasets},
-    )
-    _assert_empty_result(response)
+    with pytest.raises(NoResultsError):
+        await list_datasets(
+            pagination=Pagination(),
+            status=DatasetStatusFilter.ALL,
+            data_version=version_with_no_datasets,
+            user=ADMIN_USER,
+            expdb_db=expdb_test,
+        )
 
 
-@pytest.mark.parametrize(
-    "key",
-    [ApiKey.SOME_USER, ApiKey.DATASET_130_OWNER, ApiKey.ADMIN],
-)
+@pytest.mark.parametrize("user", [SOME_USER, DATASET_130_OWNER, ADMIN_USER])
 @pytest.mark.parametrize(
     ("user_id", "count"),
     [(1, 59), (2, 34), (16, 1)],
 )
-async def test_list_uploader(user_id: int, count: int, key: str, py_api: httpx.AsyncClient) -> None:
-    response = await py_api.post(
-        f"/datasets/list?api_key={key}",
-        json={"status": "all", "uploader": user_id},
-    )
+async def test_list_uploader(
+    user_id: int, count: int, user: User, expdb_test: AsyncConnection
+) -> None:
     # The dataset of user 16 is private, so can not be retrieved by other users.
     owner_user_id = 16
-    if key == ApiKey.SOME_USER and user_id == owner_user_id:
-        _assert_empty_result(response)
-        return
-
-    assert response.status_code == HTTPStatus.OK
-    assert len(response.json()) == count
+    try:
+        result = await list_datasets(
+            pagination=Pagination(),
+            status=DatasetStatusFilter.ALL,
+            uploader=user_id,
+            user=user,
+            expdb_db=expdb_test,
+        )
+        assert len(result) == count
+    except NoResultsError:
+        assert user is SOME_USER, "Admin and Owner should always see a result"
+        assert user_id == owner_user_id, "Only empty result should be for owner_user filter"
 
 
 @pytest.mark.parametrize(
     "data_id",
     [[1], [1, 2, 3], [1, 2, 3, 3000], [1, 2, 3, 130]],
 )
-async def test_list_data_id(data_id: list[int], py_api: httpx.AsyncClient) -> None:
-    response = await py_api.post(
-        "/datasets/list",
-        json={"status": "all", "data_id": data_id},
+async def test_list_data_id(data_id: list[int], expdb_test: AsyncConnection) -> None:
+    result = await list_datasets(
+        pagination=Pagination(),
+        status=DatasetStatusFilter.ALL,
+        data_id=data_id,
+        user=None,
+        expdb_db=expdb_test,
     )
-
-    assert response.status_code == HTTPStatus.OK
     private_or_not_exist = {130, 3000}
-    assert len(response.json()) == len(set(data_id) - private_or_not_exist)
+    expected = set(data_id) - private_or_not_exist
+    returned = {dataset["did"] for dataset in result}
+    assert returned == expected
 
 
 @pytest.mark.parametrize(
     ("tag", "count"),
     [("study_14", 100), ("study_15", 1)],
 )
-async def test_list_data_tag(tag: str, count: int, py_api: httpx.AsyncClient) -> None:
-    response = await py_api.post(
-        "/datasets/list",
-        # study_14 has 100 datasets, we overwrite the default `limit` because otherwise
-        # we don't know if the results are limited by filtering on the tag.
-        json={"status": "all", "tag": tag, "pagination": {"limit": 101}},
+async def test_list_data_tag(tag: str, count: int, expdb_test: AsyncConnection) -> None:
+    result = await list_datasets(
+        pagination=Pagination(limit=101),
+        status=DatasetStatusFilter.ALL,
+        tag=tag,
+        user=None,
+        expdb_db=expdb_test,
     )
-    assert response.status_code == HTTPStatus.OK
-    assert len(response.json()) == count
+    assert len(result) == count
 
 
-async def test_list_data_tag_empty(py_api: httpx.AsyncClient) -> None:
-    response = await py_api.post(
-        "/datasets/list",
-        json={"status": "all", "tag": "not-a-tag"},
-    )
-    _assert_empty_result(response)
+async def test_list_data_tag_empty(expdb_test: AsyncConnection) -> None:
+    with pytest.raises(NoResultsError):
+        await list_datasets(
+            pagination=Pagination(),
+            status=DatasetStatusFilter.ALL,
+            tag="not-a-tag",
+            user=None,
+            expdb_db=expdb_test,
+        )
 
 
 @pytest.mark.parametrize(
@@ -226,98 +333,13 @@ async def test_list_data_tag_empty(py_api: httpx.AsyncClient) -> None:
     ],
 )
 async def test_list_data_quality(
-    quality: str, range_: str, count: int, py_api: httpx.AsyncClient
+    quality: str, range_: str, count: int, expdb_test: AsyncConnection
 ) -> None:
-    response = await py_api.post(
-        "/datasets/list",
-        json={"status": "all", quality: range_},
+    result = await list_datasets(
+        pagination=Pagination(),
+        status=DatasetStatusFilter.ALL,
+        user=None,
+        expdb_db=expdb_test,
+        **{quality: range_},
     )
-    assert response.status_code == HTTPStatus.OK, response.json()
-    assert len(response.json()) == count
-
-
-@pytest.mark.slow
-@hypothesis.settings(  # type: ignore[untyped-decorator]  # 108
-    max_examples=500,  # This number needs to be better motivated
-    suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture],
-    deadline=None,
-)
-@given(  # type: ignore[untyped-decorator]  # 108
-    number_missing_values=st.sampled_from([None, "2", "2..10000"]),
-    number_features=st.sampled_from([None, "5", "2..100"]),
-    number_classes=st.sampled_from([None, "5", "2..100"]),
-    number_instances=st.sampled_from([None, "150", "2..100"]),
-    limit=st.sampled_from([None, 1, 100, 1000]),
-    offset=st.sampled_from([None, 1, 100, 1000]),
-    status=st.sampled_from([None, "active", "deactivated", "in_preparation"]),
-    data_id=st.sampled_from([None, [61], [61, 130]]),
-    data_name=st.sampled_from([None, "abalone", "iris", "NotPresentInTheDatabase"]),
-    data_version=st.sampled_from([None, 2, 4]),
-    tag=st.sampled_from([None, "study_14", "study_not_in_db"]),
-    # We don't test ADMIN user, as we fixed a bug which treated them as a regular user
-    api_key=st.sampled_from([None, ApiKey.SOME_USER, ApiKey.OWNER_USER]),
-)
-async def test_list_data_identical(
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-    **kwargs: dict[str, Any],
-) -> Any:  # noqa: ANN401
-    limit, offset = kwargs["limit"], kwargs["offset"]
-    if (limit and not offset) or (offset and not limit):
-        # Behavior change: in new API these may be used independently, not in old.
-        return hypothesis.reject()
-
-    api_key = kwargs.pop("api_key")
-    api_key_query = f"?api_key={api_key}" if api_key else ""
-
-    # Pagination parameters are nested in the new query style
-    # The old style has no `limit` by default, so we mimic this with a high default
-    new_style = kwargs | {"pagination": {"limit": limit or 1_000_000}}
-    if offset is not None:
-        new_style["pagination"]["offset"] = offset
-
-    # old style `/data/filter` encodes all filters as a path
-    query = [
-        [filter_, value if not isinstance(value, list) else ",".join(str(v) for v in value)]
-        for filter_, value in kwargs.items()
-        if value is not None
-    ]
-    uri = "/data/list"
-    if query:
-        uri += f"/{'/'.join([str(v) for q in query for v in q])}"
-    uri += api_key_query
-
-    new, original = await asyncio.gather(
-        py_api.post(f"/datasets/list{api_key_query}", json=new_style),
-        php_api.get(uri),
-    )
-
-    # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results)
-    # and the error response format, so we can't compare error responses directly.
-    php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED
-    py_is_error = new.status_code == HTTPStatus.NOT_FOUND
-
-    if php_is_error or py_is_error:
-        # Both should be errors in the same cases
-        assert php_is_error == py_is_error, (
-            f"PHP status={original.status_code}, Python status={new.status_code}"
-        )
-        # Verify Python API returns RFC 9457 format
-        assert new.headers["content-type"] == "application/problem+json"
-        error = new.json()
-        assert error["type"] == NoResultsError.uri
-        assert error["code"] == "372"
-        assert original.json()["error"]["message"] == "No results"
-        assert error["detail"] == "No datasets match the search criteria."
-        return None
-    new_json = new.json()
-    # Qualities in new response are typed
-    for dataset in new_json:
-        for quality in dataset["quality"]:
-            quality["value"] = str(quality["value"])
-
-    # PHP API has a double nested dictionary that never has other entries
-    php_json = original.json()["data"]["dataset"]
-    assert len(php_json) == len(new_json)
-    assert php_json == new_json
-    return None
+    assert len(result) == count
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index 91ef5bee..f1e4a2fd 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -7,48 +7,27 @@
 from sqlalchemy.ext.asyncio import AsyncConnection
 
 from core.errors import (
+    DatasetAdminOnlyError,
     DatasetNoAccessError,
     DatasetNotFoundError,
+    DatasetNotOwnedError,
     DatasetProcessingError,
 )
 from database.users import User
-from routers.openml.datasets import get_dataset
+from routers.openml.datasets import get_dataset, get_dataset_features, update_dataset_status
 from schemas.datasets.openml import DatasetMetadata, DatasetStatus
 from tests import constants
 from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey
 
 
-@pytest.mark.parametrize(
-    ("dataset_id", "response_code"),
-    [
-        (-1, HTTPStatus.NOT_FOUND),
-        (138, HTTPStatus.NOT_FOUND),
-        (100_000, HTTPStatus.NOT_FOUND),
-    ],
-)
-async def test_error_unknown_dataset(
-    dataset_id: int,
-    response_code: int,
-    py_api: httpx.AsyncClient,
-) -> None:
-    response = await py_api.get(f"/datasets/{dataset_id}")
-
-    assert response.status_code == response_code
-    assert response.headers["content-type"] == "application/problem+json"
-    error = response.json()
-    assert error["type"] == DatasetNotFoundError.uri
-    assert error["title"] == "Dataset Not Found"
-    assert error["status"] == HTTPStatus.NOT_FOUND
-    assert re.match(r"No dataset with id -?\d+ found.", error["detail"])
-    assert error["code"] == "111"
+# ── py_api: routing + serialization, RFC 9457 format, regression ──
 
 
-async def test_get_dataset(py_api: httpx.AsyncClient) -> None:
+async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
     response = await py_api.get("/datasets/1")
     assert response.status_code == HTTPStatus.OK
     description = response.json()
     assert description.pop("description").startswith("**Author**:")
-
     assert description == {
         "id": 1,
         "name": "anneal",
@@ -81,48 +60,7 @@ async def test_get_dataset(py_api: httpx.AsyncClient) -> None:
     }
 
 
-@pytest.mark.parametrize(
-    "user",
-    [
-        NO_USER,
-        SOME_USER,
-    ],
-)
-async def test_private_dataset_no_access(
-    user: User | None,
-    expdb_test: AsyncConnection,
-    user_test: AsyncConnection,
-) -> None:
-    with pytest.raises(DatasetNoAccessError) as e:
-        await get_dataset(
-            dataset_id=130,
-            user=user,
-            user_db=user_test,
-            expdb_db=expdb_test,
-        )
-    assert e.value.status_code == HTTPStatus.FORBIDDEN
-    assert e.value.uri == DatasetNoAccessError.uri
-    no_access = 112
-    assert e.value.code == no_access
-
-
-@pytest.mark.parametrize(
-    "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
-)
-async def test_private_dataset_access(
-    user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
-) -> None:
-    dataset = await get_dataset(
-        dataset_id=130,
-        user=user,
-        user_db=user_test,
-        expdb_db=expdb_test,
-    )
-    assert isinstance(dataset, DatasetMetadata)
-
-
-async def test_dataset_features(py_api: httpx.AsyncClient) -> None:
-    # Dataset 4 has both nominal and numerical features, so provides reasonable coverage
+async def test_get_features_via_api(py_api: httpx.AsyncClient) -> None:
     response = await py_api.get("/datasets/features/4")
     assert response.status_code == HTTPStatus.OK
     assert response.json() == [
@@ -175,161 +113,234 @@ async def test_dataset_features(py_api: httpx.AsyncClient) -> None:
     ]
 
 
-async def test_dataset_features_with_ontology(py_api: httpx.AsyncClient) -> None:
-    # Dataset 11 has ontology data for features 1, 2, and 3
-    response = await py_api.get("/datasets/features/11")
+async def test_update_status_via_api(py_api: httpx.AsyncClient) -> None:
+    response = await py_api.post(
+        "/datasets/status/update",
+        json={"dataset_id": 1, "status": "active"},
+    )
+    # Without authentication, we expect 401 — confirms the route is wired up.
+    assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+
+async def test_rfc9457_error_format(py_api: httpx.AsyncClient) -> None:
+    """Single test for the generic RFC 9457 exception handler — covers all error types."""
+    response = await py_api.get("/datasets/100000")
+    assert response.status_code == HTTPStatus.NOT_FOUND
+    assert response.headers["content-type"] == "application/problem+json"
+    error = response.json()
+    assert error["type"] == DatasetNotFoundError.uri
+    assert error["title"] == "Dataset Not Found"
+    assert error["status"] == HTTPStatus.NOT_FOUND
+    assert re.match(r"No dataset with id \d+ found.", error["detail"])
+    assert error["code"] == "111"
+
+
+@pytest.mark.mut
+async def test_dataset_no_500_with_multiple_processing_entries(
+    py_api: httpx.AsyncClient,
+    expdb_test: AsyncConnection,
+) -> None:
+    """Regression test for issue #145: multiple processing entries caused 500."""
+    await expdb_test.execute(
+        text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
+    )
+    await expdb_test.execute(
+        text(
+            "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
+            "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
+        ),
+    )
+    response = await py_api.get("/datasets/1")
     assert response.status_code == HTTPStatus.OK
-    features = {f["index"]: f for f in response.json()}
-    assert features[1]["ontology"] == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
-    assert features[2]["ontology"] == [
+
+
+# ── Direct call tests: get_dataset ──
+
+
+@pytest.mark.parametrize(
+    "dataset_id",
+    [-1, 138, 100_000],
+)
+async def test_get_dataset_not_found(
+    dataset_id: int,
+    expdb_test: AsyncConnection,
+    user_test: AsyncConnection,
+) -> None:
+    with pytest.raises(DatasetNotFoundError):
+        await get_dataset(
+            dataset_id=dataset_id,
+            user=None,
+            user_db=user_test,
+            expdb_db=expdb_test,
+        )
+
+
+@pytest.mark.parametrize(
+    "user",
+    [
+        NO_USER,
+        SOME_USER,
+    ],
+)
+async def test_private_dataset_no_access(
+    user: User | None,
+    expdb_test: AsyncConnection,
+    user_test: AsyncConnection,
+) -> None:
+    with pytest.raises(DatasetNoAccessError) as e:
+        await get_dataset(
+            dataset_id=130,
+            user=user,
+            user_db=user_test,
+            expdb_db=expdb_test,
+        )
+    assert e.value.status_code == HTTPStatus.FORBIDDEN
+    assert e.value.uri == DatasetNoAccessError.uri
+    no_access = 112
+    assert e.value.code == no_access
+
+
+@pytest.mark.parametrize(
+    "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
+)
+async def test_private_dataset_access(
+    user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
+) -> None:
+    dataset = await get_dataset(
+        dataset_id=130,
+        user=user,
+        user_db=user_test,
+        expdb_db=expdb_test,
+    )
+    assert isinstance(dataset, DatasetMetadata)
+
+
+# ── Direct call tests: get_dataset_features ──
+
+
+async def test_dataset_features_with_ontology(expdb_test: AsyncConnection) -> None:
+    features = await get_dataset_features(dataset_id=11, user=None, expdb=expdb_test)
+    by_index = {f.index: f for f in features}
+    assert by_index[1].ontology == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
+    assert by_index[2].ontology == [
         "https://en.wikipedia.org/wiki/Car_door",
         "https://en.wikipedia.org/wiki/Door",
     ]
-    assert features[3]["ontology"] == [
+    assert by_index[3].ontology == [
         "https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States"
     ]
-    # Features without ontology should not include the field
-    assert "ontology" not in features[0]
-    assert "ontology" not in features[4]
+    assert by_index[0].ontology is None
+    assert by_index[4].ontology is None
 
 
-async def test_dataset_features_no_access(py_api: httpx.AsyncClient) -> None:
-    response = await py_api.get("/datasets/features/130")
-    assert response.status_code == HTTPStatus.FORBIDDEN
+async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
+    with pytest.raises(DatasetNoAccessError):
+        await get_dataset_features(dataset_id=130, user=None, expdb=expdb_test)
 
 
-@pytest.mark.parametrize(
-    "api_key",
-    [ApiKey.ADMIN, ApiKey.DATASET_130_OWNER],
-)
+@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
 async def test_dataset_features_access_to_private(
-    api_key: ApiKey, py_api: httpx.AsyncClient
+    user: User, expdb_test: AsyncConnection
 ) -> None:
-    response = await py_api.get(f"/datasets/features/130?api_key={api_key}")
-    assert response.status_code == HTTPStatus.OK
+    features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
+    assert isinstance(features, list)
 
 
-async def test_dataset_features_with_processing_error(py_api: httpx.AsyncClient) -> None:
-    # When a dataset is processed to extract its feature metadata, errors may occur.
-    # In that case, no feature information will ever be available.
+async def test_dataset_features_with_processing_error(expdb_test: AsyncConnection) -> None:
     dataset_id = 55
-    response = await py_api.get(f"/datasets/features/{dataset_id}")
-    assert response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert response.headers["content-type"] == "application/problem+json"
-    error = response.json()
-    assert error["type"] == DatasetProcessingError.uri
-    assert error["code"] == "274"
-    assert "No features found" in error["detail"]
-    assert str(dataset_id) in error["detail"]
+    with pytest.raises(DatasetProcessingError) as e:
+        await get_dataset_features(dataset_id=dataset_id, user=None, expdb=expdb_test)
+    assert "No features found" in e.value.detail
+    assert str(dataset_id) in e.value.detail
 
 
-async def test_dataset_features_dataset_does_not_exist(py_api: httpx.AsyncClient) -> None:
-    resource = await py_api.get("/datasets/features/1000")
-    assert resource.status_code == HTTPStatus.NOT_FOUND
+async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
+    with pytest.raises(DatasetNotFoundError):
+        await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test)
 
 
-async def _assert_status_update_is_successful(
-    apikey: ApiKey,
-    dataset_id: int,
-    status: str,
-    py_api: httpx.AsyncClient,
-) -> None:
-    response = await py_api.post(
-        f"/datasets/status/update?api_key={apikey}",
-        json={"dataset_id": dataset_id, "status": status},
-    )
-    assert response.status_code == HTTPStatus.OK
-    assert response.json() == {
-        "dataset_id": dataset_id,
-        "status": status,
-    }
+# ── Direct call tests: update_dataset_status ──
 
 
 @pytest.mark.mut
-@pytest.mark.parametrize(
-    "dataset_id",
-    [3, 4],
-)
+@pytest.mark.parametrize("dataset_id", [3, 4])
 async def test_dataset_status_update_active_to_deactivated(
-    dataset_id: int, py_api: httpx.AsyncClient
+    dataset_id: int, expdb_test: AsyncConnection
 ) -> None:
-    await _assert_status_update_is_successful(
-        apikey=ApiKey.ADMIN,
+    result = await update_dataset_status(
         dataset_id=dataset_id,
         status=DatasetStatus.DEACTIVATED,
-        py_api=py_api,
+        user=ADMIN_USER,
+        expdb=expdb_test,
     )
+    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
 
 
 @pytest.mark.mut
-async def test_dataset_status_update_in_preparation_to_active(py_api: httpx.AsyncClient) -> None:
-    await _assert_status_update_is_successful(
-        apikey=ApiKey.ADMIN,
-        dataset_id=next(iter(constants.IN_PREPARATION_ID)),
+async def test_dataset_status_update_in_preparation_to_active(
+    expdb_test: AsyncConnection,
+) -> None:
+    dataset_id = next(iter(constants.IN_PREPARATION_ID))
+    result = await update_dataset_status(
+        dataset_id=dataset_id,
         status=DatasetStatus.ACTIVE,
-        py_api=py_api,
+        user=ADMIN_USER,
+        expdb=expdb_test,
     )
+    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
 
 
 @pytest.mark.mut
 async def test_dataset_status_update_in_preparation_to_deactivated(
-    py_api: httpx.AsyncClient,
+    expdb_test: AsyncConnection,
 ) -> None:
-    await _assert_status_update_is_successful(
-        apikey=ApiKey.ADMIN,
-        dataset_id=next(iter(constants.IN_PREPARATION_ID)),
+    dataset_id = next(iter(constants.IN_PREPARATION_ID))
+    result = await update_dataset_status(
+        dataset_id=dataset_id,
         status=DatasetStatus.DEACTIVATED,
-        py_api=py_api,
+        user=ADMIN_USER,
+        expdb=expdb_test,
     )
+    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
 
 
 @pytest.mark.mut
-async def test_dataset_status_update_deactivated_to_active(py_api: httpx.AsyncClient) -> None:
-    await _assert_status_update_is_successful(
-        apikey=ApiKey.ADMIN,
-        dataset_id=next(iter(constants.DEACTIVATED_DATASETS)),
+async def test_dataset_status_update_deactivated_to_active(
+    expdb_test: AsyncConnection,
+) -> None:
+    dataset_id = next(iter(constants.DEACTIVATED_DATASETS))
+    result = await update_dataset_status(
+        dataset_id=dataset_id,
         status=DatasetStatus.ACTIVE,
-        py_api=py_api,
+        user=ADMIN_USER,
+        expdb=expdb_test,
     )
+    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
 
 
-@pytest.mark.parametrize(
-    ("dataset_id", "api_key", "status"),
-    [
-        (1, ApiKey.SOME_USER, DatasetStatus.ACTIVE),
-        (1, ApiKey.SOME_USER, DatasetStatus.DEACTIVATED),
-        (2, ApiKey.SOME_USER, DatasetStatus.DEACTIVATED),
-        (33, ApiKey.SOME_USER, DatasetStatus.ACTIVE),
-        (131, ApiKey.SOME_USER, DatasetStatus.ACTIVE),
-    ],
-)
-async def test_dataset_status_unauthorized(
+@pytest.mark.parametrize("dataset_id", [1, 33, 131])
+async def test_dataset_status_non_admin_cannot_activate(
     dataset_id: int,
-    api_key: ApiKey,
-    status: str,
-    py_api: httpx.AsyncClient,
+    expdb_test: AsyncConnection,
 ) -> None:
-    response = await py_api.post(
-        f"/datasets/status/update?api_key={api_key}",
-        json={"dataset_id": dataset_id, "status": status},
-    )
-    assert response.status_code == HTTPStatus.FORBIDDEN
+    with pytest.raises(DatasetAdminOnlyError):
+        await update_dataset_status(
+            dataset_id=dataset_id,
+            status=DatasetStatus.ACTIVE,
+            user=SOME_USER,
+            expdb=expdb_test,
+        )
 
 
-@pytest.mark.mut
-async def test_dataset_no_500_with_multiple_processing_entries(
-    py_api: httpx.AsyncClient,
+@pytest.mark.parametrize("dataset_id", [1, 2])
+async def test_dataset_status_non_owner_cannot_deactivate(
+    dataset_id: int,
     expdb_test: AsyncConnection,
 ) -> None:
-    """Regression test for issue #145: multiple processing entries caused 500."""
-    await expdb_test.execute(
-        text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
-    )
-    await expdb_test.execute(
-        text(
-            "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
-            "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
-        ),
-    )
-    response = await py_api.get("/datasets/1")
-    assert response.status_code == HTTPStatus.OK
+    with pytest.raises(DatasetNotOwnedError):
+        await update_dataset_status(
+            dataset_id=dataset_id,
+            status=DatasetStatus.DEACTIVATED,
+            user=SOME_USER,
+            expdb=expdb_test,
+        )

From a4f5cdc76e64a064f1610a4517e55a97f970c81b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 26 Mar 2026 15:29:15 +0000
Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/routers/openml/datasets_list_datasets_test.py | 2 +-
 tests/routers/openml/datasets_test.py               | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index ded608fc..10822cc5 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -14,7 +14,7 @@
 from routers.dependencies import Pagination
 from routers.openml.datasets import DatasetStatusFilter, list_datasets
 from tests import constants
-from tests.users import ADMIN_USER, DATASET_130_OWNER, OWNER_USER, SOME_USER, ApiKey
+from tests.users import ADMIN_USER, DATASET_130_OWNER, SOME_USER, ApiKey
 
 
 async def test_list_route(py_api: httpx.AsyncClient) -> None:
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index f1e4a2fd..aaedded3 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -17,8 +17,7 @@
 from routers.openml.datasets import get_dataset, get_dataset_features, update_dataset_status
 from schemas.datasets.openml import DatasetMetadata, DatasetStatus
 from tests import constants
-from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey
-
+from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER
 
 # ── py_api: routing + serialization, RFC 9457 format, regression ──
 
@@ -239,9 +238,7 @@ async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
 
 
 @pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
-async def test_dataset_features_access_to_private(
-    user: User, expdb_test: AsyncConnection
-) -> None:
+async def test_dataset_features_access_to_private(user: User, expdb_test: AsyncConnection) -> None:
     features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
     assert isinstance(features, list)
 

From e2c27b5d3a984bc34794fd735edb593b7e97ecff Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Thu, 26 Mar 2026 16:44:23 +0100
Subject: [PATCH 3/7] Separate out tests to one file per endpoint

---
 .../routers/openml/datasets_features_test.py  | 106 +++++++++++++
 tests/routers/openml/datasets_get_test.py     | 142 ++++++++++++++++++
 tests/routers/openml/datasets_status_test.py  | 106 +++++++++++++
 3 files changed, 354 insertions(+)
 create mode 100644 tests/routers/openml/datasets_features_test.py
 create mode 100644 tests/routers/openml/datasets_get_test.py
 create mode 100644 tests/routers/openml/datasets_status_test.py

diff --git a/tests/routers/openml/datasets_features_test.py b/tests/routers/openml/datasets_features_test.py
new file mode 100644
index 00000000..aa3988b5
--- /dev/null
+++ b/tests/routers/openml/datasets_features_test.py
@@ -0,0 +1,106 @@
+"""Tests for the GET /datasets/features/{dataset_id} endpoint."""
+
+from http import HTTPStatus
+
+import httpx
+import pytest
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from core.errors import DatasetNoAccessError, DatasetNotFoundError, DatasetProcessingError
+from database.users import User
+from routers.openml.datasets import get_dataset_features
+from tests.users import ADMIN_USER, DATASET_130_OWNER
+
+
+async def test_get_features_via_api(py_api: httpx.AsyncClient) -> None:
+    response = await py_api.get("/datasets/features/4")
+    assert response.status_code == HTTPStatus.OK
+    assert response.json() == [
+        {
+            "index": 0,
+            "name": "left-weight",
+            "data_type": "numeric",
+            "is_target": False,
+            "is_ignore": False,
+            "is_row_identifier": False,
+            "number_of_missing_values": 0,
+        },
+        {
+            "index": 1,
+            "name": "left-distance",
+            "data_type": "numeric",
+            "is_target": False,
+            "is_ignore": False,
+            "is_row_identifier": False,
+            "number_of_missing_values": 0,
+        },
+        {
+            "index": 2,
+            "name": "right-weight",
+            "data_type": "numeric",
+            "is_target": False,
+            "is_ignore": False,
+            "is_row_identifier": False,
+            "number_of_missing_values": 0,
+        },
+        {
+            "index": 3,
+            "name": "right-distance",
+            "data_type": "numeric",
+            "is_target": False,
+            "is_ignore": False,
+            "is_row_identifier": False,
+            "number_of_missing_values": 0,
+        },
+        {
+            "index": 4,
+            "name": "class",
+            "data_type": "nominal",
+            "nominal_values": ["B", "L", "R"],
+            "is_target": True,
+            "is_ignore": False,
+            "is_row_identifier": False,
+            "number_of_missing_values": 0,
+        },
+    ]
+
+
+async def test_dataset_features_with_ontology(expdb_test: AsyncConnection) -> None:
+    features = await get_dataset_features(dataset_id=11, user=None, expdb=expdb_test)
+    by_index = {f.index: f for f in features}
+    assert by_index[1].ontology == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
+    assert by_index[2].ontology == [
+        "https://en.wikipedia.org/wiki/Car_door",
+        "https://en.wikipedia.org/wiki/Door",
+    ]
+    assert by_index[3].ontology == [
+        "https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States"
+    ]
+    assert by_index[0].ontology is None
+    assert by_index[4].ontology is None
+
+
+async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
+    with pytest.raises(DatasetNoAccessError):
+        await get_dataset_features(dataset_id=130, user=None, expdb=expdb_test)
+
+
+@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
+async def test_dataset_features_access_to_private(
+    user: User, expdb_test: AsyncConnection
+) -> None:
+    features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
+    assert isinstance(features, list)
+
+
+async def test_dataset_features_with_processing_error(expdb_test: AsyncConnection) -> None:
+    dataset_id = 55
+    with pytest.raises(DatasetProcessingError) as e:
+        await get_dataset_features(dataset_id=dataset_id, user=None, expdb=expdb_test)
+    assert "No features found" in e.value.detail
+    assert str(dataset_id) in e.value.detail
+
+
+async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
+    with pytest.raises(DatasetNotFoundError):
+        await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test)
diff --git a/tests/routers/openml/datasets_get_test.py b/tests/routers/openml/datasets_get_test.py
new file mode 100644
index 00000000..fe67abee
--- /dev/null
+++ b/tests/routers/openml/datasets_get_test.py
@@ -0,0 +1,142 @@
+"""Tests for the GET /datasets/{dataset_id} endpoint."""
+
+import re
+from http import HTTPStatus
+
+import httpx
+import pytest
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from core.errors import DatasetNoAccessError, DatasetNotFoundError
+from database.users import User
+from routers.openml.datasets import get_dataset
+from schemas.datasets.openml import DatasetMetadata
+from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER
+
+
+async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
+    response = await py_api.get("/datasets/1")
+    assert response.status_code == HTTPStatus.OK
+    description = response.json()
+    assert description.pop("description").startswith("**Author**:")
+    assert description == {
+        "id": 1,
+        "name": "anneal",
+        "version": 1,
+        "format": "arff",
+        "description_version": 1,
+        "upload_date": "2014-04-06T23:19:24",
+        "licence": "Public",
+        "url": "http://php-api/data/v1/download/1/anneal.arff",
+        "parquet_url": "http://minio:9000/datasets/0000/0001/dataset_1.pq",
+        "file_id": 1,
+        "default_target_attribute": ["class"],
+        "version_label": "1",
+        "tag": ["study_14"],
+        "visibility": "public",
+        "status": "active",
+        "processing_date": "2024-01-04T10:13:59",
+        "md5_checksum": "4eaed8b6ec9d8211024b6c089b064761",
+        "row_id_attribute": [],
+        "ignore_attribute": [],
+        "language": "",
+        "error": None,
+        "warning": None,
+        "citation": "",
+        "collection_date": None,
+        "contributor": [],
+        "creator": [],
+        "paper_url": None,
+        "original_data_url": [],
+    }
+
+
+async def test_rfc9457_error_format(py_api: httpx.AsyncClient) -> None:
+    """Single test for the generic RFC 9457 exception handler — covers all error types."""
+    response = await py_api.get("/datasets/100000")
+    assert response.status_code == HTTPStatus.NOT_FOUND
+    assert response.headers["content-type"] == "application/problem+json"
+    error = response.json()
+    assert error["type"] == DatasetNotFoundError.uri
+    assert error["title"] == "Dataset Not Found"
+    assert error["status"] == HTTPStatus.NOT_FOUND
+    assert re.match(r"No dataset with id \d+ found.", error["detail"])
+    assert error["code"] == "111"
+
+
+@pytest.mark.mut
+async def test_dataset_no_500_with_multiple_processing_entries(
+    py_api: httpx.AsyncClient,
+    expdb_test: AsyncConnection,
+) -> None:
+    """Regression test for issue #145: multiple processing entries caused 500."""
+    await expdb_test.execute(
+        text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
+    )
+    await expdb_test.execute(
+        text(
+            "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
+            "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
+        ),
+    )
+    response = await py_api.get("/datasets/1")
+    assert response.status_code == HTTPStatus.OK
+
+
+@pytest.mark.parametrize(
+    "dataset_id",
+    [-1, 138, 100_000],
+)
+async def test_get_dataset_not_found(
+    dataset_id: int,
+    expdb_test: AsyncConnection,
+    user_test: AsyncConnection,
+) -> None:
+    with pytest.raises(DatasetNotFoundError):
+        await get_dataset(
+            dataset_id=dataset_id,
+            user=None,
+            user_db=user_test,
+            expdb_db=expdb_test,
+        )
+
+
+@pytest.mark.parametrize(
+    "user",
+    [
+        NO_USER,
+        SOME_USER,
+    ],
+)
+async def test_private_dataset_no_access(
+    user: User | None,
+    expdb_test: AsyncConnection,
+    user_test: AsyncConnection,
+) -> None:
+    with pytest.raises(DatasetNoAccessError) as e:
+        await get_dataset(
+            dataset_id=130,
+            user=user,
+            user_db=user_test,
+            expdb_db=expdb_test,
+        )
+    assert e.value.status_code == HTTPStatus.FORBIDDEN
+    assert e.value.uri == DatasetNoAccessError.uri
+    no_access = 112
+    assert e.value.code == no_access
+
+
+@pytest.mark.parametrize(
+    "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
+)
+async def test_private_dataset_access(
+    user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
+) -> None:
+    dataset = await get_dataset(
+        dataset_id=130,
+        user=user,
+        user_db=user_test,
+        expdb_db=expdb_test,
+    )
+    assert isinstance(dataset, DatasetMetadata)
diff --git a/tests/routers/openml/datasets_status_test.py b/tests/routers/openml/datasets_status_test.py
new file mode 100644
index 00000000..1e2271fc
--- /dev/null
+++ b/tests/routers/openml/datasets_status_test.py
@@ -0,0 +1,106 @@
+"""Tests for the POST /datasets/status/update endpoint."""
+
+from http import HTTPStatus
+
+import httpx
+import pytest
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from core.errors import DatasetAdminOnlyError, DatasetNotOwnedError
+from routers.openml.datasets import update_dataset_status
+from schemas.datasets.openml import DatasetStatus
+from tests import constants
+from tests.users import ADMIN_USER, SOME_USER
+
+
+async def test_update_status_via_api(py_api: httpx.AsyncClient) -> None:
+    response = await py_api.post(
+        "/datasets/status/update",
+        json={"dataset_id": 1, "status": "active"},
+    )
+    # Without authentication, we expect 401 — confirms the route is wired up.
+    assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+
+@pytest.mark.mut
+@pytest.mark.parametrize("dataset_id", [3, 4])
+async def test_dataset_status_update_active_to_deactivated(
+    dataset_id: int, expdb_test: AsyncConnection
+) -> None:
+    result = await update_dataset_status(
+        dataset_id=dataset_id,
+        status=DatasetStatus.DEACTIVATED,
+        user=ADMIN_USER,
+        expdb=expdb_test,
+    )
+    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
+
+
+@pytest.mark.mut
+async def test_dataset_status_update_in_preparation_to_active(
+    expdb_test: AsyncConnection,
+) -> None:
+    dataset_id = next(iter(constants.IN_PREPARATION_ID))
+    result = await update_dataset_status(
+        dataset_id=dataset_id,
+        status=DatasetStatus.ACTIVE,
+        user=ADMIN_USER,
+        expdb=expdb_test,
+    )
+    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
+
+
+@pytest.mark.mut
+async def test_dataset_status_update_in_preparation_to_deactivated(
+    expdb_test: AsyncConnection,
+) -> None:
+    dataset_id = next(iter(constants.IN_PREPARATION_ID))
+    result = await update_dataset_status(
+        dataset_id=dataset_id,
+        status=DatasetStatus.DEACTIVATED,
+        user=ADMIN_USER,
+        expdb=expdb_test,
+    )
+    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
+
+
+@pytest.mark.mut
+async def test_dataset_status_update_deactivated_to_active(
+    expdb_test: AsyncConnection,
+) -> None:
+    dataset_id = next(iter(constants.DEACTIVATED_DATASETS))
+    result = await update_dataset_status(
+        dataset_id=dataset_id,
+        status=DatasetStatus.ACTIVE,
+        user=ADMIN_USER,
+        expdb=expdb_test,
+    )
+    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
+
+
+@pytest.mark.parametrize("dataset_id", [1, 33, 131])
+async def test_dataset_status_non_admin_cannot_activate(
+    dataset_id: int,
+    expdb_test: AsyncConnection,
+) -> None:
+    with pytest.raises(DatasetAdminOnlyError):
+        await update_dataset_status(
+            dataset_id=dataset_id,
+            status=DatasetStatus.ACTIVE,
+            user=SOME_USER,
+            expdb=expdb_test,
+        )
+
+
+@pytest.mark.parametrize("dataset_id", [1, 2])
+async def test_dataset_status_non_owner_cannot_deactivate(
+    dataset_id: int,
+    expdb_test: AsyncConnection,
+) -> None:
+    with pytest.raises(DatasetNotOwnedError):
+        await update_dataset_status(
+            dataset_id=dataset_id,
+            status=DatasetStatus.DEACTIVATED,
+            user=SOME_USER,
+            expdb=expdb_test,
+        )

From 6b3a0141f6a7deb7e5156d023cac41bab22f4b85 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Thu, 26 Mar 2026 16:44:50 +0100
Subject: [PATCH 4/7] Remove old test file

---
 tests/routers/openml/datasets_test.py | 343 --------------------------
 1 file changed, 343 deletions(-)
 delete mode 100644 tests/routers/openml/datasets_test.py

diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
deleted file mode 100644
index aaedded3..00000000
--- a/tests/routers/openml/datasets_test.py
+++ /dev/null
@@ -1,343 +0,0 @@
-import re
-from http import HTTPStatus
-
-import httpx
-import pytest
-from sqlalchemy import text
-from sqlalchemy.ext.asyncio import AsyncConnection
-
-from core.errors import (
-    DatasetAdminOnlyError,
-    DatasetNoAccessError,
-    DatasetNotFoundError,
-    DatasetNotOwnedError,
-    DatasetProcessingError,
-)
-from database.users import User
-from routers.openml.datasets import get_dataset, get_dataset_features, update_dataset_status
-from schemas.datasets.openml import DatasetMetadata, DatasetStatus
-from tests import constants
-from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER
-
-# ── py_api: routing + serialization, RFC 9457 format, regression ──
-
-
-async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
-    response = await py_api.get("/datasets/1")
-    assert response.status_code == HTTPStatus.OK
-    description = response.json()
-    assert description.pop("description").startswith("**Author**:")
-    assert description == {
-        "id": 1,
-        "name": "anneal",
-        "version": 1,
-        "format": "arff",
-        "description_version": 1,
-        "upload_date": "2014-04-06T23:19:24",
-        "licence": "Public",
-        "url": "http://php-api/data/v1/download/1/anneal.arff",
-        "parquet_url": "http://minio:9000/datasets/0000/0001/dataset_1.pq",
-        "file_id": 1,
-        "default_target_attribute": ["class"],
-        "version_label": "1",
-        "tag": ["study_14"],
-        "visibility": "public",
-        "status": "active",
-        "processing_date": "2024-01-04T10:13:59",
-        "md5_checksum": "4eaed8b6ec9d8211024b6c089b064761",
-        "row_id_attribute": [],
-        "ignore_attribute": [],
-        "language": "",
-        "error": None,
-        "warning": None,
-        "citation": "",
-        "collection_date": None,
-        "contributor": [],
-        "creator": [],
-        "paper_url": None,
-        "original_data_url": [],
-    }
-
-
-async def test_get_features_via_api(py_api: httpx.AsyncClient) -> None:
-    response = await py_api.get("/datasets/features/4")
-    assert response.status_code == HTTPStatus.OK
-    assert response.json() == [
-        {
-            "index": 0,
-            "name": "left-weight",
-            "data_type": "numeric",
-            "is_target": False,
-            "is_ignore": False,
-            "is_row_identifier": False,
-            "number_of_missing_values": 0,
-        },
-        {
-            "index": 1,
-            "name": "left-distance",
-            "data_type": "numeric",
-            "is_target": False,
-            "is_ignore": False,
-            "is_row_identifier": False,
-            "number_of_missing_values": 0,
-        },
-        {
-            "index": 2,
-            "name": "right-weight",
-            "data_type": "numeric",
-            "is_target": False,
-            "is_ignore": False,
-            "is_row_identifier": False,
-            "number_of_missing_values": 0,
-        },
-        {
-            "index": 3,
-            "name": "right-distance",
-            "data_type": "numeric",
-            "is_target": False,
-            "is_ignore": False,
-            "is_row_identifier": False,
-            "number_of_missing_values": 0,
-        },
-        {
-            "index": 4,
-            "name": "class",
-            "data_type": "nominal",
-            "nominal_values": ["B", "L", "R"],
-            "is_target": True,
-            "is_ignore": False,
-            "is_row_identifier": False,
-            "number_of_missing_values": 0,
-        },
-    ]
-
-
-async def test_update_status_via_api(py_api: httpx.AsyncClient) -> None:
-    response = await py_api.post(
-        "/datasets/status/update",
-        json={"dataset_id": 1, "status": "active"},
-    )
-    # Without authentication, we expect 401 — confirms the route is wired up.
-    assert response.status_code == HTTPStatus.UNAUTHORIZED
-
-
-async def test_rfc9457_error_format(py_api: httpx.AsyncClient) -> None:
-    """Single test for the generic RFC 9457 exception handler — covers all error types."""
-    response = await py_api.get("/datasets/100000")
-    assert response.status_code == HTTPStatus.NOT_FOUND
-    assert response.headers["content-type"] == "application/problem+json"
-    error = response.json()
-    assert error["type"] == DatasetNotFoundError.uri
-    assert error["title"] == "Dataset Not Found"
-    assert error["status"] == HTTPStatus.NOT_FOUND
-    assert re.match(r"No dataset with id \d+ found.", error["detail"])
-    assert error["code"] == "111"
-
-
-@pytest.mark.mut
-async def test_dataset_no_500_with_multiple_processing_entries(
-    py_api: httpx.AsyncClient,
-    expdb_test: AsyncConnection,
-) -> None:
-    """Regression test for issue #145: multiple processing entries caused 500."""
-    await expdb_test.execute(
-        text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
-    )
-    await expdb_test.execute(
-        text(
-            "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
-            "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
-        ),
-    )
-    response = await py_api.get("/datasets/1")
-    assert response.status_code == HTTPStatus.OK
-
-
-# ── Direct call tests: get_dataset ──
-
-
-@pytest.mark.parametrize(
-    "dataset_id",
-    [-1, 138, 100_000],
-)
-async def test_get_dataset_not_found(
-    dataset_id: int,
-    expdb_test: AsyncConnection,
-    user_test: AsyncConnection,
-) -> None:
-    with pytest.raises(DatasetNotFoundError):
-        await get_dataset(
-            dataset_id=dataset_id,
-            user=None,
-            user_db=user_test,
-            expdb_db=expdb_test,
-        )
-
-
-@pytest.mark.parametrize(
-    "user",
-    [
-        NO_USER,
-        SOME_USER,
-    ],
-)
-async def test_private_dataset_no_access(
-    user: User | None,
-    expdb_test: AsyncConnection,
-    user_test: AsyncConnection,
-) -> None:
-    with pytest.raises(DatasetNoAccessError) as e:
-        await get_dataset(
-            dataset_id=130,
-            user=user,
-            user_db=user_test,
-            expdb_db=expdb_test,
-        )
-    assert e.value.status_code == HTTPStatus.FORBIDDEN
-    assert e.value.uri == DatasetNoAccessError.uri
-    no_access = 112
-    assert e.value.code == no_access
-
-
-@pytest.mark.parametrize(
-    "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
-)
-async def test_private_dataset_access(
-    user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
-) -> None:
-    dataset = await get_dataset(
-        dataset_id=130,
-        user=user,
-        user_db=user_test,
-        expdb_db=expdb_test,
-    )
-    assert isinstance(dataset, DatasetMetadata)
-
-
-# ── Direct call tests: get_dataset_features ──
-
-
-async def test_dataset_features_with_ontology(expdb_test: AsyncConnection) -> None:
-    features = await get_dataset_features(dataset_id=11, user=None, expdb=expdb_test)
-    by_index = {f.index: f for f in features}
-    assert by_index[1].ontology == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
-    assert by_index[2].ontology == [
-        "https://en.wikipedia.org/wiki/Car_door",
-        "https://en.wikipedia.org/wiki/Door",
-    ]
-    assert by_index[3].ontology == [
-        "https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States"
-    ]
-    assert by_index[0].ontology is None
-    assert by_index[4].ontology is None
-
-
-async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
-    with pytest.raises(DatasetNoAccessError):
-        await get_dataset_features(dataset_id=130, user=None, expdb=expdb_test)
-
-
-@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
-async def test_dataset_features_access_to_private(user: User, expdb_test: AsyncConnection) -> None:
-    features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
-    assert isinstance(features, list)
-
-
-async def test_dataset_features_with_processing_error(expdb_test: AsyncConnection) -> None:
-    dataset_id = 55
-    with pytest.raises(DatasetProcessingError) as e:
-        await get_dataset_features(dataset_id=dataset_id, user=None, expdb=expdb_test)
-    assert "No features found" in e.value.detail
-    assert str(dataset_id) in e.value.detail
-
-
-async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
-    with pytest.raises(DatasetNotFoundError):
-        await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test)
-
-
-# ── Direct call tests: update_dataset_status ──
-
-
-@pytest.mark.mut
-@pytest.mark.parametrize("dataset_id", [3, 4])
-async def test_dataset_status_update_active_to_deactivated(
-    dataset_id: int, expdb_test: AsyncConnection
-) -> None:
-    result = await update_dataset_status(
-        dataset_id=dataset_id,
-        status=DatasetStatus.DEACTIVATED,
-        user=ADMIN_USER,
-        expdb=expdb_test,
-    )
-    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
-
-
-@pytest.mark.mut
-async def test_dataset_status_update_in_preparation_to_active(
-    expdb_test: AsyncConnection,
-) -> None:
-    dataset_id = next(iter(constants.IN_PREPARATION_ID))
-    result = await update_dataset_status(
-        dataset_id=dataset_id,
-        status=DatasetStatus.ACTIVE,
-        user=ADMIN_USER,
-        expdb=expdb_test,
-    )
-    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
-
-
-@pytest.mark.mut
-async def test_dataset_status_update_in_preparation_to_deactivated(
-    expdb_test: AsyncConnection,
-) -> None:
-    dataset_id = next(iter(constants.IN_PREPARATION_ID))
-    result = await update_dataset_status(
-        dataset_id=dataset_id,
-        status=DatasetStatus.DEACTIVATED,
-        user=ADMIN_USER,
-        expdb=expdb_test,
-    )
-    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
-
-
-@pytest.mark.mut
-async def test_dataset_status_update_deactivated_to_active(
-    expdb_test: AsyncConnection,
-) -> None:
-    dataset_id = next(iter(constants.DEACTIVATED_DATASETS))
-    result = await update_dataset_status(
-        dataset_id=dataset_id,
-        status=DatasetStatus.ACTIVE,
-        user=ADMIN_USER,
-        expdb=expdb_test,
-    )
-    assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
-
-
-@pytest.mark.parametrize("dataset_id", [1, 33, 131])
-async def test_dataset_status_non_admin_cannot_activate(
-    dataset_id: int,
-    expdb_test: AsyncConnection,
-) -> None:
-    with pytest.raises(DatasetAdminOnlyError):
-        await update_dataset_status(
-            dataset_id=dataset_id,
-            status=DatasetStatus.ACTIVE,
-            user=SOME_USER,
-            expdb=expdb_test,
-        )
-
-
-@pytest.mark.parametrize("dataset_id", [1, 2])
-async def test_dataset_status_non_owner_cannot_deactivate(
-    dataset_id: int,
-    expdb_test: AsyncConnection,
-) -> None:
-    with pytest.raises(DatasetNotOwnedError):
-        await update_dataset_status(
-            dataset_id=dataset_id,
-            status=DatasetStatus.DEACTIVATED,
-            user=SOME_USER,
-            expdb=expdb_test,
-        )

From 7f588d9944bf091973f1e75b3e04ab60cd8b9893 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Thu, 26 Mar 2026 16:51:32 +0100
Subject: [PATCH 5/7] Fix pre-commit issues

---
 tests/routers/openml/datasets_features_test.py      | 4 +---
 tests/routers/openml/datasets_list_datasets_test.py | 5 +++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/routers/openml/datasets_features_test.py b/tests/routers/openml/datasets_features_test.py
index aa3988b5..193b0f31 100644
--- a/tests/routers/openml/datasets_features_test.py
+++ b/tests/routers/openml/datasets_features_test.py
@@ -86,9 +86,7 @@ async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
 
 
 @pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
-async def test_dataset_features_access_to_private(
-    user: User, expdb_test: AsyncConnection
-) -> None:
+async def test_dataset_features_access_to_private(user: User, expdb_test: AsyncConnection) -> None:
     features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
     assert isinstance(features, list)
 
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index 10822cc5..d8fb5735 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -217,7 +217,8 @@ async def test_list_pagination(
             expdb_db=expdb_test,
         )
     except NoResultsError:
-        assert offset == 140, "Result was expected but NoResultsError was raised."
+        expect_empty_offset = 140
+        assert offset == expect_empty_offset, "Result was expected but NoResultsError was raised."
         return
     reported_ids = {dataset["did"] for dataset in result}
     assert reported_ids == set(expected_ids)
@@ -340,6 +341,6 @@ async def test_list_data_quality(
         status=DatasetStatusFilter.ALL,
         user=None,
         expdb_db=expdb_test,
-        **{quality: range_},
+        **{quality: range_},  # type: ignore[arg-type]
     )
     assert len(result) == count

From 0b8f7b4a4a964c47259088479b6d6352666beae9 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Thu, 26 Mar 2026 17:20:47 +0100
Subject: [PATCH 6/7] trigger ci


From e16b04ea8e0506bd2a633e654a45190803f786a7 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Thu, 26 Mar 2026 17:24:07 +0100
Subject: [PATCH 7/7] Kick off CI for changes to tests

---
 .github/workflows/tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 07f64402..d0af8bca 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -6,6 +6,7 @@ on:
   pull_request:
     paths:
       - 'src/**'
+      - 'tests/**'
       - 'docker/**'
       - 'docker-compose.yaml'
       - 'pyproject.toml'