From 7c205aa4936373aab571c2399c5362cf146b7758 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 26 Mar 2026 16:24:49 +0100
Subject: [PATCH 1/7] Update tests to reduce the number of times the py_api fixture is
used
---
tests/routers/openml/dataset_tag_test.py | 106 ++---
.../openml/datasets_list_datasets_test.py | 426 +++++++++---------
tests/routers/openml/datasets_test.py | 359 ++++++++-------
3 files changed, 462 insertions(+), 429 deletions(-)
diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py
index a9444c88..41746f83 100644
--- a/tests/routers/openml/dataset_tag_test.py
+++ b/tests/routers/openml/dataset_tag_test.py
@@ -4,10 +4,12 @@
import pytest
from sqlalchemy.ext.asyncio import AsyncConnection
-from core.errors import AuthenticationFailedError, TagAlreadyExistsError
+from core.errors import TagAlreadyExistsError
from database.datasets import get_tags_for
+from database.users import User
+from routers.openml.datasets import tag_dataset
from tests import constants
-from tests.users import ApiKey
+from tests.users import ADMIN_USER, OWNER_USER, SOME_USER, ApiKey
@pytest.mark.parametrize(
@@ -22,73 +24,71 @@ async def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: httpx.Async
json={"data_id": next(iter(constants.PRIVATE_DATASET_ID)), "tag": "test"},
)
assert response.status_code == HTTPStatus.UNAUTHORIZED
- assert response.headers["content-type"] == "application/problem+json"
- error = response.json()
- assert error["type"] == AuthenticationFailedError.uri
- assert error["code"] == "103"
+
+
+@pytest.mark.parametrize(
+ "tag",
+ ["", "h@", " a", "a" * 65],
+ ids=["too short", "@", "space", "too long"],
+)
+async def test_dataset_tag_invalid_tag_is_rejected(
+    # Note: tag format constraints are enforced by FastAPI's request validation
+ tag: str,
+ py_api: httpx.AsyncClient,
+) -> None:
+ new = await py_api.post(
+ f"/datasets/tag?api_key={ApiKey.ADMIN}",
+ json={"data_id": 1, "tag": tag},
+ )
+
+ assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
+ assert new.json()["detail"][0]["loc"] == ["body", "tag"]
+
+
+# ── Direct call tests: tag_dataset ──
@pytest.mark.mut
@pytest.mark.parametrize(
- "key",
- [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
+ "user",
+ [ADMIN_USER, SOME_USER, OWNER_USER],
ids=["administrator", "non-owner", "owner"],
)
-async def test_dataset_tag(
- key: ApiKey, expdb_test: AsyncConnection, py_api: httpx.AsyncClient
-) -> None:
+async def test_dataset_tag(user: User, expdb_test: AsyncConnection) -> None:
dataset_id, tag = next(iter(constants.PRIVATE_DATASET_ID)), "test"
- response = await py_api.post(
- f"/datasets/tag?api_key={key}",
- json={"data_id": dataset_id, "tag": tag},
+ result = await tag_dataset(
+ data_id=dataset_id,
+ tag=tag,
+ user=user,
+ expdb_db=expdb_test,
)
- assert response.status_code == HTTPStatus.OK
- assert response.json() == {"data_tag": {"id": str(dataset_id), "tag": [tag]}}
+ assert result == {"data_tag": {"id": str(dataset_id), "tag": [tag]}}
tags = await get_tags_for(id_=dataset_id, connection=expdb_test)
assert tag in tags
@pytest.mark.mut
-async def test_dataset_tag_returns_existing_tags(py_api: httpx.AsyncClient) -> None:
- dataset_id, tag = 1, "test"
- response = await py_api.post(
- f"/datasets/tag?api_key={ApiKey.ADMIN}",
- json={"data_id": dataset_id, "tag": tag},
+async def test_dataset_tag_returns_existing_tags(expdb_test: AsyncConnection) -> None:
+ dataset_id, tag = 1, "test" # Dataset 1 already is tagged with 'study_14'
+ result = await tag_dataset(
+ data_id=dataset_id,
+ tag=tag,
+ user=ADMIN_USER,
+ expdb_db=expdb_test,
)
- assert response.status_code == HTTPStatus.OK
- assert response.json() == {"data_tag": {"id": str(dataset_id), "tag": ["study_14", tag]}}
+ assert result == {"data_tag": {"id": str(dataset_id), "tag": ["study_14", tag]}}
@pytest.mark.mut
-async def test_dataset_tag_fails_if_tag_exists(py_api: httpx.AsyncClient) -> None:
+async def test_dataset_tag_fails_if_tag_exists(expdb_test: AsyncConnection) -> None:
dataset_id, tag = 1, "study_14" # Dataset 1 already is tagged with 'study_14'
- response = await py_api.post(
- f"/datasets/tag?api_key={ApiKey.ADMIN}",
- json={"data_id": dataset_id, "tag": tag},
- )
- assert response.status_code == HTTPStatus.CONFLICT
- assert response.headers["content-type"] == "application/problem+json"
- error = response.json()
- assert error["type"] == TagAlreadyExistsError.uri
- assert error["code"] == "473"
- assert str(dataset_id) in error["detail"]
- assert tag in error["detail"]
-
-
-@pytest.mark.parametrize(
- "tag",
- ["", "h@", " a", "a" * 65],
- ids=["too short", "@", "space", "too long"],
-)
-async def test_dataset_tag_invalid_tag_is_rejected(
- tag: str,
- py_api: httpx.AsyncClient,
-) -> None:
- new = await py_api.post(
- f"/datasets/tag?api_key={ApiKey.ADMIN}",
- json={"data_id": 1, "tag": tag},
- )
-
- assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
- assert new.json()["detail"][0]["loc"] == ["body", "tag"]
+ with pytest.raises(TagAlreadyExistsError) as e:
+ await tag_dataset(
+ data_id=dataset_id,
+ tag=tag,
+ user=ADMIN_USER,
+ expdb_db=expdb_test,
+ )
+ assert str(dataset_id) in e.value.detail
+ assert tag in e.value.detail
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index e619c468..ded608fc 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -7,99 +7,193 @@
import pytest
from hypothesis import given
from hypothesis import strategies as st
+from sqlalchemy.ext.asyncio import AsyncConnection
from core.errors import NoResultsError
+from database.users import User
+from routers.dependencies import Pagination
+from routers.openml.datasets import DatasetStatusFilter, list_datasets
from tests import constants
-from tests.users import ApiKey
+from tests.users import ADMIN_USER, DATASET_130_OWNER, OWNER_USER, SOME_USER, ApiKey
-def _assert_empty_result(
- response: httpx.Response,
-) -> None:
- assert response.status_code == HTTPStatus.NOT_FOUND
- assert response.headers["content-type"] == "application/problem+json"
- error = response.json()
- assert error["type"] == NoResultsError.uri
- assert error["code"] == "372"
-
-
-async def test_list(py_api: httpx.AsyncClient) -> None:
+async def test_list_route(py_api: httpx.AsyncClient) -> None:
response = await py_api.get("/datasets/list/")
assert response.status_code == HTTPStatus.OK
assert len(response.json()) >= 1
+@pytest.mark.slow
+@hypothesis.settings( # type: ignore[untyped-decorator] # 108
+ max_examples=500, # This number needs to be better motivated
+ suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture],
+ deadline=None,
+)
+@given( # type: ignore[untyped-decorator] # 108
+ number_missing_values=st.sampled_from([None, "2", "2..10000"]),
+ number_features=st.sampled_from([None, "5", "2..100"]),
+ number_classes=st.sampled_from([None, "5", "2..100"]),
+ number_instances=st.sampled_from([None, "150", "2..100"]),
+ limit=st.sampled_from([None, 1, 100, 1000]),
+ offset=st.sampled_from([None, 1, 100, 1000]),
+ status=st.sampled_from([None, "active", "deactivated", "in_preparation"]),
+ data_id=st.sampled_from([None, [61], [61, 130]]),
+ data_name=st.sampled_from([None, "abalone", "iris", "NotPresentInTheDatabase"]),
+ data_version=st.sampled_from([None, 2, 4]),
+ tag=st.sampled_from([None, "study_14", "study_not_in_db"]),
+    # We don't test the ADMIN user: the new API fixes a bug that treated admins as regular users
+ api_key=st.sampled_from([None, ApiKey.SOME_USER, ApiKey.OWNER_USER]),
+)
+async def test_list_data_identical(
+ py_api: httpx.AsyncClient,
+ php_api: httpx.AsyncClient,
+ **kwargs: dict[str, Any],
+) -> Any: # noqa: ANN401
+ limit, offset = kwargs["limit"], kwargs["offset"]
+ if (limit and not offset) or (offset and not limit):
+ # Behavior change: in new API these may be used independently, not in old.
+ return hypothesis.reject()
+
+ api_key = kwargs.pop("api_key")
+ api_key_query = f"?api_key={api_key}" if api_key else ""
+
+ # Pagination parameters are nested in the new query style
+ # The old style has no `limit` by default, so we mimic this with a high default
+ new_style = kwargs | {"pagination": {"limit": limit or 1_000_000}}
+ if offset is not None:
+ new_style["pagination"]["offset"] = offset
+
+ # old style `/data/filter` encodes all filters as a path
+ query = [
+ [filter_, value if not isinstance(value, list) else ",".join(str(v) for v in value)]
+ for filter_, value in kwargs.items()
+ if value is not None
+ ]
+ uri = "/data/list"
+ if query:
+ uri += f"/{'/'.join([str(v) for q in query for v in q])}"
+ uri += api_key_query
+
+ new, original = await asyncio.gather(
+ py_api.post(f"/datasets/list{api_key_query}", json=new_style),
+ php_api.get(uri),
+ )
+
+    # Note: the RFC 9457 migration changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results)
+ # and the error response format, so we can't compare error responses directly.
+ php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED
+ py_is_error = new.status_code == HTTPStatus.NOT_FOUND
+
+ if php_is_error or py_is_error:
+ # Both should be errors in the same cases
+ assert php_is_error == py_is_error, (
+ f"PHP status={original.status_code}, Python status={new.status_code}"
+ )
+ # Verify Python API returns RFC 9457 format
+ assert new.headers["content-type"] == "application/problem+json"
+ error = new.json()
+ assert error["type"] == NoResultsError.uri
+ assert error["code"] == "372"
+ assert original.json()["error"]["message"] == "No results"
+ assert error["detail"] == "No datasets match the search criteria."
+ return None
+ new_json = new.json()
+ # Qualities in new response are typed
+ for dataset in new_json:
+ for quality in dataset["quality"]:
+ quality["value"] = str(quality["value"])
+
+ # PHP API has a double nested dictionary that never has other entries
+ php_json = original.json()["data"]["dataset"]
+ assert len(php_json) == len(new_json)
+ assert php_json == new_json
+ return None
+
+
+# ── Direct call tests: list_datasets ──
+
+
@pytest.mark.parametrize(
("status", "amount"),
[
- ("active", constants.NUMBER_OF_PUBLIC_ACTIVE_DATASETS),
- ("deactivated", constants.NUMBER_OF_DEACTIVATED_DATASETS),
- ("in_preparation", constants.NUMBER_OF_DATASETS_IN_PREPARATION),
- ("all", constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
+ (DatasetStatusFilter.ACTIVE, constants.NUMBER_OF_PUBLIC_ACTIVE_DATASETS),
+ (DatasetStatusFilter.DEACTIVATED, constants.NUMBER_OF_DEACTIVATED_DATASETS),
+ (DatasetStatusFilter.IN_PREPARATION, constants.NUMBER_OF_DATASETS_IN_PREPARATION),
+ (
+ DatasetStatusFilter.ALL,
+ constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS,
+ ),
],
)
-async def test_list_filter_active(status: str, amount: int, py_api: httpx.AsyncClient) -> None:
- response = await py_api.post(
- "/datasets/list",
- json={"status": status, "pagination": {"limit": constants.NUMBER_OF_DATASETS}},
+async def test_list_filter_active(
+ status: DatasetStatusFilter, amount: int, expdb_test: AsyncConnection
+) -> None:
+ result = await list_datasets(
+ pagination=Pagination(limit=constants.NUMBER_OF_DATASETS),
+ status=status,
+ user=None,
+ expdb_db=expdb_test,
)
- assert response.status_code == HTTPStatus.OK, response.json()
- assert len(response.json()) == amount
+ assert len(result) == amount
@pytest.mark.parametrize(
- ("api_key", "amount"),
+ ("user", "amount"),
[
- (ApiKey.ADMIN, constants.NUMBER_OF_DATASETS),
- (ApiKey.DATASET_130_OWNER, constants.NUMBER_OF_DATASETS),
- (ApiKey.SOME_USER, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
+ (ADMIN_USER, constants.NUMBER_OF_DATASETS),
+ (DATASET_130_OWNER, constants.NUMBER_OF_DATASETS),
+ (SOME_USER, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
(None, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
],
)
async def test_list_accounts_privacy(
- api_key: ApiKey | None, amount: int, py_api: httpx.AsyncClient
+ user: User | None, amount: int, expdb_test: AsyncConnection
) -> None:
- key = f"?api_key={api_key}" if api_key else ""
- response = await py_api.post(
- f"/datasets/list{key}",
- json={"status": "all", "pagination": {"limit": 1000}},
+ result = await list_datasets(
+ pagination=Pagination(limit=1000),
+ status=DatasetStatusFilter.ALL,
+ user=user,
+ expdb_db=expdb_test,
)
- assert response.status_code == HTTPStatus.OK, response.json()
- assert len(response.json()) == amount
+ assert len(result) == amount
@pytest.mark.parametrize(
("name", "count"),
[("abalone", 1), ("iris", 2)],
)
-async def test_list_data_name_present(name: str, count: int, py_api: httpx.AsyncClient) -> None:
- # The second iris dataset is private, so we need to authenticate.
- response = await py_api.post(
- f"/datasets/list?api_key={ApiKey.ADMIN}",
- json={"status": "all", "data_name": name},
+async def test_list_data_name_present(name: str, count: int, expdb_test: AsyncConnection) -> None:
+ # The second iris dataset is private, so we need an admin user.
+ result = await list_datasets(
+ pagination=Pagination(),
+ status=DatasetStatusFilter.ALL,
+ data_name=name,
+ user=ADMIN_USER,
+ expdb_db=expdb_test,
)
- assert response.status_code == HTTPStatus.OK
- datasets = response.json()
- assert len(datasets) == count
- assert all(dataset["name"] == name for dataset in datasets)
+ assert len(result) == count
+ assert all(dataset["name"] == name for dataset in result)
@pytest.mark.parametrize(
"name",
["ir", "long_name_without_overlap"],
)
-async def test_list_data_name_absent(name: str, py_api: httpx.AsyncClient) -> None:
- response = await py_api.post(
- f"/datasets/list?api_key={ApiKey.ADMIN}",
- json={"status": "all", "data_name": name},
- )
- _assert_empty_result(response)
+async def test_list_data_name_absent(name: str, expdb_test: AsyncConnection) -> None:
+ with pytest.raises(NoResultsError):
+ await list_datasets(
+ pagination=Pagination(),
+ status=DatasetStatusFilter.ALL,
+ data_name=name,
+ user=ADMIN_USER,
+ expdb_db=expdb_test,
+ )
@pytest.mark.parametrize("limit", [None, 5, 10, 200])
-@pytest.mark.parametrize("offset", [None, 0, 5, 129, 140, 200])
+@pytest.mark.parametrize("offset", [None, 0, 5, 129, 140])
async def test_list_pagination(
- limit: int | None, offset: int | None, py_api: httpx.AsyncClient
+ limit: int | None, offset: int | None, expdb_test: AsyncConnection
) -> None:
# dataset ids are contiguous until 131, then there are 161, 162, and 163.
extra_datasets = [161, 162, 163]
@@ -113,17 +207,19 @@ async def test_list_pagination(
end = start + (100 if limit is None else limit)
expected_ids = all_ids[start:end]
- offset_body = {} if offset is None else {"offset": offset}
- limit_body = {} if limit is None else {"limit": limit}
- filters = {"status": "all", "pagination": offset_body | limit_body}
- response = await py_api.post("/datasets/list", json=filters)
+ pagination = Pagination(offset=offset or 0, limit=limit or 100)
- if offset in [140, 200]:
- _assert_empty_result(response)
+ try:
+ result = await list_datasets(
+ pagination=pagination,
+ status=DatasetStatusFilter.ALL,
+ user=None,
+ expdb_db=expdb_test,
+ )
+ except NoResultsError:
+ assert offset == 140, "Result was expected but NoResultsError was raised."
return
-
- assert response.status_code == HTTPStatus.OK
- reported_ids = {dataset["did"] for dataset in response.json()}
+ reported_ids = {dataset["did"] for dataset in result}
assert reported_ids == set(expected_ids)
@@ -131,85 +227,96 @@ async def test_list_pagination(
("version", "count"),
[(1, 100), (2, 7), (5, 1)],
)
-async def test_list_data_version(version: int, count: int, py_api: httpx.AsyncClient) -> None:
- response = await py_api.post(
- f"/datasets/list?api_key={ApiKey.ADMIN}",
- json={"status": "all", "data_version": version},
+async def test_list_data_version(version: int, count: int, expdb_test: AsyncConnection) -> None:
+ result = await list_datasets(
+ pagination=Pagination(),
+ status=DatasetStatusFilter.ALL,
+ data_version=version,
+ user=ADMIN_USER,
+ expdb_db=expdb_test,
)
- assert response.status_code == HTTPStatus.OK
- datasets = response.json()
- assert len(datasets) == count
- assert {dataset["version"] for dataset in datasets} == {version}
+ assert len(result) == count
+ assert {dataset["version"] for dataset in result} == {version}
-async def test_list_data_version_no_result(py_api: httpx.AsyncClient) -> None:
+async def test_list_data_version_no_result(expdb_test: AsyncConnection) -> None:
version_with_no_datasets = 42
- response = await py_api.post(
- f"/datasets/list?api_key={ApiKey.ADMIN}",
- json={"status": "all", "data_version": version_with_no_datasets},
- )
- _assert_empty_result(response)
+ with pytest.raises(NoResultsError):
+ await list_datasets(
+ pagination=Pagination(),
+ status=DatasetStatusFilter.ALL,
+ data_version=version_with_no_datasets,
+ user=ADMIN_USER,
+ expdb_db=expdb_test,
+ )
-@pytest.mark.parametrize(
- "key",
- [ApiKey.SOME_USER, ApiKey.DATASET_130_OWNER, ApiKey.ADMIN],
-)
+@pytest.mark.parametrize("user", [SOME_USER, DATASET_130_OWNER, ADMIN_USER])
@pytest.mark.parametrize(
("user_id", "count"),
[(1, 59), (2, 34), (16, 1)],
)
-async def test_list_uploader(user_id: int, count: int, key: str, py_api: httpx.AsyncClient) -> None:
- response = await py_api.post(
- f"/datasets/list?api_key={key}",
- json={"status": "all", "uploader": user_id},
- )
+async def test_list_uploader(
+ user_id: int, count: int, user: User, expdb_test: AsyncConnection
+) -> None:
# The dataset of user 16 is private, so can not be retrieved by other users.
owner_user_id = 16
- if key == ApiKey.SOME_USER and user_id == owner_user_id:
- _assert_empty_result(response)
- return
-
- assert response.status_code == HTTPStatus.OK
- assert len(response.json()) == count
+ try:
+ result = await list_datasets(
+ pagination=Pagination(),
+ status=DatasetStatusFilter.ALL,
+ uploader=user_id,
+ user=user,
+ expdb_db=expdb_test,
+ )
+ assert len(result) == count
+ except NoResultsError:
+ assert user is SOME_USER, "Admin and Owner should always see a result"
+ assert user_id == owner_user_id, "Only empty result should be for owner_user filter"
@pytest.mark.parametrize(
"data_id",
[[1], [1, 2, 3], [1, 2, 3, 3000], [1, 2, 3, 130]],
)
-async def test_list_data_id(data_id: list[int], py_api: httpx.AsyncClient) -> None:
- response = await py_api.post(
- "/datasets/list",
- json={"status": "all", "data_id": data_id},
+async def test_list_data_id(data_id: list[int], expdb_test: AsyncConnection) -> None:
+ result = await list_datasets(
+ pagination=Pagination(),
+ status=DatasetStatusFilter.ALL,
+ data_id=data_id,
+ user=None,
+ expdb_db=expdb_test,
)
-
- assert response.status_code == HTTPStatus.OK
private_or_not_exist = {130, 3000}
- assert len(response.json()) == len(set(data_id) - private_or_not_exist)
+ expected = set(data_id) - private_or_not_exist
+ returned = {dataset["did"] for dataset in result}
+ assert returned == expected
@pytest.mark.parametrize(
("tag", "count"),
[("study_14", 100), ("study_15", 1)],
)
-async def test_list_data_tag(tag: str, count: int, py_api: httpx.AsyncClient) -> None:
- response = await py_api.post(
- "/datasets/list",
- # study_14 has 100 datasets, we overwrite the default `limit` because otherwise
- # we don't know if the results are limited by filtering on the tag.
- json={"status": "all", "tag": tag, "pagination": {"limit": 101}},
+async def test_list_data_tag(tag: str, count: int, expdb_test: AsyncConnection) -> None:
+ result = await list_datasets(
+ pagination=Pagination(limit=101),
+ status=DatasetStatusFilter.ALL,
+ tag=tag,
+ user=None,
+ expdb_db=expdb_test,
)
- assert response.status_code == HTTPStatus.OK
- assert len(response.json()) == count
+ assert len(result) == count
-async def test_list_data_tag_empty(py_api: httpx.AsyncClient) -> None:
- response = await py_api.post(
- "/datasets/list",
- json={"status": "all", "tag": "not-a-tag"},
- )
- _assert_empty_result(response)
+async def test_list_data_tag_empty(expdb_test: AsyncConnection) -> None:
+ with pytest.raises(NoResultsError):
+ await list_datasets(
+ pagination=Pagination(),
+ status=DatasetStatusFilter.ALL,
+ tag="not-a-tag",
+ user=None,
+ expdb_db=expdb_test,
+ )
@pytest.mark.parametrize(
@@ -226,98 +333,13 @@ async def test_list_data_tag_empty(py_api: httpx.AsyncClient) -> None:
],
)
async def test_list_data_quality(
- quality: str, range_: str, count: int, py_api: httpx.AsyncClient
+ quality: str, range_: str, count: int, expdb_test: AsyncConnection
) -> None:
- response = await py_api.post(
- "/datasets/list",
- json={"status": "all", quality: range_},
+ result = await list_datasets(
+ pagination=Pagination(),
+ status=DatasetStatusFilter.ALL,
+ user=None,
+ expdb_db=expdb_test,
+ **{quality: range_},
)
- assert response.status_code == HTTPStatus.OK, response.json()
- assert len(response.json()) == count
-
-
-@pytest.mark.slow
-@hypothesis.settings( # type: ignore[untyped-decorator] # 108
- max_examples=500, # This number needs to be better motivated
- suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture],
- deadline=None,
-)
-@given( # type: ignore[untyped-decorator] # 108
- number_missing_values=st.sampled_from([None, "2", "2..10000"]),
- number_features=st.sampled_from([None, "5", "2..100"]),
- number_classes=st.sampled_from([None, "5", "2..100"]),
- number_instances=st.sampled_from([None, "150", "2..100"]),
- limit=st.sampled_from([None, 1, 100, 1000]),
- offset=st.sampled_from([None, 1, 100, 1000]),
- status=st.sampled_from([None, "active", "deactivated", "in_preparation"]),
- data_id=st.sampled_from([None, [61], [61, 130]]),
- data_name=st.sampled_from([None, "abalone", "iris", "NotPresentInTheDatabase"]),
- data_version=st.sampled_from([None, 2, 4]),
- tag=st.sampled_from([None, "study_14", "study_not_in_db"]),
- # We don't test ADMIN user, as we fixed a bug which treated them as a regular user
- api_key=st.sampled_from([None, ApiKey.SOME_USER, ApiKey.OWNER_USER]),
-)
-async def test_list_data_identical(
- py_api: httpx.AsyncClient,
- php_api: httpx.AsyncClient,
- **kwargs: dict[str, Any],
-) -> Any: # noqa: ANN401
- limit, offset = kwargs["limit"], kwargs["offset"]
- if (limit and not offset) or (offset and not limit):
- # Behavior change: in new API these may be used independently, not in old.
- return hypothesis.reject()
-
- api_key = kwargs.pop("api_key")
- api_key_query = f"?api_key={api_key}" if api_key else ""
-
- # Pagination parameters are nested in the new query style
- # The old style has no `limit` by default, so we mimic this with a high default
- new_style = kwargs | {"pagination": {"limit": limit or 1_000_000}}
- if offset is not None:
- new_style["pagination"]["offset"] = offset
-
- # old style `/data/filter` encodes all filters as a path
- query = [
- [filter_, value if not isinstance(value, list) else ",".join(str(v) for v in value)]
- for filter_, value in kwargs.items()
- if value is not None
- ]
- uri = "/data/list"
- if query:
- uri += f"/{'/'.join([str(v) for q in query for v in q])}"
- uri += api_key_query
-
- new, original = await asyncio.gather(
- py_api.post(f"/datasets/list{api_key_query}", json=new_style),
- php_api.get(uri),
- )
-
- # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results)
- # and the error response format, so we can't compare error responses directly.
- php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED
- py_is_error = new.status_code == HTTPStatus.NOT_FOUND
-
- if php_is_error or py_is_error:
- # Both should be errors in the same cases
- assert php_is_error == py_is_error, (
- f"PHP status={original.status_code}, Python status={new.status_code}"
- )
- # Verify Python API returns RFC 9457 format
- assert new.headers["content-type"] == "application/problem+json"
- error = new.json()
- assert error["type"] == NoResultsError.uri
- assert error["code"] == "372"
- assert original.json()["error"]["message"] == "No results"
- assert error["detail"] == "No datasets match the search criteria."
- return None
- new_json = new.json()
- # Qualities in new response are typed
- for dataset in new_json:
- for quality in dataset["quality"]:
- quality["value"] = str(quality["value"])
-
- # PHP API has a double nested dictionary that never has other entries
- php_json = original.json()["data"]["dataset"]
- assert len(php_json) == len(new_json)
- assert php_json == new_json
- return None
+ assert len(result) == count
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index 91ef5bee..f1e4a2fd 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -7,48 +7,27 @@
from sqlalchemy.ext.asyncio import AsyncConnection
from core.errors import (
+ DatasetAdminOnlyError,
DatasetNoAccessError,
DatasetNotFoundError,
+ DatasetNotOwnedError,
DatasetProcessingError,
)
from database.users import User
-from routers.openml.datasets import get_dataset
+from routers.openml.datasets import get_dataset, get_dataset_features, update_dataset_status
from schemas.datasets.openml import DatasetMetadata, DatasetStatus
from tests import constants
from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey
-@pytest.mark.parametrize(
- ("dataset_id", "response_code"),
- [
- (-1, HTTPStatus.NOT_FOUND),
- (138, HTTPStatus.NOT_FOUND),
- (100_000, HTTPStatus.NOT_FOUND),
- ],
-)
-async def test_error_unknown_dataset(
- dataset_id: int,
- response_code: int,
- py_api: httpx.AsyncClient,
-) -> None:
- response = await py_api.get(f"/datasets/{dataset_id}")
-
- assert response.status_code == response_code
- assert response.headers["content-type"] == "application/problem+json"
- error = response.json()
- assert error["type"] == DatasetNotFoundError.uri
- assert error["title"] == "Dataset Not Found"
- assert error["status"] == HTTPStatus.NOT_FOUND
- assert re.match(r"No dataset with id -?\d+ found.", error["detail"])
- assert error["code"] == "111"
+# ── py_api: routing + serialization, RFC 9457 format, regression ──
-async def test_get_dataset(py_api: httpx.AsyncClient) -> None:
+async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
response = await py_api.get("/datasets/1")
assert response.status_code == HTTPStatus.OK
description = response.json()
assert description.pop("description").startswith("**Author**:")
-
assert description == {
"id": 1,
"name": "anneal",
@@ -81,48 +60,7 @@ async def test_get_dataset(py_api: httpx.AsyncClient) -> None:
}
-@pytest.mark.parametrize(
- "user",
- [
- NO_USER,
- SOME_USER,
- ],
-)
-async def test_private_dataset_no_access(
- user: User | None,
- expdb_test: AsyncConnection,
- user_test: AsyncConnection,
-) -> None:
- with pytest.raises(DatasetNoAccessError) as e:
- await get_dataset(
- dataset_id=130,
- user=user,
- user_db=user_test,
- expdb_db=expdb_test,
- )
- assert e.value.status_code == HTTPStatus.FORBIDDEN
- assert e.value.uri == DatasetNoAccessError.uri
- no_access = 112
- assert e.value.code == no_access
-
-
-@pytest.mark.parametrize(
- "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
-)
-async def test_private_dataset_access(
- user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
-) -> None:
- dataset = await get_dataset(
- dataset_id=130,
- user=user,
- user_db=user_test,
- expdb_db=expdb_test,
- )
- assert isinstance(dataset, DatasetMetadata)
-
-
-async def test_dataset_features(py_api: httpx.AsyncClient) -> None:
- # Dataset 4 has both nominal and numerical features, so provides reasonable coverage
+async def test_get_features_via_api(py_api: httpx.AsyncClient) -> None:
response = await py_api.get("/datasets/features/4")
assert response.status_code == HTTPStatus.OK
assert response.json() == [
@@ -175,161 +113,234 @@ async def test_dataset_features(py_api: httpx.AsyncClient) -> None:
]
-async def test_dataset_features_with_ontology(py_api: httpx.AsyncClient) -> None:
- # Dataset 11 has ontology data for features 1, 2, and 3
- response = await py_api.get("/datasets/features/11")
+async def test_update_status_via_api(py_api: httpx.AsyncClient) -> None:
+ response = await py_api.post(
+ "/datasets/status/update",
+ json={"dataset_id": 1, "status": "active"},
+ )
+ # Without authentication, we expect 401 — confirms the route is wired up.
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+
+async def test_rfc9457_error_format(py_api: httpx.AsyncClient) -> None:
+ """Single test for the generic RFC 9457 exception handler — covers all error types."""
+ response = await py_api.get("/datasets/100000")
+ assert response.status_code == HTTPStatus.NOT_FOUND
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == DatasetNotFoundError.uri
+ assert error["title"] == "Dataset Not Found"
+ assert error["status"] == HTTPStatus.NOT_FOUND
+ assert re.match(r"No dataset with id \d+ found.", error["detail"])
+ assert error["code"] == "111"
+
+
+@pytest.mark.mut
+async def test_dataset_no_500_with_multiple_processing_entries(
+ py_api: httpx.AsyncClient,
+ expdb_test: AsyncConnection,
+) -> None:
+ """Regression test for issue #145: multiple processing entries caused 500."""
+ await expdb_test.execute(
+ text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
+ )
+ await expdb_test.execute(
+ text(
+ "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
+ "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
+ ),
+ )
+ response = await py_api.get("/datasets/1")
assert response.status_code == HTTPStatus.OK
- features = {f["index"]: f for f in response.json()}
- assert features[1]["ontology"] == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
- assert features[2]["ontology"] == [
+
+
+# ── Direct call tests: get_dataset ──
+
+
+@pytest.mark.parametrize(
+ "dataset_id",
+ [-1, 138, 100_000],
+)
+async def test_get_dataset_not_found(
+ dataset_id: int,
+ expdb_test: AsyncConnection,
+ user_test: AsyncConnection,
+) -> None:
+ with pytest.raises(DatasetNotFoundError):
+ await get_dataset(
+ dataset_id=dataset_id,
+ user=None,
+ user_db=user_test,
+ expdb_db=expdb_test,
+ )
+
+
+@pytest.mark.parametrize(
+ "user",
+ [
+ NO_USER,
+ SOME_USER,
+ ],
+)
+async def test_private_dataset_no_access(
+ user: User | None,
+ expdb_test: AsyncConnection,
+ user_test: AsyncConnection,
+) -> None:
+ with pytest.raises(DatasetNoAccessError) as e:
+ await get_dataset(
+ dataset_id=130,
+ user=user,
+ user_db=user_test,
+ expdb_db=expdb_test,
+ )
+ assert e.value.status_code == HTTPStatus.FORBIDDEN
+ assert e.value.uri == DatasetNoAccessError.uri
+ no_access = 112
+ assert e.value.code == no_access
+
+
+@pytest.mark.parametrize(
+ "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
+)
+async def test_private_dataset_access(
+ user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
+) -> None:
+ dataset = await get_dataset(
+ dataset_id=130,
+ user=user,
+ user_db=user_test,
+ expdb_db=expdb_test,
+ )
+ assert isinstance(dataset, DatasetMetadata)
+
+
+# ── Direct call tests: get_dataset_features ──
+
+
+async def test_dataset_features_with_ontology(expdb_test: AsyncConnection) -> None:
+ features = await get_dataset_features(dataset_id=11, user=None, expdb=expdb_test)
+ by_index = {f.index: f for f in features}
+ assert by_index[1].ontology == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
+ assert by_index[2].ontology == [
"https://en.wikipedia.org/wiki/Car_door",
"https://en.wikipedia.org/wiki/Door",
]
- assert features[3]["ontology"] == [
+ assert by_index[3].ontology == [
"https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States"
]
- # Features without ontology should not include the field
- assert "ontology" not in features[0]
- assert "ontology" not in features[4]
+ assert by_index[0].ontology is None
+ assert by_index[4].ontology is None
-async def test_dataset_features_no_access(py_api: httpx.AsyncClient) -> None:
- response = await py_api.get("/datasets/features/130")
- assert response.status_code == HTTPStatus.FORBIDDEN
+async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
+ with pytest.raises(DatasetNoAccessError):
+ await get_dataset_features(dataset_id=130, user=None, expdb=expdb_test)
-@pytest.mark.parametrize(
- "api_key",
- [ApiKey.ADMIN, ApiKey.DATASET_130_OWNER],
-)
+@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
async def test_dataset_features_access_to_private(
- api_key: ApiKey, py_api: httpx.AsyncClient
+ user: User, expdb_test: AsyncConnection
) -> None:
- response = await py_api.get(f"/datasets/features/130?api_key={api_key}")
- assert response.status_code == HTTPStatus.OK
+ features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
+ assert isinstance(features, list)
-async def test_dataset_features_with_processing_error(py_api: httpx.AsyncClient) -> None:
- # When a dataset is processed to extract its feature metadata, errors may occur.
- # In that case, no feature information will ever be available.
+async def test_dataset_features_with_processing_error(expdb_test: AsyncConnection) -> None:
dataset_id = 55
- response = await py_api.get(f"/datasets/features/{dataset_id}")
- assert response.status_code == HTTPStatus.PRECONDITION_FAILED
- assert response.headers["content-type"] == "application/problem+json"
- error = response.json()
- assert error["type"] == DatasetProcessingError.uri
- assert error["code"] == "274"
- assert "No features found" in error["detail"]
- assert str(dataset_id) in error["detail"]
+ with pytest.raises(DatasetProcessingError) as e:
+ await get_dataset_features(dataset_id=dataset_id, user=None, expdb=expdb_test)
+ assert "No features found" in e.value.detail
+ assert str(dataset_id) in e.value.detail
-async def test_dataset_features_dataset_does_not_exist(py_api: httpx.AsyncClient) -> None:
- resource = await py_api.get("/datasets/features/1000")
- assert resource.status_code == HTTPStatus.NOT_FOUND
+async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
+ with pytest.raises(DatasetNotFoundError):
+ await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test)
-async def _assert_status_update_is_successful(
- apikey: ApiKey,
- dataset_id: int,
- status: str,
- py_api: httpx.AsyncClient,
-) -> None:
- response = await py_api.post(
- f"/datasets/status/update?api_key={apikey}",
- json={"dataset_id": dataset_id, "status": status},
- )
- assert response.status_code == HTTPStatus.OK
- assert response.json() == {
- "dataset_id": dataset_id,
- "status": status,
- }
+# ── Direct call tests: update_dataset_status ──
@pytest.mark.mut
-@pytest.mark.parametrize(
- "dataset_id",
- [3, 4],
-)
+@pytest.mark.parametrize("dataset_id", [3, 4])
async def test_dataset_status_update_active_to_deactivated(
- dataset_id: int, py_api: httpx.AsyncClient
+ dataset_id: int, expdb_test: AsyncConnection
) -> None:
- await _assert_status_update_is_successful(
- apikey=ApiKey.ADMIN,
+ result = await update_dataset_status(
dataset_id=dataset_id,
status=DatasetStatus.DEACTIVATED,
- py_api=py_api,
+ user=ADMIN_USER,
+ expdb=expdb_test,
)
+ assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
@pytest.mark.mut
-async def test_dataset_status_update_in_preparation_to_active(py_api: httpx.AsyncClient) -> None:
- await _assert_status_update_is_successful(
- apikey=ApiKey.ADMIN,
- dataset_id=next(iter(constants.IN_PREPARATION_ID)),
+async def test_dataset_status_update_in_preparation_to_active(
+ expdb_test: AsyncConnection,
+) -> None:
+ dataset_id = next(iter(constants.IN_PREPARATION_ID))
+ result = await update_dataset_status(
+ dataset_id=dataset_id,
status=DatasetStatus.ACTIVE,
- py_api=py_api,
+ user=ADMIN_USER,
+ expdb=expdb_test,
)
+ assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
@pytest.mark.mut
async def test_dataset_status_update_in_preparation_to_deactivated(
- py_api: httpx.AsyncClient,
+ expdb_test: AsyncConnection,
) -> None:
- await _assert_status_update_is_successful(
- apikey=ApiKey.ADMIN,
- dataset_id=next(iter(constants.IN_PREPARATION_ID)),
+ dataset_id = next(iter(constants.IN_PREPARATION_ID))
+ result = await update_dataset_status(
+ dataset_id=dataset_id,
status=DatasetStatus.DEACTIVATED,
- py_api=py_api,
+ user=ADMIN_USER,
+ expdb=expdb_test,
)
+ assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
@pytest.mark.mut
-async def test_dataset_status_update_deactivated_to_active(py_api: httpx.AsyncClient) -> None:
- await _assert_status_update_is_successful(
- apikey=ApiKey.ADMIN,
- dataset_id=next(iter(constants.DEACTIVATED_DATASETS)),
+async def test_dataset_status_update_deactivated_to_active(
+ expdb_test: AsyncConnection,
+) -> None:
+ dataset_id = next(iter(constants.DEACTIVATED_DATASETS))
+ result = await update_dataset_status(
+ dataset_id=dataset_id,
status=DatasetStatus.ACTIVE,
- py_api=py_api,
+ user=ADMIN_USER,
+ expdb=expdb_test,
)
+ assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
-@pytest.mark.parametrize(
- ("dataset_id", "api_key", "status"),
- [
- (1, ApiKey.SOME_USER, DatasetStatus.ACTIVE),
- (1, ApiKey.SOME_USER, DatasetStatus.DEACTIVATED),
- (2, ApiKey.SOME_USER, DatasetStatus.DEACTIVATED),
- (33, ApiKey.SOME_USER, DatasetStatus.ACTIVE),
- (131, ApiKey.SOME_USER, DatasetStatus.ACTIVE),
- ],
-)
-async def test_dataset_status_unauthorized(
+@pytest.mark.parametrize("dataset_id", [1, 33, 131])
+async def test_dataset_status_non_admin_cannot_activate(
dataset_id: int,
- api_key: ApiKey,
- status: str,
- py_api: httpx.AsyncClient,
+ expdb_test: AsyncConnection,
) -> None:
- response = await py_api.post(
- f"/datasets/status/update?api_key={api_key}",
- json={"dataset_id": dataset_id, "status": status},
- )
- assert response.status_code == HTTPStatus.FORBIDDEN
+ with pytest.raises(DatasetAdminOnlyError):
+ await update_dataset_status(
+ dataset_id=dataset_id,
+ status=DatasetStatus.ACTIVE,
+ user=SOME_USER,
+ expdb=expdb_test,
+ )
-@pytest.mark.mut
-async def test_dataset_no_500_with_multiple_processing_entries(
- py_api: httpx.AsyncClient,
+@pytest.mark.parametrize("dataset_id", [1, 2])
+async def test_dataset_status_non_owner_cannot_deactivate(
+ dataset_id: int,
expdb_test: AsyncConnection,
) -> None:
- """Regression test for issue #145: multiple processing entries caused 500."""
- await expdb_test.execute(
- text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
- )
- await expdb_test.execute(
- text(
- "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
- "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
- ),
- )
- response = await py_api.get("/datasets/1")
- assert response.status_code == HTTPStatus.OK
+ with pytest.raises(DatasetNotOwnedError):
+ await update_dataset_status(
+ dataset_id=dataset_id,
+ status=DatasetStatus.DEACTIVATED,
+ user=SOME_USER,
+ expdb=expdb_test,
+ )
From a4f5cdc76e64a064f1610a4517e55a97f970c81b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
<66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 26 Mar 2026 15:29:15 +0000
Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---
tests/routers/openml/datasets_list_datasets_test.py | 2 +-
tests/routers/openml/datasets_test.py | 7 ++-----
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index ded608fc..10822cc5 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -14,7 +14,7 @@
from routers.dependencies import Pagination
from routers.openml.datasets import DatasetStatusFilter, list_datasets
from tests import constants
-from tests.users import ADMIN_USER, DATASET_130_OWNER, OWNER_USER, SOME_USER, ApiKey
+from tests.users import ADMIN_USER, DATASET_130_OWNER, SOME_USER, ApiKey
async def test_list_route(py_api: httpx.AsyncClient) -> None:
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index f1e4a2fd..aaedded3 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -17,8 +17,7 @@
from routers.openml.datasets import get_dataset, get_dataset_features, update_dataset_status
from schemas.datasets.openml import DatasetMetadata, DatasetStatus
from tests import constants
-from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey
-
+from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER
# ── py_api: routing + serialization, RFC 9457 format, regression ──
@@ -239,9 +238,7 @@ async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
-async def test_dataset_features_access_to_private(
- user: User, expdb_test: AsyncConnection
-) -> None:
+async def test_dataset_features_access_to_private(user: User, expdb_test: AsyncConnection) -> None:
features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
assert isinstance(features, list)
From e2c27b5d3a984bc34794fd735edb593b7e97ecff Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 26 Mar 2026 16:44:23 +0100
Subject: [PATCH 3/7] Separate tests into one file per endpoint
---
.../routers/openml/datasets_features_test.py | 106 +++++++++++++
tests/routers/openml/datasets_get_test.py | 142 ++++++++++++++++++
tests/routers/openml/datasets_status_test.py | 106 +++++++++++++
3 files changed, 354 insertions(+)
create mode 100644 tests/routers/openml/datasets_features_test.py
create mode 100644 tests/routers/openml/datasets_get_test.py
create mode 100644 tests/routers/openml/datasets_status_test.py
diff --git a/tests/routers/openml/datasets_features_test.py b/tests/routers/openml/datasets_features_test.py
new file mode 100644
index 00000000..aa3988b5
--- /dev/null
+++ b/tests/routers/openml/datasets_features_test.py
@@ -0,0 +1,106 @@
+"""Tests for the GET /datasets/features/{dataset_id} endpoint."""
+
+from http import HTTPStatus
+
+import httpx
+import pytest
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from core.errors import DatasetNoAccessError, DatasetNotFoundError, DatasetProcessingError
+from database.users import User
+from routers.openml.datasets import get_dataset_features
+from tests.users import ADMIN_USER, DATASET_130_OWNER
+
+
+async def test_get_features_via_api(py_api: httpx.AsyncClient) -> None:
+ response = await py_api.get("/datasets/features/4")
+ assert response.status_code == HTTPStatus.OK
+ assert response.json() == [
+ {
+ "index": 0,
+ "name": "left-weight",
+ "data_type": "numeric",
+ "is_target": False,
+ "is_ignore": False,
+ "is_row_identifier": False,
+ "number_of_missing_values": 0,
+ },
+ {
+ "index": 1,
+ "name": "left-distance",
+ "data_type": "numeric",
+ "is_target": False,
+ "is_ignore": False,
+ "is_row_identifier": False,
+ "number_of_missing_values": 0,
+ },
+ {
+ "index": 2,
+ "name": "right-weight",
+ "data_type": "numeric",
+ "is_target": False,
+ "is_ignore": False,
+ "is_row_identifier": False,
+ "number_of_missing_values": 0,
+ },
+ {
+ "index": 3,
+ "name": "right-distance",
+ "data_type": "numeric",
+ "is_target": False,
+ "is_ignore": False,
+ "is_row_identifier": False,
+ "number_of_missing_values": 0,
+ },
+ {
+ "index": 4,
+ "name": "class",
+ "data_type": "nominal",
+ "nominal_values": ["B", "L", "R"],
+ "is_target": True,
+ "is_ignore": False,
+ "is_row_identifier": False,
+ "number_of_missing_values": 0,
+ },
+ ]
+
+
+async def test_dataset_features_with_ontology(expdb_test: AsyncConnection) -> None:
+ features = await get_dataset_features(dataset_id=11, user=None, expdb=expdb_test)
+ by_index = {f.index: f for f in features}
+ assert by_index[1].ontology == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
+ assert by_index[2].ontology == [
+ "https://en.wikipedia.org/wiki/Car_door",
+ "https://en.wikipedia.org/wiki/Door",
+ ]
+ assert by_index[3].ontology == [
+ "https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States"
+ ]
+ assert by_index[0].ontology is None
+ assert by_index[4].ontology is None
+
+
+async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
+ with pytest.raises(DatasetNoAccessError):
+ await get_dataset_features(dataset_id=130, user=None, expdb=expdb_test)
+
+
+@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
+async def test_dataset_features_access_to_private(
+ user: User, expdb_test: AsyncConnection
+) -> None:
+ features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
+ assert isinstance(features, list)
+
+
+async def test_dataset_features_with_processing_error(expdb_test: AsyncConnection) -> None:
+ dataset_id = 55
+ with pytest.raises(DatasetProcessingError) as e:
+ await get_dataset_features(dataset_id=dataset_id, user=None, expdb=expdb_test)
+ assert "No features found" in e.value.detail
+ assert str(dataset_id) in e.value.detail
+
+
+async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
+ with pytest.raises(DatasetNotFoundError):
+ await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test)
diff --git a/tests/routers/openml/datasets_get_test.py b/tests/routers/openml/datasets_get_test.py
new file mode 100644
index 00000000..fe67abee
--- /dev/null
+++ b/tests/routers/openml/datasets_get_test.py
@@ -0,0 +1,142 @@
+"""Tests for the GET /datasets/{dataset_id} endpoint."""
+
+import re
+from http import HTTPStatus
+
+import httpx
+import pytest
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from core.errors import DatasetNoAccessError, DatasetNotFoundError
+from database.users import User
+from routers.openml.datasets import get_dataset
+from schemas.datasets.openml import DatasetMetadata
+from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER
+
+
+async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
+ response = await py_api.get("/datasets/1")
+ assert response.status_code == HTTPStatus.OK
+ description = response.json()
+ assert description.pop("description").startswith("**Author**:")
+ assert description == {
+ "id": 1,
+ "name": "anneal",
+ "version": 1,
+ "format": "arff",
+ "description_version": 1,
+ "upload_date": "2014-04-06T23:19:24",
+ "licence": "Public",
+ "url": "http://php-api/data/v1/download/1/anneal.arff",
+ "parquet_url": "http://minio:9000/datasets/0000/0001/dataset_1.pq",
+ "file_id": 1,
+ "default_target_attribute": ["class"],
+ "version_label": "1",
+ "tag": ["study_14"],
+ "visibility": "public",
+ "status": "active",
+ "processing_date": "2024-01-04T10:13:59",
+ "md5_checksum": "4eaed8b6ec9d8211024b6c089b064761",
+ "row_id_attribute": [],
+ "ignore_attribute": [],
+ "language": "",
+ "error": None,
+ "warning": None,
+ "citation": "",
+ "collection_date": None,
+ "contributor": [],
+ "creator": [],
+ "paper_url": None,
+ "original_data_url": [],
+ }
+
+
+async def test_rfc9457_error_format(py_api: httpx.AsyncClient) -> None:
+ """Single test for the generic RFC 9457 exception handler — covers all error types."""
+ response = await py_api.get("/datasets/100000")
+ assert response.status_code == HTTPStatus.NOT_FOUND
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == DatasetNotFoundError.uri
+ assert error["title"] == "Dataset Not Found"
+ assert error["status"] == HTTPStatus.NOT_FOUND
+ assert re.match(r"No dataset with id \d+ found.", error["detail"])
+ assert error["code"] == "111"
+
+
+@pytest.mark.mut
+async def test_dataset_no_500_with_multiple_processing_entries(
+ py_api: httpx.AsyncClient,
+ expdb_test: AsyncConnection,
+) -> None:
+ """Regression test for issue #145: multiple processing entries caused 500."""
+ await expdb_test.execute(
+ text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
+ )
+ await expdb_test.execute(
+ text(
+ "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
+ "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
+ ),
+ )
+ response = await py_api.get("/datasets/1")
+ assert response.status_code == HTTPStatus.OK
+
+
+@pytest.mark.parametrize(
+ "dataset_id",
+ [-1, 138, 100_000],
+)
+async def test_get_dataset_not_found(
+ dataset_id: int,
+ expdb_test: AsyncConnection,
+ user_test: AsyncConnection,
+) -> None:
+ with pytest.raises(DatasetNotFoundError):
+ await get_dataset(
+ dataset_id=dataset_id,
+ user=None,
+ user_db=user_test,
+ expdb_db=expdb_test,
+ )
+
+
+@pytest.mark.parametrize(
+ "user",
+ [
+ NO_USER,
+ SOME_USER,
+ ],
+)
+async def test_private_dataset_no_access(
+ user: User | None,
+ expdb_test: AsyncConnection,
+ user_test: AsyncConnection,
+) -> None:
+ with pytest.raises(DatasetNoAccessError) as e:
+ await get_dataset(
+ dataset_id=130,
+ user=user,
+ user_db=user_test,
+ expdb_db=expdb_test,
+ )
+ assert e.value.status_code == HTTPStatus.FORBIDDEN
+ assert e.value.uri == DatasetNoAccessError.uri
+ no_access = 112
+ assert e.value.code == no_access
+
+
+@pytest.mark.parametrize(
+ "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
+)
+async def test_private_dataset_access(
+ user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
+) -> None:
+ dataset = await get_dataset(
+ dataset_id=130,
+ user=user,
+ user_db=user_test,
+ expdb_db=expdb_test,
+ )
+ assert isinstance(dataset, DatasetMetadata)
diff --git a/tests/routers/openml/datasets_status_test.py b/tests/routers/openml/datasets_status_test.py
new file mode 100644
index 00000000..1e2271fc
--- /dev/null
+++ b/tests/routers/openml/datasets_status_test.py
@@ -0,0 +1,106 @@
+"""Tests for the POST /datasets/status/update endpoint."""
+
+from http import HTTPStatus
+
+import httpx
+import pytest
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from core.errors import DatasetAdminOnlyError, DatasetNotOwnedError
+from routers.openml.datasets import update_dataset_status
+from schemas.datasets.openml import DatasetStatus
+from tests import constants
+from tests.users import ADMIN_USER, SOME_USER
+
+
+async def test_update_status_via_api(py_api: httpx.AsyncClient) -> None:
+ response = await py_api.post(
+ "/datasets/status/update",
+ json={"dataset_id": 1, "status": "active"},
+ )
+ # Without authentication, we expect 401 — confirms the route is wired up.
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+
+@pytest.mark.mut
+@pytest.mark.parametrize("dataset_id", [3, 4])
+async def test_dataset_status_update_active_to_deactivated(
+ dataset_id: int, expdb_test: AsyncConnection
+) -> None:
+ result = await update_dataset_status(
+ dataset_id=dataset_id,
+ status=DatasetStatus.DEACTIVATED,
+ user=ADMIN_USER,
+ expdb=expdb_test,
+ )
+ assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
+
+
+@pytest.mark.mut
+async def test_dataset_status_update_in_preparation_to_active(
+ expdb_test: AsyncConnection,
+) -> None:
+ dataset_id = next(iter(constants.IN_PREPARATION_ID))
+ result = await update_dataset_status(
+ dataset_id=dataset_id,
+ status=DatasetStatus.ACTIVE,
+ user=ADMIN_USER,
+ expdb=expdb_test,
+ )
+ assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
+
+
+@pytest.mark.mut
+async def test_dataset_status_update_in_preparation_to_deactivated(
+ expdb_test: AsyncConnection,
+) -> None:
+ dataset_id = next(iter(constants.IN_PREPARATION_ID))
+ result = await update_dataset_status(
+ dataset_id=dataset_id,
+ status=DatasetStatus.DEACTIVATED,
+ user=ADMIN_USER,
+ expdb=expdb_test,
+ )
+ assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
+
+
+@pytest.mark.mut
+async def test_dataset_status_update_deactivated_to_active(
+ expdb_test: AsyncConnection,
+) -> None:
+ dataset_id = next(iter(constants.DEACTIVATED_DATASETS))
+ result = await update_dataset_status(
+ dataset_id=dataset_id,
+ status=DatasetStatus.ACTIVE,
+ user=ADMIN_USER,
+ expdb=expdb_test,
+ )
+ assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
+
+
+@pytest.mark.parametrize("dataset_id", [1, 33, 131])
+async def test_dataset_status_non_admin_cannot_activate(
+ dataset_id: int,
+ expdb_test: AsyncConnection,
+) -> None:
+ with pytest.raises(DatasetAdminOnlyError):
+ await update_dataset_status(
+ dataset_id=dataset_id,
+ status=DatasetStatus.ACTIVE,
+ user=SOME_USER,
+ expdb=expdb_test,
+ )
+
+
+@pytest.mark.parametrize("dataset_id", [1, 2])
+async def test_dataset_status_non_owner_cannot_deactivate(
+ dataset_id: int,
+ expdb_test: AsyncConnection,
+) -> None:
+ with pytest.raises(DatasetNotOwnedError):
+ await update_dataset_status(
+ dataset_id=dataset_id,
+ status=DatasetStatus.DEACTIVATED,
+ user=SOME_USER,
+ expdb=expdb_test,
+ )
From 6b3a0141f6a7deb7e5156d023cac41bab22f4b85 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 26 Mar 2026 16:44:50 +0100
Subject: [PATCH 4/7] Remove old test file
---
tests/routers/openml/datasets_test.py | 343 --------------------------
1 file changed, 343 deletions(-)
delete mode 100644 tests/routers/openml/datasets_test.py
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
deleted file mode 100644
index aaedded3..00000000
--- a/tests/routers/openml/datasets_test.py
+++ /dev/null
@@ -1,343 +0,0 @@
-import re
-from http import HTTPStatus
-
-import httpx
-import pytest
-from sqlalchemy import text
-from sqlalchemy.ext.asyncio import AsyncConnection
-
-from core.errors import (
- DatasetAdminOnlyError,
- DatasetNoAccessError,
- DatasetNotFoundError,
- DatasetNotOwnedError,
- DatasetProcessingError,
-)
-from database.users import User
-from routers.openml.datasets import get_dataset, get_dataset_features, update_dataset_status
-from schemas.datasets.openml import DatasetMetadata, DatasetStatus
-from tests import constants
-from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER
-
-# ── py_api: routing + serialization, RFC 9457 format, regression ──
-
-
-async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
- response = await py_api.get("/datasets/1")
- assert response.status_code == HTTPStatus.OK
- description = response.json()
- assert description.pop("description").startswith("**Author**:")
- assert description == {
- "id": 1,
- "name": "anneal",
- "version": 1,
- "format": "arff",
- "description_version": 1,
- "upload_date": "2014-04-06T23:19:24",
- "licence": "Public",
- "url": "http://php-api/data/v1/download/1/anneal.arff",
- "parquet_url": "http://minio:9000/datasets/0000/0001/dataset_1.pq",
- "file_id": 1,
- "default_target_attribute": ["class"],
- "version_label": "1",
- "tag": ["study_14"],
- "visibility": "public",
- "status": "active",
- "processing_date": "2024-01-04T10:13:59",
- "md5_checksum": "4eaed8b6ec9d8211024b6c089b064761",
- "row_id_attribute": [],
- "ignore_attribute": [],
- "language": "",
- "error": None,
- "warning": None,
- "citation": "",
- "collection_date": None,
- "contributor": [],
- "creator": [],
- "paper_url": None,
- "original_data_url": [],
- }
-
-
-async def test_get_features_via_api(py_api: httpx.AsyncClient) -> None:
- response = await py_api.get("/datasets/features/4")
- assert response.status_code == HTTPStatus.OK
- assert response.json() == [
- {
- "index": 0,
- "name": "left-weight",
- "data_type": "numeric",
- "is_target": False,
- "is_ignore": False,
- "is_row_identifier": False,
- "number_of_missing_values": 0,
- },
- {
- "index": 1,
- "name": "left-distance",
- "data_type": "numeric",
- "is_target": False,
- "is_ignore": False,
- "is_row_identifier": False,
- "number_of_missing_values": 0,
- },
- {
- "index": 2,
- "name": "right-weight",
- "data_type": "numeric",
- "is_target": False,
- "is_ignore": False,
- "is_row_identifier": False,
- "number_of_missing_values": 0,
- },
- {
- "index": 3,
- "name": "right-distance",
- "data_type": "numeric",
- "is_target": False,
- "is_ignore": False,
- "is_row_identifier": False,
- "number_of_missing_values": 0,
- },
- {
- "index": 4,
- "name": "class",
- "data_type": "nominal",
- "nominal_values": ["B", "L", "R"],
- "is_target": True,
- "is_ignore": False,
- "is_row_identifier": False,
- "number_of_missing_values": 0,
- },
- ]
-
-
-async def test_update_status_via_api(py_api: httpx.AsyncClient) -> None:
- response = await py_api.post(
- "/datasets/status/update",
- json={"dataset_id": 1, "status": "active"},
- )
- # Without authentication, we expect 401 — confirms the route is wired up.
- assert response.status_code == HTTPStatus.UNAUTHORIZED
-
-
-async def test_rfc9457_error_format(py_api: httpx.AsyncClient) -> None:
- """Single test for the generic RFC 9457 exception handler — covers all error types."""
- response = await py_api.get("/datasets/100000")
- assert response.status_code == HTTPStatus.NOT_FOUND
- assert response.headers["content-type"] == "application/problem+json"
- error = response.json()
- assert error["type"] == DatasetNotFoundError.uri
- assert error["title"] == "Dataset Not Found"
- assert error["status"] == HTTPStatus.NOT_FOUND
- assert re.match(r"No dataset with id \d+ found.", error["detail"])
- assert error["code"] == "111"
-
-
-@pytest.mark.mut
-async def test_dataset_no_500_with_multiple_processing_entries(
- py_api: httpx.AsyncClient,
- expdb_test: AsyncConnection,
-) -> None:
- """Regression test for issue #145: multiple processing entries caused 500."""
- await expdb_test.execute(
- text("INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')"),
- )
- await expdb_test.execute(
- text(
- "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
- "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
- ),
- )
- response = await py_api.get("/datasets/1")
- assert response.status_code == HTTPStatus.OK
-
-
-# ── Direct call tests: get_dataset ──
-
-
-@pytest.mark.parametrize(
- "dataset_id",
- [-1, 138, 100_000],
-)
-async def test_get_dataset_not_found(
- dataset_id: int,
- expdb_test: AsyncConnection,
- user_test: AsyncConnection,
-) -> None:
- with pytest.raises(DatasetNotFoundError):
- await get_dataset(
- dataset_id=dataset_id,
- user=None,
- user_db=user_test,
- expdb_db=expdb_test,
- )
-
-
-@pytest.mark.parametrize(
- "user",
- [
- NO_USER,
- SOME_USER,
- ],
-)
-async def test_private_dataset_no_access(
- user: User | None,
- expdb_test: AsyncConnection,
- user_test: AsyncConnection,
-) -> None:
- with pytest.raises(DatasetNoAccessError) as e:
- await get_dataset(
- dataset_id=130,
- user=user,
- user_db=user_test,
- expdb_db=expdb_test,
- )
- assert e.value.status_code == HTTPStatus.FORBIDDEN
- assert e.value.uri == DatasetNoAccessError.uri
- no_access = 112
- assert e.value.code == no_access
-
-
-@pytest.mark.parametrize(
- "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
-)
-async def test_private_dataset_access(
- user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
-) -> None:
- dataset = await get_dataset(
- dataset_id=130,
- user=user,
- user_db=user_test,
- expdb_db=expdb_test,
- )
- assert isinstance(dataset, DatasetMetadata)
-
-
-# ── Direct call tests: get_dataset_features ──
-
-
-async def test_dataset_features_with_ontology(expdb_test: AsyncConnection) -> None:
- features = await get_dataset_features(dataset_id=11, user=None, expdb=expdb_test)
- by_index = {f.index: f for f in features}
- assert by_index[1].ontology == ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"]
- assert by_index[2].ontology == [
- "https://en.wikipedia.org/wiki/Car_door",
- "https://en.wikipedia.org/wiki/Door",
- ]
- assert by_index[3].ontology == [
- "https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States"
- ]
- assert by_index[0].ontology is None
- assert by_index[4].ontology is None
-
-
-async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
- with pytest.raises(DatasetNoAccessError):
- await get_dataset_features(dataset_id=130, user=None, expdb=expdb_test)
-
-
-@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
-async def test_dataset_features_access_to_private(user: User, expdb_test: AsyncConnection) -> None:
- features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
- assert isinstance(features, list)
-
-
-async def test_dataset_features_with_processing_error(expdb_test: AsyncConnection) -> None:
- dataset_id = 55
- with pytest.raises(DatasetProcessingError) as e:
- await get_dataset_features(dataset_id=dataset_id, user=None, expdb=expdb_test)
- assert "No features found" in e.value.detail
- assert str(dataset_id) in e.value.detail
-
-
-async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
- with pytest.raises(DatasetNotFoundError):
- await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test)
-
-
-# ── Direct call tests: update_dataset_status ──
-
-
-@pytest.mark.mut
-@pytest.mark.parametrize("dataset_id", [3, 4])
-async def test_dataset_status_update_active_to_deactivated(
- dataset_id: int, expdb_test: AsyncConnection
-) -> None:
- result = await update_dataset_status(
- dataset_id=dataset_id,
- status=DatasetStatus.DEACTIVATED,
- user=ADMIN_USER,
- expdb=expdb_test,
- )
- assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
-
-
-@pytest.mark.mut
-async def test_dataset_status_update_in_preparation_to_active(
- expdb_test: AsyncConnection,
-) -> None:
- dataset_id = next(iter(constants.IN_PREPARATION_ID))
- result = await update_dataset_status(
- dataset_id=dataset_id,
- status=DatasetStatus.ACTIVE,
- user=ADMIN_USER,
- expdb=expdb_test,
- )
- assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
-
-
-@pytest.mark.mut
-async def test_dataset_status_update_in_preparation_to_deactivated(
- expdb_test: AsyncConnection,
-) -> None:
- dataset_id = next(iter(constants.IN_PREPARATION_ID))
- result = await update_dataset_status(
- dataset_id=dataset_id,
- status=DatasetStatus.DEACTIVATED,
- user=ADMIN_USER,
- expdb=expdb_test,
- )
- assert result == {"dataset_id": dataset_id, "status": DatasetStatus.DEACTIVATED}
-
-
-@pytest.mark.mut
-async def test_dataset_status_update_deactivated_to_active(
- expdb_test: AsyncConnection,
-) -> None:
- dataset_id = next(iter(constants.DEACTIVATED_DATASETS))
- result = await update_dataset_status(
- dataset_id=dataset_id,
- status=DatasetStatus.ACTIVE,
- user=ADMIN_USER,
- expdb=expdb_test,
- )
- assert result == {"dataset_id": dataset_id, "status": DatasetStatus.ACTIVE}
-
-
-@pytest.mark.parametrize("dataset_id", [1, 33, 131])
-async def test_dataset_status_non_admin_cannot_activate(
- dataset_id: int,
- expdb_test: AsyncConnection,
-) -> None:
- with pytest.raises(DatasetAdminOnlyError):
- await update_dataset_status(
- dataset_id=dataset_id,
- status=DatasetStatus.ACTIVE,
- user=SOME_USER,
- expdb=expdb_test,
- )
-
-
-@pytest.mark.parametrize("dataset_id", [1, 2])
-async def test_dataset_status_non_owner_cannot_deactivate(
- dataset_id: int,
- expdb_test: AsyncConnection,
-) -> None:
- with pytest.raises(DatasetNotOwnedError):
- await update_dataset_status(
- dataset_id=dataset_id,
- status=DatasetStatus.DEACTIVATED,
- user=SOME_USER,
- expdb=expdb_test,
- )
From 7f588d9944bf091973f1e75b3e04ab60cd8b9893 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 26 Mar 2026 16:51:32 +0100
Subject: [PATCH 5/7] Fix pre-commit issues
---
tests/routers/openml/datasets_features_test.py | 4 +---
tests/routers/openml/datasets_list_datasets_test.py | 5 +++--
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/tests/routers/openml/datasets_features_test.py b/tests/routers/openml/datasets_features_test.py
index aa3988b5..193b0f31 100644
--- a/tests/routers/openml/datasets_features_test.py
+++ b/tests/routers/openml/datasets_features_test.py
@@ -86,9 +86,7 @@ async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
-async def test_dataset_features_access_to_private(
- user: User, expdb_test: AsyncConnection
-) -> None:
+async def test_dataset_features_access_to_private(user: User, expdb_test: AsyncConnection) -> None:
features = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
assert isinstance(features, list)
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index 10822cc5..d8fb5735 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -217,7 +217,8 @@ async def test_list_pagination(
expdb_db=expdb_test,
)
except NoResultsError:
- assert offset == 140, "Result was expected but NoResultsError was raised."
+ expect_empty_offset = 140
+ assert offset == expect_empty_offset, "Result was expected but NoResultsError was raised."
return
reported_ids = {dataset["did"] for dataset in result}
assert reported_ids == set(expected_ids)
@@ -340,6 +341,6 @@ async def test_list_data_quality(
status=DatasetStatusFilter.ALL,
user=None,
expdb_db=expdb_test,
- **{quality: range_},
+ **{quality: range_}, # type: ignore[arg-type]
)
assert len(result) == count
From 0b8f7b4a4a964c47259088479b6d6352666beae9 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 26 Mar 2026 17:20:47 +0100
Subject: [PATCH 6/7] Trigger CI
From e16b04ea8e0506bd2a633e654a45190803f786a7 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 26 Mar 2026 17:24:07 +0100
Subject: [PATCH 7/7] Kick off CI for changes to tests
---
.github/workflows/tests.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 07f64402..d0af8bca 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -6,6 +6,7 @@ on:
pull_request:
paths:
- 'src/**'
+ - 'tests/**'
- 'docker/**'
- 'docker-compose.yaml'
- 'pyproject.toml'