Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
pull_request:
paths:
- 'src/**'
- 'tests/**'
- 'docker/**'
- 'docker-compose.yaml'
- 'pyproject.toml'
Expand Down
106 changes: 53 additions & 53 deletions tests/routers/openml/dataset_tag_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import pytest
from sqlalchemy.ext.asyncio import AsyncConnection

from core.errors import AuthenticationFailedError, TagAlreadyExistsError
from core.errors import TagAlreadyExistsError
from database.datasets import get_tags_for
from database.users import User
from routers.openml.datasets import tag_dataset
from tests import constants
from tests.users import ApiKey
from tests.users import ADMIN_USER, OWNER_USER, SOME_USER, ApiKey


@pytest.mark.parametrize(
Expand All @@ -22,73 +24,71 @@ async def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: httpx.Async
json={"data_id": next(iter(constants.PRIVATE_DATASET_ID)), "tag": "test"},
)
assert response.status_code == HTTPStatus.UNAUTHORIZED
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
assert error["type"] == AuthenticationFailedError.uri
assert error["code"] == "103"


@pytest.mark.parametrize(
"tag",
["", "h@", " a", "a" * 65],
ids=["too short", "@", "space", "too long"],
)
async def test_dataset_tag_invalid_tag_is_rejected(
# Constraints for the tag are handled by FastAPI
tag: str,
py_api: httpx.AsyncClient,
) -> None:
new = await py_api.post(
f"/datasets/tag?api_key={ApiKey.ADMIN}",
json={"data_id": 1, "tag": tag},
)

assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
assert new.json()["detail"][0]["loc"] == ["body", "tag"]


# ── Direct call tests: tag_dataset ──


@pytest.mark.mut
@pytest.mark.parametrize(
"key",
[ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
"user",
[ADMIN_USER, SOME_USER, OWNER_USER],
ids=["administrator", "non-owner", "owner"],
)
async def test_dataset_tag(
key: ApiKey, expdb_test: AsyncConnection, py_api: httpx.AsyncClient
) -> None:
async def test_dataset_tag(user: User, expdb_test: AsyncConnection) -> None:
dataset_id, tag = next(iter(constants.PRIVATE_DATASET_ID)), "test"
response = await py_api.post(
f"/datasets/tag?api_key={key}",
json={"data_id": dataset_id, "tag": tag},
result = await tag_dataset(
data_id=dataset_id,
tag=tag,
user=user,
expdb_db=expdb_test,
)
assert response.status_code == HTTPStatus.OK
assert response.json() == {"data_tag": {"id": str(dataset_id), "tag": [tag]}}
assert result == {"data_tag": {"id": str(dataset_id), "tag": [tag]}}

tags = await get_tags_for(id_=dataset_id, connection=expdb_test)
assert tag in tags


@pytest.mark.mut
async def test_dataset_tag_returns_existing_tags(py_api: httpx.AsyncClient) -> None:
dataset_id, tag = 1, "test"
response = await py_api.post(
f"/datasets/tag?api_key={ApiKey.ADMIN}",
json={"data_id": dataset_id, "tag": tag},
async def test_dataset_tag_returns_existing_tags(expdb_test: AsyncConnection) -> None:
dataset_id, tag = 1, "test" # Dataset 1 already is tagged with 'study_14'
result = await tag_dataset(
data_id=dataset_id,
tag=tag,
user=ADMIN_USER,
expdb_db=expdb_test,
)
assert response.status_code == HTTPStatus.OK
assert response.json() == {"data_tag": {"id": str(dataset_id), "tag": ["study_14", tag]}}
assert result == {"data_tag": {"id": str(dataset_id), "tag": ["study_14", tag]}}


@pytest.mark.mut
async def test_dataset_tag_fails_if_tag_exists(py_api: httpx.AsyncClient) -> None:
async def test_dataset_tag_fails_if_tag_exists(expdb_test: AsyncConnection) -> None:
dataset_id, tag = 1, "study_14" # Dataset 1 already is tagged with 'study_14'
response = await py_api.post(
f"/datasets/tag?api_key={ApiKey.ADMIN}",
json={"data_id": dataset_id, "tag": tag},
)
assert response.status_code == HTTPStatus.CONFLICT
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
assert error["type"] == TagAlreadyExistsError.uri
assert error["code"] == "473"
assert str(dataset_id) in error["detail"]
assert tag in error["detail"]


@pytest.mark.parametrize(
"tag",
["", "h@", " a", "a" * 65],
ids=["too short", "@", "space", "too long"],
)
async def test_dataset_tag_invalid_tag_is_rejected(
tag: str,
py_api: httpx.AsyncClient,
) -> None:
new = await py_api.post(
f"/datasets/tag?api_key={ApiKey.ADMIN}",
json={"data_id": 1, "tag": tag},
)

assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
assert new.json()["detail"][0]["loc"] == ["body", "tag"]
with pytest.raises(TagAlreadyExistsError) as e:
await tag_dataset(
data_id=dataset_id,
tag=tag,
user=ADMIN_USER,
expdb_db=expdb_test,
)
assert str(dataset_id) in e.value.detail
assert tag in e.value.detail
104 changes: 104 additions & 0 deletions tests/routers/openml/datasets_features_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Tests for the GET /datasets/features/{dataset_id} endpoint."""

from http import HTTPStatus

import httpx
import pytest
from sqlalchemy.ext.asyncio import AsyncConnection

from core.errors import DatasetNoAccessError, DatasetNotFoundError, DatasetProcessingError
from database.users import User
from routers.openml.datasets import get_dataset_features
from tests.users import ADMIN_USER, DATASET_130_OWNER


async def test_get_features_via_api(py_api: httpx.AsyncClient) -> None:
    """GET /datasets/features/4 returns the five features of the balance-scale data."""
    response = await py_api.get("/datasets/features/4")
    assert response.status_code == HTTPStatus.OK

    def numeric_feature(index: int, name: str) -> dict:
        # All non-target features of this dataset share the same numeric shape.
        return {
            "index": index,
            "name": name,
            "data_type": "numeric",
            "is_target": False,
            "is_ignore": False,
            "is_row_identifier": False,
            "number_of_missing_values": 0,
        }

    expected = [
        numeric_feature(0, "left-weight"),
        numeric_feature(1, "left-distance"),
        numeric_feature(2, "right-weight"),
        numeric_feature(3, "right-distance"),
        {
            "index": 4,
            "name": "class",
            "data_type": "nominal",
            "nominal_values": ["B", "L", "R"],
            "is_target": True,
            "is_ignore": False,
            "is_row_identifier": False,
            "number_of_missing_values": 0,
        },
    ]
    assert response.json() == expected


async def test_dataset_features_with_ontology(expdb_test: AsyncConnection) -> None:
    """Features 1-3 of dataset 11 carry ontology links; features 0 and 4 have none."""
    features = await get_dataset_features(dataset_id=11, user=None, expdb=expdb_test)
    ontology_by_index = {feature.index: feature.ontology for feature in features}
    expected_ontologies = {
        0: None,
        1: ["https://en.wikipedia.org/wiki/Service_(motor_vehicle)"],
        2: [
            "https://en.wikipedia.org/wiki/Car_door",
            "https://en.wikipedia.org/wiki/Door",
        ],
        3: ["https://en.wikipedia.org/wiki/Passenger_vehicles_in_the_United_States"],
        4: None,
    }
    for index, ontology in expected_ontologies.items():
        assert ontology_by_index[index] == ontology


async def test_dataset_features_no_access(expdb_test: AsyncConnection) -> None:
    """An anonymous user may not fetch the features of private dataset 130."""
    with pytest.raises(DatasetNoAccessError):
        await get_dataset_features(expdb=expdb_test, user=None, dataset_id=130)


@pytest.mark.parametrize("user", [ADMIN_USER, DATASET_130_OWNER])
async def test_dataset_features_access_to_private(user: User, expdb_test: AsyncConnection) -> None:
    """The administrator and the owner can fetch features of private dataset 130."""
    result = await get_dataset_features(dataset_id=130, user=user, expdb=expdb_test)
    assert isinstance(result, list)


async def test_dataset_features_with_processing_error(expdb_test: AsyncConnection) -> None:
    """Requesting features of dataset 55 raises a processing error naming the dataset."""
    dataset_id = 55
    with pytest.raises(DatasetProcessingError) as exc_info:
        await get_dataset_features(dataset_id=dataset_id, user=None, expdb=expdb_test)

    detail = exc_info.value.detail
    assert "No features found" in detail
    assert str(dataset_id) in detail


async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
    """Requesting features for an unknown dataset id raises DatasetNotFoundError."""
    with pytest.raises(DatasetNotFoundError):
        await get_dataset_features(expdb=expdb_test, user=None, dataset_id=1000)
142 changes: 142 additions & 0 deletions tests/routers/openml/datasets_get_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""Tests for the GET /datasets/{dataset_id} endpoint."""

import re
from http import HTTPStatus

import httpx
import pytest
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncConnection

from core.errors import DatasetNoAccessError, DatasetNotFoundError
from database.users import User
from routers.openml.datasets import get_dataset
from schemas.datasets.openml import DatasetMetadata
from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER


async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
    """GET /datasets/1 returns the complete metadata of the public 'anneal' dataset."""
    response = await py_api.get("/datasets/1")
    assert response.status_code == HTTPStatus.OK

    body = response.json()
    # The description is long free text; check only its prefix, then remove it so the
    # remaining fields can be compared in a single equality check.
    assert body.pop("description").startswith("**Author**:")

    expected = {
        "id": 1,
        "name": "anneal",
        "version": 1,
        "format": "arff",
        "description_version": 1,
        "upload_date": "2014-04-06T23:19:24",
        "licence": "Public",
        "url": "http://php-api/data/v1/download/1/anneal.arff",
        "parquet_url": "http://minio:9000/datasets/0000/0001/dataset_1.pq",
        "file_id": 1,
        "default_target_attribute": ["class"],
        "version_label": "1",
        "tag": ["study_14"],
        "visibility": "public",
        "status": "active",
        "processing_date": "2024-01-04T10:13:59",
        "md5_checksum": "4eaed8b6ec9d8211024b6c089b064761",
        "row_id_attribute": [],
        "ignore_attribute": [],
        "language": "",
        "error": None,
        "warning": None,
        "citation": "",
        "collection_date": None,
        "contributor": [],
        "creator": [],
        "paper_url": None,
        "original_data_url": [],
    }
    assert body == expected


async def test_rfc9457_error_format(py_api: httpx.AsyncClient) -> None:
    """Single test for the generic RFC 9457 exception handler — covers all error types."""
    response = await py_api.get("/datasets/100000")
    assert response.status_code == HTTPStatus.NOT_FOUND
    assert response.headers["content-type"] == "application/problem+json"
    error = response.json()
    assert error["type"] == DatasetNotFoundError.uri
    assert error["title"] == "Dataset Not Found"
    assert error["status"] == HTTPStatus.NOT_FOUND
    # Escape the final dot: a bare `.` matches any character, so the pattern would
    # also accept e.g. "...found!" and silently weaken the assertion.
    assert re.match(r"No dataset with id \d+ found\.", error["detail"])
    assert error["code"] == "111"


@pytest.mark.mut
async def test_dataset_no_500_with_multiple_processing_entries(
    py_api: httpx.AsyncClient,
    expdb_test: AsyncConnection,
) -> None:
    """Regression test for issue #145: multiple processing entries caused 500."""
    # Register a second evaluation engine and an extra processing record for dataset 1,
    # so the dataset has more than one data_processed row.
    setup_statements = [
        "INSERT INTO evaluation_engine(id, name, description) VALUES (99, 'test_engine', '')",
        "INSERT INTO data_processed(did, evaluation_engine_id, user_id, processing_date) "
        "VALUES (1, 99, 2, '2020-01-01 00:00:00')",
    ]
    for statement in setup_statements:
        await expdb_test.execute(text(statement))

    response = await py_api.get("/datasets/1")
    assert response.status_code == HTTPStatus.OK


@pytest.mark.parametrize(
    "dataset_id",
    [-1, 138, 100_000],
)
async def test_get_dataset_not_found(
    dataset_id: int,
    expdb_test: AsyncConnection,
    user_test: AsyncConnection,
) -> None:
    """Ids that do not resolve to a visible dataset raise DatasetNotFoundError."""
    with pytest.raises(DatasetNotFoundError):
        await get_dataset(dataset_id=dataset_id, user=None, user_db=user_test, expdb_db=expdb_test)


@pytest.mark.parametrize(
    "user",
    [
        NO_USER,
        SOME_USER,
    ],
)
async def test_private_dataset_no_access(
    user: User | None,
    expdb_test: AsyncConnection,
    user_test: AsyncConnection,
) -> None:
    """Anonymous and unrelated users are forbidden from reading private dataset 130."""
    with pytest.raises(DatasetNoAccessError) as exc_info:
        await get_dataset(dataset_id=130, user=user, user_db=user_test, expdb_db=expdb_test)

    error = exc_info.value
    assert error.status_code == HTTPStatus.FORBIDDEN
    assert error.uri == DatasetNoAccessError.uri
    # Named constant keeps the magic error code out of the assertion itself.
    no_access_code = 112
    assert error.code == no_access_code


@pytest.mark.parametrize(
    "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
)
async def test_private_dataset_access(
    user: User, expdb_test: AsyncConnection, user_test: AsyncConnection
) -> None:
    """The owner and an administrator can retrieve private dataset 130."""
    result = await get_dataset(dataset_id=130, user=user, user_db=user_test, expdb_db=expdb_test)
    assert isinstance(result, DatasetMetadata)
Loading
Loading