Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,20 @@ class ServiceNotFoundError(ProblemDetailError):
_default_status_code = HTTPStatus.NOT_FOUND


# =============================================================================
# Quality Errors
# =============================================================================


class NoQualitiesError(ProblemDetailError):
    """Signal that a dataset has no quality values stored for it."""

    # Human-readable summary and problem-type identifier for this error.
    title = "No Qualities Found"
    uri = "https://openml.org/problems/quality-no-qualities"

    # Default numeric error code and default HTTP status (412) for this error.
    _default_code = 362
    _default_status_code = HTTPStatus.PRECONDITION_FAILED


# =============================================================================
# Internal Errors
# =============================================================================
Expand Down
39 changes: 24 additions & 15 deletions src/routers/openml/qualities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from http import HTTPStatus
from typing import Annotated, Literal

from fastapi import APIRouter, Depends
Expand All @@ -7,7 +6,12 @@
import database.datasets
import database.qualities
from core.access import _user_has_access
from core.errors import DatasetNotFoundError
from core.errors import (
DatasetNotFoundError,
DatasetNotProcessedError,
DatasetProcessingError,
NoQualitiesError,
)
from database.users import User
from routers.dependencies import expdb_connection, fetch_user
from schemas.datasets.openml import Quality
Expand Down Expand Up @@ -35,19 +39,24 @@ async def get_qualities(
) -> list[Quality]:
dataset = await database.datasets.get(dataset_id, expdb)
if not dataset or not await _user_has_access(dataset, user):
# Backwards compatibility: PHP API returns 412 with code 113
msg = f"Dataset with id {dataset_id} not found."
no_data_file = 113
raise DatasetNotFoundError(
msg,
code=no_data_file,
status_code=HTTPStatus.PRECONDITION_FAILED,
)
return await database.qualities.get_for_dataset(dataset_id, expdb)
# The PHP API provided (sometimes) helpful error messages
# if not qualities:
# check if dataset exists: error 360
# check if user has access: error 361
# check if there is a data processed entry and forward the error: 364
# if nothing in process table: 363
# otherwise: error 362
code=361,
) from None

processing = await database.datasets.get_latest_processing_update(dataset_id, expdb)
if processing is None:
msg = f"Dataset not processed yet for dataset {dataset_id}."
raise DatasetNotProcessedError(msg, code=363)

if processing.error:
msg = processing.error.strip() or "Error occurred during processing."
raise DatasetProcessingError(msg, code=364)

qualities = await database.qualities.get_for_dataset(dataset_id, expdb)
if not qualities:
msg = f"No qualities found for dataset {dataset_id}."
raise NoQualitiesError(msg)

return qualities
74 changes: 46 additions & 28 deletions tests/routers/openml/qualities_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import re
from http import HTTPStatus

import deepdiff
Expand All @@ -7,8 +8,6 @@
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncConnection

from core.errors import DatasetNotFoundError


async def _remove_quality_from_database(quality_name: str, expdb_test: AsyncConnection) -> None:
await expdb_test.execute(
Expand Down Expand Up @@ -287,7 +286,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None:

@pytest.mark.parametrize(
"data_id",
list(set(range(1, 132)) - {55, 56, 59, 116, 130}),
[*list(set(range(1, 133))), 9999999],
)
async def test_get_quality_identical(
data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
Expand All @@ -296,8 +295,24 @@ async def test_get_quality_identical(
py_api.get(f"/datasets/qualities/{data_id}"),
php_api.get(f"/data/qualities/{data_id}"),
)
assert python_response.status_code == php_response.status_code
if php_response.status_code == HTTPStatus.OK:
_assert_get_quality_success_equal(python_response, php_response)
return

php_error_code = int(php_response.json()["error"]["code"])
if php_error_code == 361: # noqa: PLR2004
_assert_get_quality_error_dataset_not_found(python_response, php_response)
elif php_error_code == 364: # noqa: PLR2004
_assert_get_quality_error_dataset_process_error(python_response, php_response)
else:
msg = f"Dataset {data_id} response not under test:", php_response.json()
raise AssertionError(msg)


def _assert_get_quality_success_equal(
python_response: httpx.Response, php_response: httpx.Response
) -> None:
assert python_response.status_code == php_response.status_code
expected = [
{
"name": quality["name"],
Expand All @@ -308,28 +323,31 @@ async def test_get_quality_identical(
assert python_response.json() == expected


@pytest.mark.parametrize(
"data_id",
[55, 56, 59, 116, 130, 132],
)
async def test_get_quality_identical_error(
data_id: int,
py_api: httpx.AsyncClient,
php_api: httpx.AsyncClient,
def _assert_get_quality_error_dataset_not_found(
python_response: httpx.Response, php_response: httpx.Response
) -> None:
if data_id in [55, 56, 59]:
pytest.skip("Detailed error for code 364 (failed processing) not yet supported.")
if data_id in [116]: # noqa: FURB171
pytest.skip("Detailed error for code 362 (no qualities) not yet supported.")
python_response, php_response = await asyncio.gather(
py_api.get(f"/datasets/qualities/{data_id}"),
php_api.get(f"/data/qualities/{data_id}"),
)
assert python_response.status_code == php_response.status_code
# RFC 9457: Python API now returns problem+json format
assert python_response.headers["content-type"] == "application/problem+json"
error = python_response.json()
assert error["type"] == DatasetNotFoundError.uri
# Verify the error message matches the PHP API semantically
assert php_response.json()["error"]["message"] == "Unknown dataset"
assert error["detail"] == f"Dataset with id {data_id} not found."
assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
assert python_response.status_code == HTTPStatus.NOT_FOUND

php_error = php_response.json()["error"]
py_error = python_response.json()

assert php_error["code"] == py_error["code"]
assert php_error["message"] == "Unknown dataset"
assert re.match(r"Dataset with id \d+ not found.", py_error["detail"])


def _assert_get_quality_error_dataset_process_error(
    python_response: httpx.Response, php_response: httpx.Response
) -> None:
    """Assert that both APIs report the same dataset-processing failure.

    The two responses must share the HTTP status code and the error code.
    The PHP payload is read from its ``"error"`` object, while the Python
    payload is read from the top level of the JSON body.
    """
    assert php_response.status_code == python_response.status_code

    php_payload = php_response.json()["error"]
    py_payload = python_response.json()

    assert php_payload["code"] == py_payload["code"]
    assert php_payload["message"] == "Dataset processed with error"
    assert py_payload["title"] == "Dataset Processing Error"

    # The PHP API can add some unnecessary escape characters to the message,
    # so only the first and the last 30 characters are compared.
    for window in (slice(None, 30), slice(-30, None)):
        assert php_payload["additional_information"][window] == py_payload["detail"][window]
Loading