From 9917272eec3946433c5e67ff2de7cb52a651ecae Mon Sep 17 00:00:00 2001 From: Tim Pellissier Date: Tue, 21 Oct 2025 21:47:46 -0700 Subject: [PATCH 1/3] Structured errors 2: more error types --- src/dataverse_sdk/error_codes.py | 19 ++++++ src/dataverse_sdk/errors.py | 51 +++++++++++--- src/dataverse_sdk/odata.py | 113 +++++++++++++++++++++---------- tests/test_http_errors.py | 94 +++++++++++++++++++++++++ tests/test_logical_crud.py | 3 +- 5 files changed, 231 insertions(+), 49 deletions(-) create mode 100644 tests/test_http_errors.py diff --git a/src/dataverse_sdk/error_codes.py b/src/dataverse_sdk/error_codes.py index 9406e0e2..4a58f75a 100644 --- a/src/dataverse_sdk/error_codes.py +++ b/src/dataverse_sdk/error_codes.py @@ -26,3 +26,22 @@ HTTP_503, HTTP_504, } + +# Validation subcodes +VALIDATION_SQL_NOT_STRING = "validation_sql_not_string" +VALIDATION_SQL_EMPTY = "validation_sql_empty" +VALIDATION_ENUM_NO_MEMBERS = "validation_enum_no_members" +VALIDATION_ENUM_NON_INT_VALUE = "validation_enum_non_int_value" +VALIDATION_UNSUPPORTED_COLUMN_TYPE = "validation_unsupported_column_type" +VALIDATION_UNSUPPORTED_CACHE_KIND = "validation_unsupported_cache_kind" + +# SQL parse subcodes +SQL_PARSE_TABLE_NOT_FOUND = "sql_parse_table_not_found" + +# Metadata subcodes +METADATA_ENTITYSET_NOT_FOUND = "metadata_entityset_not_found" +METADATA_ENTITYSET_NAME_MISSING = "metadata_entityset_name_missing" +METADATA_TABLE_NOT_FOUND = "metadata_table_not_found" +METADATA_TABLE_ALREADY_EXISTS = "metadata_table_already_exists" +METADATA_ATTRIBUTE_RETRY_EXHAUSTED = "metadata_attribute_retry_exhausted" +METADATA_PICKLIST_RETRY_EXHAUSTED = "metadata_picklist_retry_exhausted" diff --git a/src/dataverse_sdk/errors.py b/src/dataverse_sdk/errors.py index a492b473..ff0081ed 100644 --- a/src/dataverse_sdk/errors.py +++ b/src/dataverse_sdk/errors.py @@ -3,7 +3,6 @@ import datetime as _dt class DataverseError(Exception): - """Base structured error for the Dataverse SDK.""" def __init__( self, message: str, @@ -12,8 +11,8 @@ def __init__( subcode: Optional[str] = None, status_code: Optional[int] = None, details: Optional[Dict[str, Any]] = None, - source: Optional[Dict[str, Any]] = None, - is_transient: Optional[bool] = None, + source: Optional[str] = None, + is_transient: bool = False, ) -> None: super().__init__(message) self.message = message @@ -21,7 +20,7 @@ def __init__( self.subcode = subcode self.status_code = status_code self.details = details or {} - self.source = source or {} + self.source = source or "client" self.is_transient = is_transient self.timestamp = _dt.datetime.utcnow().isoformat() + "Z" @@ -40,25 +39,55 @@ def to_dict(self) -> Dict[str, Any]: def __repr__(self) -> str: # pragma: no cover return f"{self.__class__.__name__}(code={self.code!r}, subcode={self.subcode!r}, message={self.message!r})" +class ValidationError(DataverseError): + def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): + super().__init__(message, code="validation_error", subcode=subcode, details=details, source="client") + +class MetadataError(DataverseError): + def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): + super().__init__(message, code="metadata_error", subcode=subcode, details=details, source="client") + +class SQLParseError(DataverseError): + def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): + super().__init__(message, code="sql_parse_error", subcode=subcode, details=details, source="client") + class HttpError(DataverseError): def __init__( self, message: str, *, + status_code: int, subcode: Optional[str] = None, - status_code: Optional[int] = None, + service_error_code: Optional[str] = None, + correlation_id: Optional[str] = None, + request_id: Optional[str] = None, + traceparent: Optional[str] = None, + body_excerpt: Optional[str] = None, + retry_after: Optional[int] = None, details: Optional[Dict[str, Any]] = None, - source: Optional[Dict[str, Any]] = None, - is_transient: Optional[bool] = None, + is_transient: bool = False, ) -> None: + d = details or {} + if service_error_code is not None: + d["service_error_code"] = service_error_code + if correlation_id is not None: + d["correlation_id"] = correlation_id + if request_id is not None: + d["request_id"] = request_id + if traceparent is not None: + d["traceparent"] = traceparent + if body_excerpt is not None: + d["body_excerpt"] = body_excerpt + if retry_after is not None: + d["retry_after"] = retry_after super().__init__( message, - code="http", + code="http_error", subcode=subcode, status_code=status_code, - details=details, - source=source, + details=d, + source="server", is_transient=is_transient, ) -__all__ = ["DataverseError", "HttpError"] +__all__ = ["DataverseError", "HttpError", "ValidationError", "MetadataError", "SQLParseError"] diff --git a/src/dataverse_sdk/odata.py b/src/dataverse_sdk/odata.py index 1129a6bc..74483760 100644 --- a/src/dataverse_sdk/odata.py +++ b/src/dataverse_sdk/odata.py @@ -10,7 +10,7 @@ from .http import HttpClient from .odata_upload_files import ODataFileUpload -from .errors import HttpError +from .errors import HttpError, ValidationError, MetadataError, SQLParseError from . import error_codes as ec @@ -76,45 +76,67 @@ def _raw_request(self, method: str, url: str, **kwargs): return self._http.request(method, url, **kwargs) def _request(self, method: str, url: str, *, expected: tuple[int, ...] = (200, 201, 202, 204), **kwargs): - """Execute HTTP request; raise HttpError with structured details on failure. - - Returns the raw response for success codes; raises HttpError with extracted - Dataverse error payload fields and correlation identifiers otherwise. - """ - headers = kwargs.pop("headers", None) - kwargs["headers"] = self._merge_headers(headers) + headers_in = kwargs.pop("headers", None) + kwargs["headers"] = self._merge_headers(headers_in) r = self._raw_request(method, url, **kwargs) if r.status_code in expected: return r - payload = {} + headers = getattr(r, "headers", {}) or {} + body_excerpt = (getattr(r, "text", "") or "")[:200] + svc_code = None + msg = f"HTTP {r.status_code}" try: - payload = r.json() if getattr(r, 'text', None) else {} + data = r.json() if getattr(r, "text", None) else {} + if isinstance(data, dict): + inner = data.get("error") + if isinstance(inner, dict): + svc_code = inner.get("code") + imsg = inner.get("message") + if isinstance(imsg, str) and imsg.strip(): + msg = imsg.strip() + else: + imsg2 = data.get("message") + if isinstance(imsg2, str) and imsg2.strip(): + msg = imsg2.strip() except Exception: - payload = {} - svc_err = payload.get("error") if isinstance(payload, dict) else None - svc_code = svc_err.get("code") if isinstance(svc_err, dict) else None - svc_msg = svc_err.get("message") if isinstance(svc_err, dict) else None - message = svc_msg or f"HTTP {r.status_code}" - subcode = f"http_{r.status_code}" - - headers = getattr(r, 'headers', {}) or {} - details = { - "service_error_code": svc_code, - "body_excerpt": (getattr(r, 'text', '') or '')[:200], - "correlation_id": headers.get("x-ms-correlation-request-id") or headers.get("x-ms-correlation-id"), - "request_id": headers.get("x-ms-client-request-id") or headers.get("request-id"), - "traceparent": headers.get("traceparent"), + pass + sc = r.status_code + sub_map = { + 400: ec.HTTP_400, + 401: ec.HTTP_401, + 403: ec.HTTP_403, + 404: ec.HTTP_404, + 409: ec.HTTP_409, + 412: ec.HTTP_412, + 415: ec.HTTP_415, + 429: ec.HTTP_429, + 500: ec.HTTP_500, + 502: ec.HTTP_502, + 503: ec.HTTP_503, + 504: ec.HTTP_504, } + subcode = sub_map.get(sc, f"http_{sc}") + correlation_id = headers.get("x-ms-correlation-request-id") or headers.get("x-ms-correlation-id") + request_id = headers.get("x-ms-client-request-id") or headers.get("request-id") or headers.get("x-ms-request-id") + traceparent = headers.get("traceparent") ra = headers.get("Retry-After") + retry_after = None if ra: - details["retry_after"] = ra - is_transient = r.status_code in (429, 502, 503, 504) + try: + retry_after = int(ra) + except Exception: + retry_after = None + is_transient = sc in (429, 502, 503, 504) raise HttpError( - message, + msg, + status_code=sc, subcode=subcode, - status_code=r.status_code, - details=details, - source={"method": method, "url": url}, + service_error_code=svc_code, + correlation_id=correlation_id, + request_id=request_id, + traceparent=traceparent, + body_excerpt=body_excerpt, + retry_after=retry_after, is_transient=is_transient, ) @@ -497,8 +519,10 @@ def _query_sql(self, sql: str) -> list[dict[str, Any]]: RuntimeError If metadata lookup for the logical name fails. """ - if not isinstance(sql, str) or not sql.strip(): - raise ValueError("sql must be a non-empty string") + if not isinstance(sql, str): + raise ValidationError("sql must be a string", subcode=ec.VALIDATION_SQL_NOT_STRING) + if not sql.strip(): + raise ValidationError("sql must be a non-empty string", subcode=ec.VALIDATION_SQL_EMPTY) sql = sql.strip() # Extract logical table name via helper (robust to identifiers ending with 'from') @@ -567,11 +591,17 @@ def _entity_set_from_logical(self, logical: str) -> str: items = [] if not items: plural_hint = " (did you pass a plural entity set name instead of the singular logical name?)" if logical.endswith("s") and not logical.endswith("ss") else "" - raise RuntimeError(f"Unable to resolve entity set for logical name '{logical}'. Provide the singular logical name.{plural_hint}") + raise MetadataError( + f"Unable to resolve entity set for logical name '{logical}'. Provide the singular logical name.{plural_hint}", + subcode=ec.METADATA_ENTITYSET_NOT_FOUND, + ) md = items[0] es = md.get("EntitySetName") if not es: - raise RuntimeError(f"Metadata response missing EntitySetName for logical '{logical}'.") + raise MetadataError( + f"Metadata response missing EntitySetName for logical '{logical}'.", + subcode=ec.METADATA_ENTITYSET_NAME_MISSING, + ) self._logical_to_entityset_cache[logical] = es primary_id_attr = md.get("PrimaryIdAttribute") if isinstance(primary_id_attr, str) and primary_id_attr: @@ -1011,7 +1041,10 @@ def _delete_table(self, tablename: str) -> None: entity_schema = schema_name ent = self._get_entity_by_schema(entity_schema) if not ent or not ent.get("MetadataId"): - raise RuntimeError(f"Table '{entity_schema}' not found.") + raise MetadataError( + f"Table '{entity_schema}' not found.", + subcode=ec.METADATA_TABLE_NOT_FOUND, + ) metadata_id = ent["MetadataId"] url = f"{self.api}/EntityDefinitions({metadata_id})" r = self._request("delete", url) @@ -1023,7 +1056,10 @@ def _create_table(self, tablename: str, schema: Dict[str, Any]) -> Dict[str, Any ent = self._get_entity_by_schema(entity_schema) if ent: - raise RuntimeError(f"Table '{entity_schema}' already exists. No update performed.") + raise MetadataError( + f"Table '{entity_schema}' already exists.", + subcode=ec.METADATA_TABLE_ALREADY_EXISTS, + ) created_cols: List[str] = [] primary_attr_schema = "new_Name" if "_" not in entity_schema else f"{entity_schema.split('_',1)[0]}_Name" @@ -1077,7 +1113,10 @@ def _flush_cache( """ k = (kind or "").strip().lower() if k != "picklist": - raise ValueError(f"Unsupported cache kind '{kind}' (only 'picklist' is implemented)") + raise ValidationError( + f"Unsupported cache kind '{kind}' (only 'picklist' is implemented)", + subcode=ec.VALIDATION_UNSUPPORTED_CACHE_KIND, + ) removed = len(self._picklist_label_cache) self._picklist_label_cache.clear() diff --git a/tests/test_http_errors.py b/tests/test_http_errors.py new file mode 100644 index 00000000..f3b33a6c --- /dev/null +++ b/tests/test_http_errors.py @@ -0,0 +1,94 @@ +import pytest +from dataverse_sdk.errors import HttpError +from dataverse_sdk import error_codes as ec +from dataverse_sdk.odata import ODataClient + +class DummyAuth: + def acquire_token(self, scope): + class T: access_token = "x" + return T() + +class DummyHTTP: + def __init__(self, responses): + self._responses = responses + def request(self, method, url, **kwargs): + if not self._responses: + raise AssertionError("No more responses") + status, headers, body = self._responses.pop(0) + class R: + pass + r = R() + r.status_code = status + r.headers = headers + if isinstance(body, dict): + import json + r.text = json.dumps(body) + def json_func(): return body + r.json = json_func + else: + r.text = body or "" + def json_fail(): raise ValueError("non-json") + r.json = json_fail + return r + +class TestClient(ODataClient): + def __init__(self, responses): + super().__init__(DummyAuth(), "https://org.example", None) + self._http = DummyHTTP(responses) + +# --- Tests --- + +def test_http_404_subcode_and_service_code(): + responses = [( + 404, + {"x-ms-correlation-request-id": "cid1"}, + {"error": {"code": "0x800404", "message": "Not found"}}, + )] + c = TestClient(responses) + with pytest.raises(HttpError) as ei: + c._request("get", c.api + "/accounts(abc)") + err = ei.value.to_dict() + assert err["subcode"] == ec.HTTP_404 + assert err["details"]["service_error_code"] == "0x800404" + + +def test_http_429_transient_and_retry_after(): + responses = [( + 429, + {"Retry-After": "7"}, + {"error": {"message": "Throttle"}}, + )] + c = TestClient(responses) + with pytest.raises(HttpError) as ei: + c._request("get", c.api + "/accounts") + err = ei.value.to_dict() + assert err["is_transient"] is True + assert err["subcode"] == ec.HTTP_429 + assert err["details"]["retry_after"] == 7 + + +def test_http_500_body_excerpt(): + responses = [( + 500, + {}, + "Internal failure XYZ stack truncated", + )] + c = TestClient(responses) + with pytest.raises(HttpError) as ei: + c._request("get", c.api + "/accounts") + err = ei.value.to_dict() + assert err["subcode"] == ec.HTTP_500 + assert "XYZ stack" in err["details"]["body_excerpt"] + + +def test_http_non_mapped_status_code_subcode_fallback(): + responses = [( + 418, # I'm a teapot (not in map) + {}, + {"error": {"message": "Teapot"}}, + )] + c = TestClient(responses) + with pytest.raises(HttpError) as ei: + c._request("get", c.api + "/accounts") + err = ei.value.to_dict() + assert err["subcode"] == "http_418" diff --git a/tests/test_logical_crud.py b/tests/test_logical_crud.py index 8b552fee..f66033f9 100644 --- a/tests/test_logical_crud.py +++ b/tests/test_logical_crud.py @@ -1,6 +1,7 @@ import types import pytest from dataverse_sdk.odata import ODataClient +from dataverse_sdk.errors import MetadataError class DummyAuth: def acquire_token(self, scope): @@ -115,5 +116,5 @@ def test_unknown_logical_name_raises(): (200, {}, {"value": []}), # metadata lookup returns empty ] c = TestableClient(responses) - with pytest.raises(RuntimeError): + with pytest.raises(MetadataError): c._create("nonexistent", {"x": 1}) \ No newline at end of file From d7b060ea86cb15262031a63c639eb7ff04776f39 Mon Sep 17 00:00:00 2001 From: Tim Pellissier Date: Wed, 22 Oct 2025 16:05:41 -0700 Subject: [PATCH 2/3] PR fixes --- src/dataverse_sdk/error_codes.py | 24 ++++++++++++++++++++++++ src/dataverse_sdk/errors.py | 6 +++--- src/dataverse_sdk/odata.py | 20 +++----------------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/dataverse_sdk/error_codes.py b/src/dataverse_sdk/error_codes.py index 4a58f75a..12b35aae 100644 --- a/src/dataverse_sdk/error_codes.py +++ b/src/dataverse_sdk/error_codes.py @@ -45,3 +45,27 @@ METADATA_TABLE_ALREADY_EXISTS = "metadata_table_already_exists" METADATA_ATTRIBUTE_RETRY_EXHAUSTED = "metadata_attribute_retry_exhausted" METADATA_PICKLIST_RETRY_EXHAUSTED = "metadata_picklist_retry_exhausted" + +# Mapping from status code -> subcode +HTTP_STATUS_TO_SUBCODE: dict[int, str] = { + 400: HTTP_400, + 401: HTTP_401, + 403: HTTP_403, + 404: HTTP_404, + 409: HTTP_409, + 412: HTTP_412, + 415: HTTP_415, + 429: HTTP_429, + 500: HTTP_500, + 502: HTTP_502, + 503: HTTP_503, + 504: HTTP_504, +} + +TRANSIENT_STATUS = {429, 502, 503, 504} + +def http_subcode(status: int) -> str: + return HTTP_STATUS_TO_SUBCODE.get(status, f"http_{status}") + +def is_transient_status(status: int) -> bool: + return status in TRANSIENT_STATUS diff --git a/src/dataverse_sdk/errors.py b/src/dataverse_sdk/errors.py index ff0081ed..4dfddeb3 100644 --- a/src/dataverse_sdk/errors.py +++ b/src/dataverse_sdk/errors.py @@ -3,6 +3,7 @@ import datetime as _dt class DataverseError(Exception): + """Base structured error for the Dataverse SDK.""" def __init__( self, message: str, @@ -55,8 +56,8 @@ class HttpError(DataverseError): def __init__( self, message: str, - *, status_code: int, + is_transient: bool = False, subcode: Optional[str] = None, service_error_code: Optional[str] = None, correlation_id: Optional[str] = None, @@ -64,8 +65,7 @@ def __init__( traceparent: Optional[str] = None, body_excerpt: Optional[str] = None, retry_after: Optional[int] = None, - details: Optional[Dict[str, Any]] = None, - is_transient: bool = False, + details: Optional[Dict[str, Any]] = None ) -> None: d = details or {} if service_error_code is not None: diff --git a/src/dataverse_sdk/odata.py b/src/dataverse_sdk/odata.py index 74483760..e5712874 100644 --- a/src/dataverse_sdk/odata.py +++ b/src/dataverse_sdk/odata.py @@ -10,7 +10,7 @@ from .http import HttpClient from .odata_upload_files import ODataFileUpload -from .errors import HttpError, ValidationError, MetadataError, SQLParseError +from .errors import * from . import error_codes as ec @@ -101,21 +101,7 @@ def _request(self, method: str, url: str, *, expected: tuple[int, ...] = (200, 2 except Exception: pass sc = r.status_code - sub_map = { - 400: ec.HTTP_400, - 401: ec.HTTP_401, - 403: ec.HTTP_403, - 404: ec.HTTP_404, - 409: ec.HTTP_409, - 412: ec.HTTP_412, - 415: ec.HTTP_415, - 429: ec.HTTP_429, - 500: ec.HTTP_500, - 502: ec.HTTP_502, - 503: ec.HTTP_503, - 504: ec.HTTP_504, - } - subcode = sub_map.get(sc, f"http_{sc}") + subcode = ec.http_subcode(sc) correlation_id = headers.get("x-ms-correlation-request-id") or headers.get("x-ms-correlation-id") request_id = headers.get("x-ms-client-request-id") or headers.get("request-id") or headers.get("x-ms-request-id") traceparent = headers.get("traceparent") @@ -126,7 +112,7 @@ def _request(self, method: str, url: str, *, expected: tuple[int, ...] = (200, 2 retry_after = int(ra) except Exception: retry_after = None - is_transient = sc in (429, 502, 503, 504) + is_transient = ec.is_transient_status(sc) raise HttpError( msg, status_code=sc, From dc6500c2c0291c821fdaf2146e719afe96bc1f0f Mon Sep 17 00:00:00 2001 From: Tim Pellissier Date: Wed, 22 Oct 2025 16:21:39 -0700 Subject: [PATCH 3/3] fix tests --- tests/test_create_single_guid.py | 54 -------------------------------- tests/test_logical_crud.py | 8 +++-- 2 files changed, 5 insertions(+), 57 deletions(-) delete mode 100644 tests/test_create_single_guid.py diff --git a/tests/test_create_single_guid.py b/tests/test_create_single_guid.py deleted file mode 100644 index cf85768f..00000000 --- a/tests/test_create_single_guid.py +++ /dev/null @@ -1,54 +0,0 @@ -import types -from dataverse_sdk.odata import ODataClient, _GUID_RE - -class DummyAuth: - def acquire_token(self, scope): - class T: access_token = "x" - return T() - -class DummyHTTP: - def __init__(self, headers): - self._headers = headers - def request(self, method, url, **kwargs): - # Simulate minimal Response-like object (subset of requests.Response API used by code) - resp = types.SimpleNamespace() - resp.headers = self._headers - resp.status_code = 204 - resp.text = "" - def raise_for_status(): - return None - def json_func(): - return {} - resp.raise_for_status = raise_for_status - resp.json = json_func - return resp - -class TestableOData(ODataClient): - def __init__(self, headers): - super().__init__(DummyAuth(), "https://org.example", None) - # Monkey-patch http client - self._http = types.SimpleNamespace(request=lambda method, url, **kwargs: DummyHTTP(headers).request(method, url, **kwargs)) - # Bypass optionset label conversion to keep response sequence stable for tests - def _convert_labels_to_ints(self, logical_name, record): # pragma: no cover - test shim - return record - -def test__create_uses_odata_entityid(): - guid = "11111111-2222-3333-4444-555555555555" - headers = {"OData-EntityId": f"https://org.example/api/data/v9.2/accounts({guid})"} - c = TestableOData(headers) - # Current signature requires logical name explicitly - result = c._create("accounts", "account", {"name": "x"}) - assert result == guid - -def test__create_fallback_location(): - guid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" - headers = {"Location": f"https://org.example/api/data/v9.2/contacts({guid})"} - c = TestableOData(headers) - result = c._create("contacts", "contact", {"firstname": "x"}) - assert result == guid - -def test__create_missing_headers_raises(): - c = TestableOData({}) - import pytest - with pytest.raises(RuntimeError): - c._create("accounts", "account", {"name": "x"}) diff --git a/tests/test_logical_crud.py b/tests/test_logical_crud.py index f66033f9..86ce2242 100644 --- a/tests/test_logical_crud.py +++ b/tests/test_logical_crud.py @@ -74,7 +74,8 @@ def test_single_create_update_delete_get(): (204, {}, {}), # delete ] c = TestableClient(responses) - rid = c._create("account", {"name": "Acme"}) + entity_set = c._entity_set_from_logical("account") + rid = c._create(entity_set, "account", {"name": "Acme"}) assert rid == guid rec = c._get("account", rid, select="accountid,name") assert rec["accountid"] == guid and rec["name"] == "Acme" @@ -93,7 +94,8 @@ def test_bulk_create_and_update(): (204, {}, {}), # UpdateMultiple 1:1 ] c = TestableClient(responses) - ids = c._create("account", [{"name": "A"}, {"name": "B"}]) + entity_set = c._entity_set_from_logical("account") + ids = c._create_multiple(entity_set, "account", [{"name": "A"}, {"name": "B"}]) assert ids == [g1, g2] c._update_by_ids("account", ids, {"statecode": 1}) # broadcast c._update_by_ids("account", ids, [{"name": "A1"}, {"name": "B1"}]) # per-record @@ -117,4 +119,4 @@ def test_unknown_logical_name_raises(): ] c = TestableClient(responses) with pytest.raises(MetadataError): - c._create("nonexistent", {"x": 1}) \ No newline at end of file + c._entity_set_from_logical("nonexistent") \ No newline at end of file