diff --git a/src/dataverse_sdk/error_codes.py b/src/dataverse_sdk/error_codes.py index 9406e0e2..12b35aae 100644 --- a/src/dataverse_sdk/error_codes.py +++ b/src/dataverse_sdk/error_codes.py @@ -26,3 +26,46 @@ HTTP_503, HTTP_504, } + +# Validation subcodes +VALIDATION_SQL_NOT_STRING = "validation_sql_not_string" +VALIDATION_SQL_EMPTY = "validation_sql_empty" +VALIDATION_ENUM_NO_MEMBERS = "validation_enum_no_members" +VALIDATION_ENUM_NON_INT_VALUE = "validation_enum_non_int_value" +VALIDATION_UNSUPPORTED_COLUMN_TYPE = "validation_unsupported_column_type" +VALIDATION_UNSUPPORTED_CACHE_KIND = "validation_unsupported_cache_kind" + +# SQL parse subcodes +SQL_PARSE_TABLE_NOT_FOUND = "sql_parse_table_not_found" + +# Metadata subcodes +METADATA_ENTITYSET_NOT_FOUND = "metadata_entityset_not_found" +METADATA_ENTITYSET_NAME_MISSING = "metadata_entityset_name_missing" +METADATA_TABLE_NOT_FOUND = "metadata_table_not_found" +METADATA_TABLE_ALREADY_EXISTS = "metadata_table_already_exists" +METADATA_ATTRIBUTE_RETRY_EXHAUSTED = "metadata_attribute_retry_exhausted" +METADATA_PICKLIST_RETRY_EXHAUSTED = "metadata_picklist_retry_exhausted" + +# Mapping from status code -> subcode +HTTP_STATUS_TO_SUBCODE: dict[int, str] = { + 400: HTTP_400, + 401: HTTP_401, + 403: HTTP_403, + 404: HTTP_404, + 409: HTTP_409, + 412: HTTP_412, + 415: HTTP_415, + 429: HTTP_429, + 500: HTTP_500, + 502: HTTP_502, + 503: HTTP_503, + 504: HTTP_504, +} + +TRANSIENT_STATUS = {429, 502, 503, 504} + +def http_subcode(status: int) -> str: + return HTTP_STATUS_TO_SUBCODE.get(status, f"http_{status}") + +def is_transient_status(status: int) -> bool: + return status in TRANSIENT_STATUS diff --git a/src/dataverse_sdk/errors.py b/src/dataverse_sdk/errors.py index a492b473..4dfddeb3 100644 --- a/src/dataverse_sdk/errors.py +++ b/src/dataverse_sdk/errors.py @@ -12,8 +12,8 @@ def __init__( subcode: Optional[str] = None, status_code: Optional[int] = None, details: Optional[Dict[str, Any]] = None, - source: Optional[Dict[str, Any]] = None, - is_transient: Optional[bool] = None, + source: Optional[str] = None, + is_transient: bool = False, ) -> None: super().__init__(message) self.message = message @@ -21,7 +21,7 @@ def __init__( self.subcode = subcode self.status_code = status_code self.details = details or {} - self.source = source or {} + self.source = source or "client" self.is_transient = is_transient self.timestamp = _dt.datetime.utcnow().isoformat() + "Z" @@ -40,25 +40,54 @@ def to_dict(self) -> Dict[str, Any]: def __repr__(self) -> str: # pragma: no cover return f"{self.__class__.__name__}(code={self.code!r}, subcode={self.subcode!r}, message={self.message!r})" +class ValidationError(DataverseError): + def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): + super().__init__(message, code="validation_error", subcode=subcode, details=details, source="client") + +class MetadataError(DataverseError): + def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): + super().__init__(message, code="metadata_error", subcode=subcode, details=details, source="client") + +class SQLParseError(DataverseError): + def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): + super().__init__(message, code="sql_parse_error", subcode=subcode, details=details, source="client") + class HttpError(DataverseError): def __init__( self, message: str, - *, + status_code: int, + is_transient: bool = False, subcode: Optional[str] = None, - status_code: Optional[int] = None, - details: Optional[Dict[str, Any]] = None, - source: Optional[Dict[str, Any]] = None, - is_transient: Optional[bool] = None, + service_error_code: Optional[str] = None, + correlation_id: Optional[str] = None, + request_id: Optional[str] = None, + traceparent: Optional[str] = None, + body_excerpt: Optional[str] = None, + retry_after: Optional[int] = None, + details: Optional[Dict[str, Any]] = None ) -> None: + d = details or {} + if service_error_code is not None: + d["service_error_code"] = service_error_code + if correlation_id is not None: + d["correlation_id"] = correlation_id + if request_id is not None: + d["request_id"] = request_id + if traceparent is not None: + d["traceparent"] = traceparent + if body_excerpt is not None: + d["body_excerpt"] = body_excerpt + if retry_after is not None: + d["retry_after"] = retry_after super().__init__( message, - code="http", + code="http_error", subcode=subcode, status_code=status_code, - details=details, - source=source, + details=d, + source="server", is_transient=is_transient, ) -__all__ = ["DataverseError", "HttpError"] +__all__ = ["DataverseError", "HttpError", "ValidationError", "MetadataError", "SQLParseError"] diff --git a/src/dataverse_sdk/odata.py b/src/dataverse_sdk/odata.py index f7d893c3..5709d51b 100644 --- a/src/dataverse_sdk/odata.py +++ b/src/dataverse_sdk/odata.py @@ -10,7 +10,7 @@ from .http import HttpClient from .odata_upload_files import ODataFileUpload -from .errors import HttpError +from .errors import * from . import error_codes as ec @@ -76,45 +76,53 @@ def _raw_request(self, method: str, url: str, **kwargs): return self._http.request(method, url, **kwargs) def _request(self, method: str, url: str, *, expected: tuple[int, ...] = (200, 201, 202, 204), **kwargs): - """Execute HTTP request; raise HttpError with structured details on failure. - - Returns the raw response for success codes; raises HttpError with extracted - Dataverse error payload fields and correlation identifiers otherwise. - """ - headers = kwargs.pop("headers", None) - kwargs["headers"] = self._merge_headers(headers) + headers_in = kwargs.pop("headers", None) + kwargs["headers"] = self._merge_headers(headers_in) r = self._raw_request(method, url, **kwargs) if r.status_code in expected: return r - payload = {} + headers = getattr(r, "headers", {}) or {} + body_excerpt = (getattr(r, "text", "") or "")[:200] + svc_code = None + msg = f"HTTP {r.status_code}" try: - payload = r.json() if getattr(r, 'text', None) else {} + data = r.json() if getattr(r, "text", None) else {} + if isinstance(data, dict): + inner = data.get("error") + if isinstance(inner, dict): + svc_code = inner.get("code") + imsg = inner.get("message") + if isinstance(imsg, str) and imsg.strip(): + msg = imsg.strip() + else: + imsg2 = data.get("message") + if isinstance(imsg2, str) and imsg2.strip(): + msg = imsg2.strip() except Exception: - payload = {} - svc_err = payload.get("error") if isinstance(payload, dict) else None - svc_code = svc_err.get("code") if isinstance(svc_err, dict) else None - svc_msg = svc_err.get("message") if isinstance(svc_err, dict) else None - message = svc_msg or f"HTTP {r.status_code}" - subcode = f"http_{r.status_code}" - - headers = getattr(r, 'headers', {}) or {} - details = { - "service_error_code": svc_code, - "body_excerpt": (getattr(r, 'text', '') or '')[:200], - "correlation_id": headers.get("x-ms-correlation-request-id") or headers.get("x-ms-correlation-id"), - "request_id": headers.get("x-ms-client-request-id") or headers.get("request-id"), - "traceparent": headers.get("traceparent"), - } + pass + sc = r.status_code + subcode = ec.http_subcode(sc) + correlation_id = headers.get("x-ms-correlation-request-id") or headers.get("x-ms-correlation-id") + request_id = headers.get("x-ms-client-request-id") or headers.get("request-id") or headers.get("x-ms-request-id") + traceparent = headers.get("traceparent") ra = headers.get("Retry-After") + retry_after = None if ra: - details["retry_after"] = ra - is_transient = r.status_code in (429, 502, 503, 504) + try: + retry_after = int(ra) + except Exception: + retry_after = None + is_transient = ec.is_transient_status(sc) raise HttpError( - message, + msg, + status_code=sc, subcode=subcode, - status_code=r.status_code, - details=details, - source={"method": method, "url": url}, + service_error_code=svc_code, + correlation_id=correlation_id, + request_id=request_id, + traceparent=traceparent, + body_excerpt=body_excerpt, + retry_after=retry_after, is_transient=is_transient, ) @@ -500,8 +508,10 @@ def _query_sql(self, sql: str) -> list[dict[str, Any]]: RuntimeError If metadata lookup for the logical name fails. """ - if not isinstance(sql, str) or not sql.strip(): - raise ValueError("sql must be a non-empty string") + if not isinstance(sql, str): + raise ValidationError("sql must be a string", subcode=ec.VALIDATION_SQL_NOT_STRING) + if not sql.strip(): + raise ValidationError("sql must be a non-empty string", subcode=ec.VALIDATION_SQL_EMPTY) sql = sql.strip() # Extract logical table name via helper (robust to identifiers ending with 'from') @@ -570,11 +580,17 @@ def _entity_set_from_logical(self, logical: str) -> str: items = [] if not items: plural_hint = " (did you pass a plural entity set name instead of the singular logical name?)" if logical.endswith("s") and not logical.endswith("ss") else "" - raise RuntimeError(f"Unable to resolve entity set for logical name '{logical}'. Provide the singular logical name.{plural_hint}") + raise MetadataError( + f"Unable to resolve entity set for logical name '{logical}'. Provide the singular logical name.{plural_hint}", + subcode=ec.METADATA_ENTITYSET_NOT_FOUND, + ) md = items[0] es = md.get("EntitySetName") if not es: - raise RuntimeError(f"Metadata response missing EntitySetName for logical '{logical}'.") + raise MetadataError( + f"Metadata response missing EntitySetName for logical '{logical}'.", + subcode=ec.METADATA_ENTITYSET_NAME_MISSING, + ) self._logical_to_entityset_cache[logical] = es primary_id_attr = md.get("PrimaryIdAttribute") if isinstance(primary_id_attr, str) and primary_id_attr: @@ -1014,7 +1030,10 @@ def _delete_table(self, tablename: str) -> None: entity_schema = schema_name ent = self._get_entity_by_schema(entity_schema) if not ent or not ent.get("MetadataId"): - raise RuntimeError(f"Table '{entity_schema}' not found.") + raise MetadataError( + f"Table '{entity_schema}' not found.", + subcode=ec.METADATA_TABLE_NOT_FOUND, + ) metadata_id = ent["MetadataId"] url = f"{self.api}/EntityDefinitions({metadata_id})" r = self._request("delete", url) @@ -1026,7 +1045,10 @@ def _create_table(self, tablename: str, schema: Dict[str, Any]) -> Dict[str, Any ent = self._get_entity_by_schema(entity_schema) if ent: - raise RuntimeError(f"Table '{entity_schema}' already exists. No update performed.") + raise MetadataError( + f"Table '{entity_schema}' already exists.", + subcode=ec.METADATA_TABLE_ALREADY_EXISTS, + ) created_cols: List[str] = [] primary_attr_schema = "new_Name" if "_" not in entity_schema else f"{entity_schema.split('_',1)[0]}_Name" @@ -1080,7 +1102,10 @@ def _flush_cache( """ k = (kind or "").strip().lower() if k != "picklist": - raise ValueError(f"Unsupported cache kind '{kind}' (only 'picklist' is implemented)") + raise ValidationError( + f"Unsupported cache kind '{kind}' (only 'picklist' is implemented)", + subcode=ec.VALIDATION_UNSUPPORTED_CACHE_KIND, + ) removed = len(self._picklist_label_cache) self._picklist_label_cache.clear() diff --git a/tests/test_create_single_guid.py b/tests/test_create_single_guid.py deleted file mode 100644 index cf85768f..00000000 --- a/tests/test_create_single_guid.py +++ /dev/null @@ -1,54 +0,0 @@ -import types -from dataverse_sdk.odata import ODataClient, _GUID_RE - -class DummyAuth: - def acquire_token(self, scope): - class T: access_token = "x" - return T() - -class DummyHTTP: - def __init__(self, headers): - self._headers = headers - def request(self, method, url, **kwargs): - # Simulate minimal Response-like object (subset of requests.Response API used by code) - resp = types.SimpleNamespace() - resp.headers = self._headers - resp.status_code = 204 - resp.text = "" - def raise_for_status(): - return None - def json_func(): - return {} - resp.raise_for_status = raise_for_status - resp.json = json_func - return resp - -class TestableOData(ODataClient): - def __init__(self, headers): - super().__init__(DummyAuth(), "https://org.example", None) - # Monkey-patch http client - self._http = types.SimpleNamespace(request=lambda method, url, **kwargs: DummyHTTP(headers).request(method, url, **kwargs)) - # Bypass optionset label conversion to keep response sequence stable for tests - def _convert_labels_to_ints(self, logical_name, record): # pragma: no cover - test shim - return record - -def test__create_uses_odata_entityid(): - guid = "11111111-2222-3333-4444-555555555555" - headers = {"OData-EntityId": f"https://org.example/api/data/v9.2/accounts({guid})"} - c = TestableOData(headers) - # Current signature requires logical name explicitly - result = c._create("accounts", "account", {"name": "x"}) - assert result == guid - -def test__create_fallback_location(): - guid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" - headers = {"Location": f"https://org.example/api/data/v9.2/contacts({guid})"} - c = TestableOData(headers) - result = c._create("contacts", "contact", {"firstname": "x"}) - assert result == guid - -def test__create_missing_headers_raises(): - c = TestableOData({}) - import pytest - with pytest.raises(RuntimeError): - c._create("accounts", "account", {"name": "x"}) diff --git a/tests/test_http_errors.py b/tests/test_http_errors.py new file mode 100644 index 00000000..f3b33a6c --- /dev/null +++ b/tests/test_http_errors.py @@ -0,0 +1,94 @@ +import pytest +from dataverse_sdk.errors import HttpError +from dataverse_sdk import error_codes as ec +from dataverse_sdk.odata import ODataClient + +class DummyAuth: + def acquire_token(self, scope): + class T: access_token = "x" + return T() + +class DummyHTTP: + def __init__(self, responses): + self._responses = responses + def request(self, method, url, **kwargs): + if not self._responses: + raise AssertionError("No more responses") + status, headers, body = self._responses.pop(0) + class R: + pass + r = R() + r.status_code = status + r.headers = headers + if isinstance(body, dict): + import json + r.text = json.dumps(body) + def json_func(): return body + r.json = json_func + else: + r.text = body or "" + def json_fail(): raise ValueError("non-json") + r.json = json_fail + return r + +class TestClient(ODataClient): + def __init__(self, responses): + super().__init__(DummyAuth(), "https://org.example", None) + self._http = DummyHTTP(responses) + +# --- Tests --- + +def test_http_404_subcode_and_service_code(): + responses = [( + 404, + {"x-ms-correlation-request-id": "cid1"}, + {"error": {"code": "0x800404", "message": "Not found"}}, + )] + c = TestClient(responses) + with pytest.raises(HttpError) as ei: + c._request("get", c.api + "/accounts(abc)") + err = ei.value.to_dict() + assert err["subcode"] == ec.HTTP_404 + assert err["details"]["service_error_code"] == "0x800404" + + +def test_http_429_transient_and_retry_after(): + responses = [( + 429, + {"Retry-After": "7"}, + {"error": {"message": "Throttle"}}, + )] + c = TestClient(responses) + with pytest.raises(HttpError) as ei: + c._request("get", c.api + "/accounts") + err = ei.value.to_dict() + assert err["is_transient"] is True + assert err["subcode"] == ec.HTTP_429 + assert err["details"]["retry_after"] == 7 + + +def test_http_500_body_excerpt(): + responses = [( + 500, + {}, + "Internal failure XYZ stack truncated", + )] + c = TestClient(responses) + with pytest.raises(HttpError) as ei: + c._request("get", c.api + "/accounts") + err = ei.value.to_dict() + assert err["subcode"] == ec.HTTP_500 + assert "XYZ stack" in err["details"]["body_excerpt"] + + +def test_http_non_mapped_status_code_subcode_fallback(): + responses = [( + 418, # I'm a teapot (not in map) + {}, + {"error": {"message": "Teapot"}}, + )] + c = TestClient(responses) + with pytest.raises(HttpError) as ei: + c._request("get", c.api + "/accounts") + err = ei.value.to_dict() + assert err["subcode"] == "http_418" diff --git a/tests/test_logical_crud.py b/tests/test_logical_crud.py index 8b552fee..86ce2242 100644 --- a/tests/test_logical_crud.py +++ b/tests/test_logical_crud.py @@ -1,6 +1,7 @@ import types import pytest from dataverse_sdk.odata import ODataClient +from dataverse_sdk.errors import MetadataError class DummyAuth: def acquire_token(self, scope): @@ -73,7 +74,8 @@ def test_single_create_update_delete_get(): (204, {}, {}), # delete ] c = TestableClient(responses) - rid = c._create("account", {"name": "Acme"}) + entity_set = c._entity_set_from_logical("account") + rid = c._create(entity_set, "account", {"name": "Acme"}) assert rid == guid rec = c._get("account", rid, select="accountid,name") assert rec["accountid"] == guid and rec["name"] == "Acme" @@ -92,7 +94,8 @@ def test_bulk_create_and_update(): (204, {}, {}), # UpdateMultiple 1:1 ] c = TestableClient(responses) - ids = c._create("account", [{"name": "A"}, {"name": "B"}]) + entity_set = c._entity_set_from_logical("account") + ids = c._create_multiple(entity_set, "account", [{"name": "A"}, {"name": "B"}]) assert ids == [g1, g2] c._update_by_ids("account", ids, {"statecode": 1}) # broadcast c._update_by_ids("account", ids, [{"name": "A1"}, {"name": "B1"}]) # per-record @@ -115,5 +118,5 @@ def test_unknown_logical_name_raises(): (200, {}, {"value": []}), # metadata lookup returns empty ] c = TestableClient(responses) - with pytest.raises(RuntimeError): - c._create("nonexistent", {"x": 1}) \ No newline at end of file + with pytest.raises(MetadataError): + c._entity_set_from_logical("nonexistent") \ No newline at end of file