diff --git a/README.md b/README.md index 926cc5cf..3eaa7912 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,11 @@ A Python package allowing developers to connect to Dataverse environments for DDL / DML operations. - Read (SQL) — Execute constrained read-only SQL via the Dataverse Web API `?sql=` parameter. Returns `list[dict]`. -- OData CRUD — Unified methods `create(entity, record|records)`, `update(entity, id|ids, patch|patches)`, `delete(entity, id|ids)` plus `get` / `get_multiple`. +- OData CRUD — Unified methods `create(logical_name, record|records)`, `update(logical_name, id|ids, patch|patches)`, `delete(logical_name, id|ids)` plus `get` / `get_multiple`. - Bulk create — Pass a list of records to `create(...)` to invoke the bound `CreateMultiple` action; returns `list[str]` of GUIDs. If any payload omits `@odata.type` the SDK resolves and stamps it (cached). - Bulk update — Provide a list of IDs with a single patch (broadcast) or a list of per‑record patches to `update(...)`; internally uses the bound `UpdateMultiple` action; returns nothing. Each record must include the primary key attribute when sent to UpdateMultiple. - Retrieve multiple (paging) — Generator-based `get_multiple(...)` that yields pages, supports `$top` and Prefer: `odata.maxpagesize` (`page_size`). -- Upload files — Call `upload_file(entity_set, ...)` and a upload method will be auto picked (user can also overwrite the upload mode). See https://learn.microsoft.com/en-us/power-apps/developer/data-platform/file-column-data?tabs=sdk#upload-files +- Upload files — Call `upload_file(logical_name, ...)` and an upload method will be auto picked (you can override the mode). See https://learn.microsoft.com/en-us/power-apps/developer/data-platform/file-column-data?tabs=sdk#upload-files - Metadata helpers — Create/inspect/delete simple custom tables (EntityDefinitions + Attributes). - Pandas helpers — Convenience DataFrame oriented wrappers for quick prototyping/notebooks. 
- Auth — Azure Identity (`TokenCredential`) injection. @@ -17,8 +17,8 @@ A Python package allowing developers to connect to Dataverse environments for DD - Simple `DataverseClient` facade for CRUD, SQL (read-only), and table metadata. - SQL-over-API: Constrained SQL (single SELECT with limited WHERE/TOP/ORDER BY) via native Web API `?sql=` parameter. - Table metadata ops: create simple custom tables (supports string/int/decimal/float/datetime/bool/optionset) and delete them. -- Bulk create via `CreateMultiple` (collection-bound) by passing `list[dict]` to `create(entity_set, payloads)`; returns list of created IDs. -- Bulk update via `UpdateMultiple` (invoked internally) by calling unified `update(entity_set, ids, patch|patches)`; returns nothing. +- Bulk create via `CreateMultiple` (collection-bound) by passing `list[dict]` to `create(logical_name, payloads)`; returns list of created IDs. +- Bulk update via `UpdateMultiple` (invoked internally) by calling unified `update(logical_name, ids, patch|patches)`; returns nothing. - Retrieve multiple with server-driven paging: `get_multiple(...)` yields lists (pages) following `@odata.nextLink`. Control total via `$top` and per-page via `page_size` (Prefer: `odata.maxpagesize`). - Upload files, using either a single request (supports file size up to 128 MB) or chunk upload under the hood - Optional pandas integration (`PandasODataClient`) for DataFrame based create / get / query. @@ -32,23 +32,23 @@ Auth: | Method | Signature (simplified) | Returns | Notes | |--------|------------------------|---------|-------| -| `create` | `create(entity_set, record_dict)` | `list[str]` (len 1) | Single create; GUID from `OData-EntityId`. | -| `create` | `create(entity_set, list[record_dict])` | `list[str]` | Uses `CreateMultiple`; stamps `@odata.type` if missing. | -| `get` | `get(entity_set, id)` | `dict` | One record; supply GUID (with/without parentheses). 
| -| `get_multiple` | `get_multiple(entity_set, ..., page_size=None)` | `Iterable[list[dict]]` | Pages yielded (non-empty only). | -| `update` | `update(entity_set, id, patch)` | `None` | Single update; no representation returned. | -| `update` | `update(entity_set, list[id], patch)` | `None` | Broadcast; same patch applied to all IDs. Calls UpdateMultiple web API internally. | -| `update` | `update(entity_set, list[id], list[patch])` | `None` | 1:1 patches; lengths must match. Calls UpdateMultiple web API internally. | -| `delete` | `delete(entity_set, id)` | `None` | Delete one record. | -| `delete` | `delete(entity_set, list[id])` | `None` | Delete many (sequential). | +| `create` | `create(logical_name, record_dict)` | `list[str]` (len 1) | Single create; GUID from `OData-EntityId`. | +| `create` | `create(logical_name, list[record_dict])` | `list[str]` | Uses `CreateMultiple`; stamps `@odata.type` if missing. | +| `get` | `get(logical_name, id)` | `dict` | One record; supply GUID (with/without parentheses). | +| `get_multiple` | `get_multiple(logical_name, ..., page_size=None)` | `Iterable[list[dict]]` | Pages yielded (non-empty only). | +| `update` | `update(logical_name, id, patch)` | `None` | Single update; no representation returned. | +| `update` | `update(logical_name, list[id], patch)` | `None` | Broadcast; same patch applied to all IDs (UpdateMultiple). | +| `update` | `update(logical_name, list[id], list[patch])` | `None` | 1:1 patches; lengths must match (UpdateMultiple). | +| `delete` | `delete(logical_name, id)` | `None` | Delete one record. | +| `delete` | `delete(logical_name, list[id])` | `None` | Delete many (sequential). | | `query_sql` | `query_sql(sql)` | `list[dict]` | Constrained read-only SELECT via `?sql=`. | -| `create_table` | `create_table(name, schema)` | `dict` | Creates custom table + columns. | -| `get_table_info` | `get_table_info(name)` | `dict | None` | Basic table metadata. 
| +| `create_table` | `create_table(tablename, schema)` | `dict` | Creates custom table + columns. Friendly name (e.g. `SampleItem`) becomes schema `new_SampleItem`; explicit schema name (contains `_`) used as-is. | +| `get_table_info` | `get_table_info(schema_name)` | `dict \| None` | Basic table metadata by schema name (e.g. `new_SampleItem`). Friendly names not auto-converted. | | `list_tables` | `list_tables()` | `list[dict]` | Lists non-private tables. | -| `delete_table` | `delete_table(name)` | `None` | Drops custom table. | -| `PandasODataClient.create_df` | `create_df(entity_set, series)` | `str` | Returns GUID (wrapper). | -| `PandasODataClient.update` | `update(entity_set, id, series)` | `None` | Ignores empty Series. | -| `PandasODataClient.get_ids` | `get_ids(entity_set, ids, select=None)` | `DataFrame` | One row per ID (errors inline). | +| `delete_table` | `delete_table(tablename)` | `None` | Drops custom table. Accepts friendly or schema name; friendly converted to `new_`. | +| `PandasODataClient.create_df` | `create_df(logical_name, series)` | `str` | Create one record (returns GUID). | +| `PandasODataClient.update` | `update(logical_name, id, series)` | `None` | Returns None; ignored if Series empty. | +| `PandasODataClient.get_ids` | `get_ids(logical_name, ids, select=None)` | `DataFrame` | One row per ID (errors inline). | +| `PandasODataClient.query_sql_df` | `query_sql_df(sql)` | `DataFrame` | DataFrame for SQL results. 
| Guidelines: @@ -128,30 +128,30 @@ base_url = "https://yourorg.crm.dynamics.com" client = DataverseClient(base_url=base_url, credential=DefaultAzureCredential()) # Create (returns list[str] of new GUIDs) -account_id = client.create("accounts", {"name": "Acme, Inc.", "telephone1": "555-0100"})[0] +account_id = client.create("account", {"name": "Acme, Inc.", "telephone1": "555-0100"})[0] # Read -account = client.get("accounts", account_id) +account = client.get("account", account_id) # Update (returns None) -client.update("accounts", account_id, {"telephone1": "555-0199"}) +client.update("account", account_id, {"telephone1": "555-0199"}) # Bulk update (broadcast) – apply same patch to several IDs -ids = client.create("accounts", [ +ids = client.create("account", [ {"name": "Contoso"}, {"name": "Fabrikam"}, ]) -client.update("accounts", ids, {"telephone1": "555-0200"}) # broadcast patch +client.update("account", ids, {"telephone1": "555-0200"}) # broadcast patch # Bulk update (1:1) – list of patches matches list of IDs -client.update("accounts", ids, [ +client.update("account", ids, [ {"telephone1": "555-1200"}, {"telephone1": "555-1300"}, ]) print({"multi_update": "ok"}) # Delete -client.delete("accounts", account_id) +client.delete("account", account_id) # SQL (read-only) via Web API `?sql=` rows = client.query_sql("SELECT TOP 3 accountid, name FROM account ORDER BY createdon DESC") @@ -160,7 +160,7 @@ for r in rows: ## Bulk create (CreateMultiple) -Pass a list of payloads to `create(entity_set, payloads)` to invoke the collection-bound `Microsoft.Dynamics.CRM.CreateMultiple` action. The method returns `list[str]` of created record IDs. +Pass a list of payloads to `create(logical_name, payloads)` to invoke the collection-bound `Microsoft.Dynamics.CRM.CreateMultiple` action. The method returns `list[str]` of created record IDs. 
```python # Bulk create accounts (returns list of GUIDs) @@ -169,7 +169,7 @@ payloads = [ {"name": "Fabrikam"}, {"name": "Northwind"}, ] -ids = client.create("accounts", payloads) +ids = client.create("account", payloads) assert isinstance(ids, list) and all(isinstance(x, str) for x in ids) print({"created_ids": ids}) ``` @@ -180,10 +180,10 @@ Use the unified `update` method for both single and bulk scenarios: ```python # Broadcast -client.update("accounts", ids, {"telephone1": "555-0200"}) +client.update("account", ids, {"telephone1": "555-0200"}) # 1:1 patches (length must match) -client.update("accounts", ids, [ +client.update("account", ids, [ {"telephone1": "555-1200"}, {"telephone1": "555-1300"}, ]) @@ -216,12 +216,11 @@ Notes: ## Retrieve multiple with paging -Use `get_multiple(entity_set, ...)` to stream results page-by-page. You can cap total results with `$top` and hint the per-page size with `page_size` (sets Prefer: `odata.maxpagesize`). +Use `get_multiple(logical_name, ...)` to stream results page-by-page. You can cap total results with `$top` and hint the per-page size with `page_size` (sets Prefer: `odata.maxpagesize`). ```python -# Iterate pages of accounts ordered by name, selecting a few columns pages = client.get_multiple( - "accounts", + "account", select=["accountid", "name", "createdon"], orderby=["name asc"], top=10, # stop after 10 total rows (optional) @@ -235,8 +234,8 @@ for page in pages: # each page is a list[dict] print({"total_rows": total}) ``` -Parameters (all optional except `entity_set`) -- `entity_set`: str — Entity set (plural logical name), e.g., `"accounts"`. +Parameters (all optional except `logical_name`) +- `logical_name`: str — Logical (singular) name, e.g., `"account"`. - `select`: list[str] | None — Columns -> `$select` (comma joined). - `filter`: str | None — OData `$filter` expression (e.g., `contains(name,'Acme') and statecode eq 0`). - `orderby`: list[str] | None — Sort expressions -> `$orderby` (comma joined). 
@@ -257,7 +256,7 @@ Example (all parameters + expected response) ```python pages = client.get_multiple( - "accounts", + "account", select=["accountid", "name", "createdon", "primarycontactid"], filter="contains(name,'Acme') and statecode eq 0", orderby=["name asc", "createdon desc"], @@ -320,7 +319,6 @@ info = client.create_table( }, ) -entity_set = info["entity_set_name"] # e.g., "new_sampleitems" logical = info["entity_logical_name"] # e.g., "new_sampleitem" # Create a record in the new table @@ -329,11 +327,11 @@ prefix = "new" name_attr = f"{prefix}_name" id_attr = f"{logical}id" -rec = client.create(entity_set, {name_attr: "Sample A"}) +rec_id = client.create(logical, {name_attr: "Sample A"})[0] # Clean up -client.delete(entity_set, rec[id_attr]) # delete record -client.delete_table("SampleItem") # delete the table +client.delete(logical, rec_id) # delete record +client.delete_table("SampleItem") # delete table (friendly name or explicit schema new_SampleItem) ``` Notes: @@ -346,7 +344,7 @@ Notes: ### Pandas helpers -See `examples/quickstart_pandas.py` for a DataFrame workflow via `PandasODataClient`. +`PandasODataClient` is a thin wrapper around the low-level client. All methods accept logical (singular) names (e.g. `account`, `new_sampleitem`), not entity set (plural) names. See `examples/quickstart_pandas.py` for a DataFrame workflow. VS Code Tasks - Install deps: `Install deps (pip)` @@ -356,7 +354,6 @@ VS Code Tasks - No general-purpose OData batching, upsert, or association operations yet. - `DeleteMultiple` not yet exposed. - Minimal retry policy in library (network-error only); examples include additional backoff for transient Dataverse consistency. -- Entity naming conventions in Dataverse: for bulk create the SDK resolves logical names from entity set metadata. 
## Contributing diff --git a/examples/quickstart.py b/examples/quickstart.py index 7f9eac4d..3cac3629 100644 --- a/examples/quickstart.py +++ b/examples/quickstart.py @@ -141,8 +141,7 @@ class Status(IntEnum): pass # Fail fast: all operations must use the custom table sys.exit(1) -entity_set = table_info.get("entity_set_name") -logical = table_info.get("entity_logical_name") or entity_set.rstrip("s") +logical = table_info.get("entity_logical_name") # Derive attribute logical name prefix from the entity logical name (segment before first underscore) attr_prefix = logical.split("_", 1)[0] if "_" in logical else logical @@ -261,15 +260,15 @@ def _status_value_for_index(idx: int, use_french: bool): try: # Single create returns list[str] (length 1) - log_call(f"client.create('{entity_set}', single_payload)") - single_ids = backoff_retry(lambda: client.create(entity_set, single_payload)) + log_call(f"client.create('{logical}', single_payload)") + single_ids = backoff_retry(lambda: client.create(logical, single_payload)) if not (isinstance(single_ids, list) and len(single_ids) == 1): raise RuntimeError("Unexpected single create return shape (expected one-element list)") record_ids.extend(single_ids) # Multi create returns list[str] - log_call(f"client.create('{entity_set}', multi_payloads)") - multi_ids = backoff_retry(lambda: client.create(entity_set, multi_payloads)) + log_call(f"client.create('{logical}', multi_payloads)") + multi_ids = backoff_retry(lambda: client.create(logical, multi_payloads)) if isinstance(multi_ids, list): record_ids.extend([mid for mid in multi_ids if isinstance(mid, str)]) else: @@ -301,8 +300,8 @@ def _status_value_for_index(idx: int, use_french: bool): if record_ids: # Read only the first record and move on target = record_ids[0] - log_call(f"client.get('{entity_set}', '{target}')") - rec = backoff_retry(lambda: client.get(entity_set, target)) + log_call(f"client.get('{logical}', '{target}')") + rec = backoff_retry(lambda: client.get(logical, 
target)) print_line_summaries("Read record summary:", [{"id": target, **summary_from_record(rec)}]) else: raise RuntimeError("No record created; skipping read.") @@ -348,10 +347,10 @@ def _status_value_for_index(idx: int, use_french: bool): pause("Execute Update") # Update only the chosen record and summarize - log_call(f"client.update('{entity_set}', '{target_id}', update_data)") + log_call(f"client.update('{logical}', '{target_id}', update_data)") # Perform update (returns None); follow-up read to verify - backoff_retry(lambda: client.update(entity_set, target_id, update_data)) - verify_rec = backoff_retry(lambda: client.get(entity_set, target_id)) + backoff_retry(lambda: client.update(logical, target_id, update_data)) + verify_rec = backoff_retry(lambda: client.get(logical, target_id)) for k, v in expected_checks.items(): assert verify_rec.get(k) == v, f"Field {k} expected {v}, got {verify_rec.get(k)}" got = verify_rec.get(amount_key) @@ -376,16 +375,16 @@ def _status_value_for_index(idx: int, use_french: bool): id_key: rid, count_key: 100 + idx, # new count values }) - log_call(f"client.update('{entity_set}', <{len(bulk_updates)} ids>, )") + log_call(f"client.update('{logical}', <{len(bulk_updates)} ids>, )") # Unified update handles multiple via list of patches (returns None) - backoff_retry(lambda: client.update(entity_set, subset, bulk_updates)) + backoff_retry(lambda: client.update(logical, subset, bulk_updates)) print({"bulk_update_requested": len(bulk_updates), "bulk_update_completed": True}) # Verify the updated count values by refetching the subset verification = [] # Small delay to reduce risk of any brief replication delay time.sleep(1) for rid in subset: - rec = backoff_retry(lambda rid=rid: client.get(entity_set, rid)) + rec = backoff_retry(lambda rid=rid: client.get(logical, rid)) verification.append({ "id": rid, "count": rec.get(count_key), @@ -443,7 +442,7 @@ def run_paging_demo(label: str, *, top: Optional[int], page_size: Optional[int]) _select 
= [id_key, code_key, amount_key, when_key, status_key] _orderby = [f"{code_key} asc"] for page in client.get_multiple( - entity_set, + logical, select=_select, filter=None, orderby=_orderby, @@ -496,15 +495,15 @@ def run_paging_demo(label: str, *, top: Optional[int], page_size: Optional[int]) try: if record_ids: max_workers = min(8, len(record_ids)) - log_call(f"concurrent delete {len(record_ids)} items from '{entity_set}' (workers={max_workers})") + log_call(f"concurrent delete {len(record_ids)} items from '{logical}' (workers={max_workers})") successes: list[str] = [] failures: list[dict] = [] def _del_one(rid: str) -> tuple[str, bool, str | None]: try: - log_call(f"client.delete('{entity_set}', '{rid}')") - backoff_retry(lambda: client.delete(entity_set, rid)) + log_call(f"client.delete('{logical}', '{rid}')") + backoff_retry(lambda: client.delete(logical, rid)) return (rid, True, None) except Exception as ex: return (rid, False, str(ex)) diff --git a/examples/quickstart_file_upload.py b/examples/quickstart_file_upload.py index 08952160..e6d2f44a 100644 --- a/examples/quickstart_file_upload.py +++ b/examples/quickstart_file_upload.py @@ -244,10 +244,13 @@ def ensure_file_attribute_generic(schema_name: str, label: str, key_prefix: str) record_id = None try: payload = {name_attr: "File Sample Record"} - log(f"client.create('{entity_set}', payload)") - rec = backoff(lambda: client.create(entity_set, payload)) - record_id = rec.get(f"{logical}id") - print({"record_created": True, "id": record_id}) + log(f"client.create('{logical}', payload)") + created_ids = backoff(lambda: client.create(logical, payload)) + if isinstance(created_ids, list) and created_ids: + record_id = created_ids[0] + else: + raise RuntimeError("Unexpected create return; expected list[str] with at least one GUID") + print({"record_created": True, "id": record_id, "logical": logical}) except Exception as e: # noqa: BLE001 print({"record_created": False, "error": str(e)}) sys.exit(1) @@ -278,7 
+281,7 @@ def get_dataset_info(file_path: Path): try: DATASET_FILE, small_file_size, src_hash = get_dataset_info(_GENERATED_TEST_FILE) backoff(lambda: client.upload_file( - entity_set, + logical, record_id, small_file_attr_logical, str(DATASET_FILE), @@ -286,7 +289,7 @@ def get_dataset_info(file_path: Path): )) print({"small_upload_completed": True, "small_source_size": small_file_size}) odata = client._get_odata() - dl_url_single = f"{odata.api}/{entity_set}({record_id})/{small_file_attr_logical}/$value" + dl_url_single = f"{odata.api}/{entity_set}({record_id})/{small_file_attr_logical}/$value" # raw entity_set URL OK resp_single = odata._request("get", dl_url_single, headers=odata._headers()) resp_single.raise_for_status() content_single = resp_single.content or b"" @@ -306,7 +309,7 @@ def get_dataset_info(file_path: Path): print("Small single-request upload demo - REPLACE with 8MB file:") replacement_file, replace_size_small, replace_hash_small = get_dataset_info(_GENERATED_TEST_FILE_8MB) backoff(lambda: client.upload_file( - entity_set, + logical, record_id, small_file_attr_logical, str(replacement_file), @@ -335,7 +338,7 @@ def get_dataset_info(file_path: Path): try: DATASET_FILE, src_size_chunk, src_hash_chunk = get_dataset_info(_GENERATED_TEST_FILE) backoff(lambda: client.upload_file( - entity_set, + logical, record_id, chunk_file_attr_logical, str(DATASET_FILE), @@ -343,7 +346,7 @@ def get_dataset_info(file_path: Path): )) print({"chunk_upload_completed": True}) odata = client._get_odata() - dl_url_chunk = f"{odata.api}/{entity_set}({record_id})/{chunk_file_attr_logical}/$value" + dl_url_chunk = f"{odata.api}/{entity_set}({record_id})/{chunk_file_attr_logical}/$value" # raw entity_set for download resp_chunk = odata._request("get", dl_url_chunk, headers=odata._headers()) resp_chunk.raise_for_status() content_chunk = resp_chunk.content or b"" @@ -358,12 +361,11 @@ def get_dataset_info(file_path: Path): "chunk_download_sha256_prefix": dst_hash_chunk[:16] if 
dst_hash_chunk else None, "chunk_hash_match": hash_match_chunk, }) - # Now test replacing with an 8MB file print("Streaming chunk upload demo - REPLACE with 8MB file:") replacement_file, replace_size_chunk, replace_hash_chunk = get_dataset_info(_GENERATED_TEST_FILE_8MB) backoff(lambda: client.upload_file( - entity_set, + logical, record_id, chunk_file_attr_logical, str(replacement_file), diff --git a/examples/quickstart_pandas.py b/examples/quickstart_pandas.py index cd854e1c..08d9cf71 100644 --- a/examples/quickstart_pandas.py +++ b/examples/quickstart_pandas.py @@ -102,8 +102,7 @@ def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403 # Fail fast: all operations must use the custom table sys.exit(1) -entity_set = table_info.get("entity_set_name") -logical = table_info.get("entity_logical_name") or entity_set.rstrip("s") +logical = table_info.get("entity_logical_name") # Derive attribute logical name prefix from the entity logical name attr_prefix = logical.split("_", 1)[0] if "_" in logical else logical record_data = { @@ -119,7 +118,7 @@ def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403 print("(Pandas) Create record (OData via Pandas wrapper):") record_id = None try: - record_id = backoff_retry(lambda: PANDAS.create_df(entity_set, pd.Series(record_data))) + record_id = backoff_retry(lambda: PANDAS.create_df(logical, pd.Series(record_data))) print({"entity": logical, "created_id": record_id}) except Exception as e: print(f"Create failed: {e}") @@ -129,8 +128,7 @@ def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403 print("(Pandas) Read (OData via Pandas wrapper):") try: if record_id: - # get_ids returns a DataFrame; fetch single row - df = backoff_retry(lambda: PANDAS.get_ids(entity_set, pd.Series([record_id]))) + df = backoff_retry(lambda: PANDAS.get_ids(logical, pd.Series([record_id]))) print(df.head()) id_key = f"{logical}id" rid = df.iloc[0].get(id_key) if not df.empty else 
None @@ -161,11 +159,11 @@ def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403 amount_key = f"{attr_prefix}_amount" # Perform update via Pandas wrapper (returns None), then re-fetch to verify - backoff_retry(lambda: PANDAS.update(entity_set, record_id, pd.Series(update_data))) + backoff_retry(lambda: PANDAS.update(logical, record_id, pd.Series(update_data))) print({"entity": logical, "updated": True}) # Re-read and verify from DataFrame - after_df = backoff_retry(lambda: PANDAS.get_ids(entity_set, pd.Series([record_id]))) + after_df = backoff_retry(lambda: PANDAS.get_ids(logical, pd.Series([record_id]))) row = after_df.iloc[0] if not after_df.empty else {} # Verify string/int/bool fields @@ -186,32 +184,19 @@ def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403 # 4) Query records via SQL (Web API ?sql=) print("(Pandas) Query (SQL via Web API ?sql=):") try: - # Try singular logical name first, then plural entity set, with short backoff import time - candidates = [logical] - if entity_set and entity_set != logical: - candidates.append(entity_set) - - df_rows = None - for name in candidates: - def _run_query(): - id_key = f"{logical}id" - cols = f"{id_key}, {attr_prefix}_code, {attr_prefix}_amount, {attr_prefix}_when" - return PANDAS.query_sql_df(f"SELECT TOP 3 {cols} FROM {name} ORDER BY {attr_prefix}_amount DESC") - def _retry_if(ex: Exception) -> bool: - msg = str(ex) if ex else "" - return ("Invalid table name" in msg) or ("Invalid object name" in msg) - try: - df_rows = backoff_retry(_run_query, delays=(0, 2, 5), retry_http_statuses=(), retry_if=_retry_if) - id_key = f"{logical}id" - ids = df_rows[id_key].dropna().tolist() if (df_rows is not None and id_key in df_rows.columns) else [] - print({"entity": name, "rows": (0 if df_rows is None else len(df_rows)), "ids": ids}) - raise SystemExit - except Exception: - continue -except SystemExit: - pass + def _run_query(): + id_key = f"{logical}id" + cols = 
f"{id_key}, {attr_prefix}_code, {attr_prefix}_amount, {attr_prefix}_when" + return PANDAS.query_sql_df(f"SELECT TOP 3 {cols} FROM {logical} ORDER BY {attr_prefix}_amount DESC") + def _retry_if(ex: Exception) -> bool: + msg = str(ex) if ex else "" + return ("Invalid table name" in msg) or ("Invalid object name" in msg) + df_rows = backoff_retry(_run_query, delays=(0, 2, 5), retry_http_statuses=(), retry_if=_retry_if) + id_key = f"{logical}id" + ids = df_rows[id_key].dropna().tolist() if (df_rows is not None and id_key in df_rows.columns) else [] + print({"entity": logical, "rows": (0 if df_rows is None else len(df_rows)), "ids": ids}) except Exception as e: print(f"SQL query failed: {e}") @@ -219,7 +204,7 @@ def _retry_if(ex: Exception) -> bool: print("(Pandas) Delete (OData via Pandas wrapper):") try: if record_id: - backoff_retry(lambda: PANDAS.delete_ids(entity_set, record_id)) + backoff_retry(lambda: PANDAS.delete_ids(logical, record_id)) print({"entity": logical, "deleted": True}) else: raise RuntimeError("No record created; skipping delete.") diff --git a/src/dataverse_sdk/client.py b/src/dataverse_sdk/client.py index 670a2b03..186f5030 100644 --- a/src/dataverse_sdk/client.py +++ b/src/dataverse_sdk/client.py @@ -67,13 +67,13 @@ def _get_odata(self) -> ODataClient: return self._odata # ---------------- Unified CRUD: create/update/delete ---------------- - def create(self, entity: str, records: Union[Dict[str, Any], List[Dict[str, Any]]]) -> List[str]: - """Create one or many records; always return list[str] of created IDs. + def create(self, logical_name: str, records: Union[Dict[str, Any], List[Dict[str, Any]]]) -> List[str]: + """Create one or many records by logical (singular) name; returns list[str] of created IDs. Parameters ---------- - entity : str - Entity set name (plural logical name), e.g. "accounts". + logical_name : str + Logical (singular) entity name, e.g. "account". records : dict | list[dict] A single record dict or a list of record dicts. 
@@ -84,18 +84,19 @@ def create(self, entity: str, records: Union[Dict[str, Any], List[Dict[str, Any] """ od = self._get_odata() if isinstance(records, dict): - rid = od._create_single(entity, records) + rid = od._create(logical_name, records) + # _create returns str on single input if not isinstance(rid, str): - raise TypeError("_create_single did not return GUID string") + raise TypeError("_create (single) did not return GUID string") return [rid] if isinstance(records, list): - ids = od._create_multiple(entity, records) + ids = od._create(logical_name, records) if not isinstance(ids, list) or not all(isinstance(x, str) for x in ids): - raise TypeError("_create_multiple did not return list[str]") + raise TypeError("_create (multi) did not return list[str]") return ids raise TypeError("records must be dict or list[dict]") - def update(self, entity: str, ids: Union[str, List[str]], changes: Union[Dict[str, Any], List[Dict[str, Any]]]) -> None: + def update(self, logical_name: str, ids: Union[str, List[str]], changes: Union[Dict[str, Any], List[Dict[str, Any]]]) -> None: """Update one or many records. Returns None. Usage patterns: @@ -112,31 +113,31 @@ def update(self, entity: str, ids: Union[str, List[str]], changes: Union[Dict[st if isinstance(ids, str): if not isinstance(changes, dict): raise TypeError("For single id, changes must be a dict") - od._update(entity, ids, changes) # discard representation + od._update(logical_name, ids, changes) # discard representation return None if not isinstance(ids, list): raise TypeError("ids must be str or list[str]") - od._update_by_ids(entity, ids, changes) + od._update_by_ids(logical_name, ids, changes) return None - def delete(self, entity: str, ids: Union[str, List[str]]) -> None: + def delete(self, logical_name: str, ids: Union[str, List[str]]) -> None: """Delete one or many records (GUIDs). 
Returns None.""" od = self._get_odata() if isinstance(ids, str): - od._delete(entity, ids) + od._delete(logical_name, ids) return None if not isinstance(ids, list): raise TypeError("ids must be str or list[str]") - od._delete_multiple(entity, ids) + od._delete_multiple(logical_name, ids) return None - def get(self, entity: str, record_id: str) -> dict: + def get(self, logical_name: str, record_id: str) -> dict: """Fetch a record by ID. Parameters ---------- - entity : str - Entity set name (plural logical name). + logical_name : str + Logical (singular) entity name. record_id : str The record GUID (with or without parentheses). @@ -145,17 +146,17 @@ def get(self, entity: str, record_id: str) -> dict: dict The record JSON payload. """ - return self._get_odata()._get(entity, record_id) + return self._get_odata()._get(logical_name, record_id) def get_multiple( self, - entity: str, + logical_name: str, select: Optional[List[str]] = None, filter: Optional[str] = None, orderby: Optional[List[str]] = None, top: Optional[int] = None, - expand: Optional[List[str]] = None, - page_size: Optional[int] = None, + expand: Optional[List[str]] = None, + page_size: Optional[int] = None, ) -> Iterable[List[Dict[str, Any]]]: """Fetch multiple records page-by-page as a generator. @@ -163,7 +164,7 @@ def get_multiple( Parameters mirror standard OData query options. """ return self._get_odata()._get_multiple( - entity, + logical_name, select=select, filter=filter, orderby=orderby, @@ -255,7 +256,7 @@ def list_tables(self) -> list[str]: # File upload def upload_file( self, - entity_set: str, + logical_name: str, record_id: str, file_name_attribute: str, path: str, @@ -263,12 +264,12 @@ def upload_file( mime_type: Optional[str] = None, if_none_match: bool = True, ) -> None: - """Upload a file to a Dataverse file column with automatic method selection. + """Upload a file to a Dataverse file column using a logical (singular) name. 
Parameters ---------- - entity_set : str - Target entity set (plural logical name), e.g. "accounts". + logical_name : str + Singular logical table name, e.g. "account". record_id : str GUID of the target record. file_name_attribute : str @@ -276,14 +277,9 @@ def upload_file( path : str Local filesystem path to the file. Stored filename will be the basename of this path. mode : str | None, keyword-only, optional - Upload strategy: "auto" (default), "block", "small", or "chunk". - - "auto": Automatically selects best method based on file size - - "small": Single PATCH request (files <128MB only) - - "chunk": Streaming chunked upload (any size, most efficient for large files) + Upload strategy: "auto" (default), "small", or "chunk". mime_type : str | None, keyword-only, optional - Explicit MIME type to persist with the file (e.g. "application/pdf"). If omitted the - lower-level client attempts to infer from the filename extension and falls back to - ``application/octet-stream``. + Explicit MIME type to persist with the file (e.g. "application/pdf"). if_none_match : bool, keyword-only, optional When True (default), sends ``If-None-Match: null`` to only succeed if the column is currently empty. Set False to always overwrite (uses ``If-Match: *``). @@ -294,7 +290,9 @@ def upload_file( None Returns nothing on success. Raises on failure. 
""" - self._get_odata().upload_file( + od = self._get_odata() + entity_set = od._entity_set_from_logical(logical_name) + od.upload_file( entity_set, record_id, file_name_attribute, diff --git a/src/dataverse_sdk/odata.py b/src/dataverse_sdk/odata.py index 1aa96c82..57e2c13b 100644 --- a/src/dataverse_sdk/odata.py +++ b/src/dataverse_sdk/odata.py @@ -40,13 +40,9 @@ def __init__( backoff=self.config.http_backoff, timeout=self.config.http_timeout, ) - # Cache: entity set name -> logical name (resolved via metadata lookup) - self._entityset_logical_cache = {} - # Cache: logical name -> entity set name (reverse lookup for SQL endpoint) + # Cache: logical name -> entity set name (plural) resolved from metadata self._logical_to_entityset_cache: dict[str, str] = {} - # Cache: entity set name -> primary id attribute (metadata PrimaryIdAttribute) - self._entityset_primaryid_cache: dict[str, str] = {} - # Cache: logical name -> primary id attribute + # Cache: logical name -> primary id attribute (e.g. accountid) self._logical_primaryid_cache: dict[str, str] = {} # Picklist label cache: (logical_name, attribute_logical) -> {'map': {...}, 'ts': epoch_seconds} self._picklist_label_cache = {} @@ -68,46 +64,47 @@ def _request(self, method: str, url: str, **kwargs): return self._http.request(method, url, **kwargs) # ----------------------------- CRUD --------------------------------- - def _create(self, entity_set: str, data: Union[Dict[str, Any], List[Dict[str, Any]]]) -> Union[str, List[str]]: - """Create one or many records. + def _create(self, logical_name: str, data: Union[Dict[str, Any], List[Dict[str, Any]]]) -> Union[str, List[str]]: + """Create one or many records by logical (singular) name. Parameters ---------- - entity_set : str - Entity set (plural logical name), e.g. "accounts". + logical_name : str + Logical (singular) entity name, e.g. "account". data : dict | list[dict] Single entity payload or list of payloads for batch create. 
Behaviour --------- + - Resolves entity set once per call via metadata (cached) then issues requests. - Single (dict): POST /{entity_set}. Returns GUID string (no representation fetched). - Multiple (list[dict]): POST /{entity_set}/Microsoft.Dynamics.CRM.CreateMultiple. Returns list[str] of created GUIDs. Multi-create logical name resolution ------------------------------------ - - If any payload omits ``@odata.type`` the client performs a metadata lookup (once per entity set, cached) - to resolve the logical name and stamps ``Microsoft.Dynamics.CRM.`` into missing payloads. - - If all payloads already include ``@odata.type`` no lookup or modification occurs. + - If any payload omits ``@odata.type`` the client stamps ``Microsoft.Dynamics.CRM.``. + - If all payloads already include ``@odata.type`` no modification occurs. Returns ------- str | list[str] Created record GUID (single) or list of created IDs (multi). """ + entity_set = self._entity_set_from_logical(logical_name) if isinstance(data, dict): - return self._create_single(entity_set, data) + return self._create_single(entity_set, logical_name, data) if isinstance(data, list): - return self._create_multiple(entity_set, data) + return self._create_multiple(entity_set, logical_name, data) raise TypeError("data must be dict or list[dict]") # --- Internal helpers --- - def _create_single(self, entity_set: str, record: Dict[str, Any]) -> str: + def _create_single(self, entity_set: str, logical_name: str, record: Dict[str, Any]) -> str: """Create a single record and return its GUID. Relies on OData-EntityId (canonical) or Location header. No response body parsing is performed. Raises RuntimeError if neither header contains a GUID. 
""" - record = self._convert_labels_to_ints(entity_set, record) + record = self._convert_labels_to_ints(logical_name, record) url = f"{self.api}/{entity_set}" headers = self._headers().copy() r = self._request("post", url, headers=headers, json=record) @@ -128,56 +125,18 @@ def _create_single(self, entity_set: str, record: Dict[str, Any]) -> str: f"Create response missing GUID in OData-EntityId/Location headers (status={getattr(r,'status_code', '?')}). Headers: {header_keys}" ) - def _logical_from_entity_set(self, entity_set: str) -> str: - """Resolve logical name from an entity set using metadata (cached).""" - es = (entity_set or "").strip() - if not es: - raise ValueError("entity_set is required") - cached = self._entityset_logical_cache.get(es) - if cached: - return cached - url = f"{self.api}/EntityDefinitions" - # Escape single quotes in entity set name - es_escaped = self._escape_odata_quotes(es) - params = { - "$select": "LogicalName,EntitySetName,PrimaryIdAttribute", - "$filter": f"EntitySetName eq '{es_escaped}'", - } - r = self._request("get", url, headers=self._headers(), params=params) - r.raise_for_status() - try: - body = r.json() - items = body.get("value", []) if isinstance(body, dict) else [] - except ValueError: - items = [] - if not items: - raise RuntimeError(f"Unable to resolve logical name for entity set '{es}'. 
Provide @odata.type explicitly.") - md = items[0] - logical = md.get("LogicalName") - if not logical: - raise RuntimeError(f"Metadata response missing LogicalName for entity set '{es}'.") - primary_id_attr = md.get("PrimaryIdAttribute") - self._entityset_logical_cache[es] = logical - if isinstance(primary_id_attr, str) and primary_id_attr: - self._entityset_primaryid_cache[es] = primary_id_attr - self._logical_primaryid_cache[logical] = primary_id_attr - return logical - - def _create_multiple(self, entity_set: str, records: List[Dict[str, Any]]) -> List[str]: + def _create_multiple(self, entity_set: str, logical_name: str, records: List[Dict[str, Any]]) -> List[str]: if not all(isinstance(r, dict) for r in records): raise TypeError("All items for multi-create must be dicts") need_logical = any("@odata.type" not in r for r in records) - logical: Optional[str] = None - if need_logical: - logical = self._logical_from_entity_set(entity_set) enriched: List[Dict[str, Any]] = [] for r in records: - r = self._convert_labels_to_ints(entity_set, r) - if "@odata.type" in r or not logical: + r = self._convert_labels_to_ints(logical_name, r) + if "@odata.type" in r or not need_logical: enriched.append(r) else: nr = r.copy() - nr["@odata.type"] = f"Microsoft.Dynamics.CRM.{logical}" + nr["@odata.type"] = f"Microsoft.Dynamics.CRM.{logical_name}" enriched.append(nr) payload = {"Targets": enriched} # Bound action form: POST {entity_set}/Microsoft.Dynamics.CRM.CreateMultiple @@ -212,24 +171,27 @@ def _create_multiple(self, entity_set: str, records: List[Dict[str, Any]]) -> Li return [] # --- Derived helpers for high-level client ergonomics --- - def _primary_id_attr(self, entity_set: str) -> str: - """Return primary key attribute using metadata (fallback to id).""" - pid = self._entityset_primaryid_cache.get(entity_set) - if pid: - return pid - logical = self._logical_from_entity_set(entity_set) - pid = self._entityset_primaryid_cache.get(entity_set) or 
self._logical_primaryid_cache.get(logical) + def _primary_id_attr(self, logical_name: str) -> str: + """Return primary key attribute using metadata; error if unavailable.""" + pid = self._logical_primaryid_cache.get(logical_name) if pid: return pid - return f"{logical}id" + # Resolve metadata (populates _logical_primaryid_cache or raises if logical unknown) + self._entity_set_from_logical(logical_name) + pid2 = self._logical_primaryid_cache.get(logical_name) + if pid2: + return pid2 + raise RuntimeError( + f"PrimaryIdAttribute not resolved for logical name '{logical_name}'. Metadata did not include PrimaryIdAttribute." + ) - def _update_by_ids(self, entity_set: str, ids: List[str], changes: Union[Dict[str, Any], List[Dict[str, Any]]]) -> None: + def _update_by_ids(self, logical_name: str, ids: List[str], changes: Union[Dict[str, Any], List[Dict[str, Any]]]) -> None: """Update many records by GUID list using UpdateMultiple under the hood. Parameters ---------- - entity_set : str - Entity set (plural logical name). + logical_name : str + Logical name (singular). ids : list[str] GUIDs of target records. 
changes : dict | list[dict] @@ -239,10 +201,11 @@ def _update_by_ids(self, entity_set: str, ids: List[str], changes: Union[Dict[st raise TypeError("ids must be list[str]") if not ids: return None - pk_attr = self._primary_id_attr(entity_set) + pk_attr = self._primary_id_attr(logical_name) + entity_set = self._entity_set_from_logical(logical_name) if isinstance(changes, dict): batch = [{pk_attr: rid, **changes} for rid in ids] - self._update_multiple(entity_set, batch) + self._update_multiple(entity_set, logical_name, batch) return None if not isinstance(changes, list): raise TypeError("changes must be dict or list[dict]") @@ -253,15 +216,15 @@ def _update_by_ids(self, entity_set: str, ids: List[str], changes: Union[Dict[st if not isinstance(patch, dict): raise TypeError("Each patch must be a dict") batch.append({pk_attr: rid, **patch}) - self._update_multiple(entity_set, batch) + self._update_multiple(entity_set, logical_name, batch) return None - def _delete_multiple(self, entity_set: str, ids: List[str]) -> None: + def _delete_multiple(self, logical_name: str, ids: List[str]) -> None: """Delete many records by GUID list (simple loop; potential future optimization point).""" if not isinstance(ids, list): raise TypeError("ids must be list[str]") for rid in ids: - self.delete(entity_set, rid) + self._delete(logical_name, rid) return None def _format_key(self, key: str) -> str: @@ -279,13 +242,13 @@ def esc(match): return f"({k})" return f"({k})" - def _update(self, entity_set: str, key: str, data: Dict[str, Any]) -> None: + def _update(self, logical_name: str, key: str, data: Dict[str, Any]) -> None: """Update an existing record. Parameters ---------- - entity_set : str - Entity set name (plural logical name). + logical_name : str + Logical (singular) entity name. key : str Record GUID (with or without parentheses) or alternate key. 
data : dict @@ -295,20 +258,23 @@ def _update(self, entity_set: str, key: str, data: Dict[str, Any]) -> None: ------- None """ - data = self._convert_labels_to_ints(entity_set, data) + data = self._convert_labels_to_ints(logical_name, data) + entity_set = self._entity_set_from_logical(logical_name) url = f"{self.api}/{entity_set}{self._format_key(key)}" headers = self._headers().copy() headers["If-Match"] = "*" r = self._request("patch", url, headers=headers, json=data) r.raise_for_status() - def _update_multiple(self, entity_set: str, records: List[Dict[str, Any]]) -> None: + def _update_multiple(self, entity_set: str, logical_name: str, records: List[Dict[str, Any]]) -> None: """Bulk update existing records via the collection-bound UpdateMultiple action. Parameters ---------- entity_set : str - Entity set (plural logical name), e.g. "accounts". + Resolved entity set name. + logical_name : str + Logical (singular) name, e.g. "account". records : list[dict] Each dict must include the real primary key attribute for the entity (e.g. ``accountid``) and one or more fields to update. 
If ``@odata.type`` is omitted in any payload, the logical name is resolved once and @@ -335,17 +301,14 @@ def _update_multiple(self, entity_set: str, records: List[Dict[str, Any]]) -> No # Determine whether we need logical name resolution (@odata.type missing in any payload) need_logical = any("@odata.type" not in r for r in records) - logical: Optional[str] = None - if need_logical: - logical = self._logical_from_entity_set(entity_set) enriched: List[Dict[str, Any]] = [] for r in records: - r = self._convert_labels_to_ints(entity_set, r) - if "@odata.type" in r or not logical: + r = self._convert_labels_to_ints(logical_name, r) + if "@odata.type" in r or not need_logical: enriched.append(r) else: nr = r.copy() - nr["@odata.type"] = f"Microsoft.Dynamics.CRM.{logical}" + nr["@odata.type"] = f"Microsoft.Dynamics.CRM.{logical_name}" enriched.append(nr) payload = {"Targets": enriched} @@ -356,21 +319,22 @@ def _update_multiple(self, entity_set: str, records: List[Dict[str, Any]]) -> No # Intentionally ignore response content: no stable contract for IDs across environments. return None - def _delete(self, entity_set: str, key: str) -> None: + def _delete(self, logical_name: str, key: str) -> None: """Delete a record by GUID or alternate key.""" + entity_set = self._entity_set_from_logical(logical_name) url = f"{self.api}/{entity_set}{self._format_key(key)}" headers = self._headers().copy() headers["If-Match"] = "*" r = self._request("delete", url, headers=headers) r.raise_for_status() - def _get(self, entity_set: str, key: str, select: Optional[str] = None) -> Dict[str, Any]: + def _get(self, logical_name: str, key: str, select: Optional[str] = None) -> Dict[str, Any]: """Retrieve a single record. Parameters ---------- - entity_set : str - Entity set name. + logical_name : str + Logical (singular) name. key : str Record GUID (with or without parentheses) or alternate key syntax. 
select : str | None @@ -379,6 +343,7 @@ def _get(self, entity_set: str, key: str, select: Optional[str] = None) -> Dict[ params = {} if select: params["$select"] = select + entity_set = self._entity_set_from_logical(logical_name) url = f"{self.api}/{entity_set}{self._format_key(key)}" r = self._request("get", url, headers=self._headers(), params=params) r.raise_for_status() @@ -386,7 +351,7 @@ def _get(self, entity_set: str, key: str, select: Optional[str] = None) -> Dict[ def _get_multiple( self, - entity_set: str, + logical_name: str, select: Optional[List[str]] = None, filter: Optional[str] = None, orderby: Optional[List[str]] = None, @@ -398,8 +363,8 @@ def _get_multiple( Parameters ---------- - entity_set : str - Entity set name (plural logical name). + logical_name : str + Logical (singular) entity name. select : list[str] | None Columns to select; joined with commas into $select. filter : str | None @@ -433,6 +398,7 @@ def _do_request(url: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[st except ValueError: return {} + entity_set = self._entity_set_from_logical(logical_name) base_url = f"{self.api}/{entity_set}" params: Dict[str, Any] = {} if select: @@ -571,7 +537,8 @@ def _entity_set_from_logical(self, logical: str) -> str: except ValueError: items = [] if not items: - raise RuntimeError(f"Unable to resolve entity set for logical name '{logical}'.") + plural_hint = " (did you pass a plural entity set name instead of the singular logical name?)" if logical.endswith("s") and not logical.endswith("ss") else "" + raise RuntimeError(f"Unable to resolve entity set for logical name '{logical}'. 
Provide the singular logical name.{plural_hint}") md = items[0] es = md.get("EntitySetName") if not es: @@ -580,7 +547,6 @@ def _entity_set_from_logical(self, logical: str) -> str: primary_id_attr = md.get("PrimaryIdAttribute") if isinstance(primary_id_attr, str) and primary_id_attr: self._logical_primaryid_cache[logical] = primary_id_attr - self._entityset_primaryid_cache[es] = primary_id_attr return es # ---------------------- Table metadata helpers ---------------------- @@ -774,7 +740,7 @@ def _normalize_picklist_label(self, label: str) -> str: norm = re.sub(r"\s+", " ", norm).strip().lower() return norm - def _optionset_map(self, entity_set: str, attr_logical: str) -> Optional[Dict[str, int]]: + def _optionset_map(self, logical_name: str, attr_logical: str) -> Optional[Dict[str, int]]: """Build or return cached mapping of normalized label -> value for a picklist attribute. Returns empty dict if attribute is not a picklist or has no options. Returns None only @@ -784,17 +750,16 @@ def _optionset_map(self, entity_set: str, attr_logical: str) -> Optional[Dict[st ----- - This method calls the Web API twice per attribute so it could have perf impact when there are lots of columns on the entity. 
""" - if not entity_set or not attr_logical: + if not logical_name or not attr_logical: return None - logical = self._logical_from_entity_set(entity_set) - cache_key = (logical, attr_logical.lower()) + cache_key = (logical_name, attr_logical.lower()) now = time.time() entry = self._picklist_label_cache.get(cache_key) if isinstance(entry, dict) and 'map' in entry and (now - entry.get('ts', 0)) < self._picklist_cache_ttl_seconds: return entry['map'] attr_esc = self._escape_odata_quotes(attr_logical) - logical_esc = self._escape_odata_quotes(logical) + logical_esc = self._escape_odata_quotes(logical_name) # Step 1: lightweight fetch (no expand) to determine attribute type url_type = ( @@ -813,7 +778,7 @@ def _optionset_map(self, entity_set: str, attr_logical: str) -> Optional[Dict[st if r_type.status_code == 404: # After retries we still cannot find the attribute definition – treat as fatal so caller sees a clear error. raise RuntimeError( - f"Picklist attribute metadata not found after retries: entity='{logical}' attribute='{attr_logical}' (404)" + f"Picklist attribute metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)" ) r_type.raise_for_status() @@ -841,7 +806,7 @@ def _optionset_map(self, entity_set: str, attr_logical: str) -> Optional[Dict[st if attempt < 2: time.sleep(0.4 * (2 ** attempt)) # 0.4s, 0.8s if r_opts.status_code == 404: - raise RuntimeError(f"Picklist OptionSet metadata not found after retries: entity='{logical}' attribute='{attr_logical}' (404)") + raise RuntimeError(f"Picklist OptionSet metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)") r_opts.raise_for_status() attr_full = {} @@ -876,7 +841,7 @@ def _optionset_map(self, entity_set: str, attr_logical: str) -> Optional[Dict[st self._picklist_label_cache[cache_key] = {'map': {}, 'ts': now} return {} - def _convert_labels_to_ints(self, entity_set: str, record: Dict[str, Any]) -> Dict[str, Any]: + def 
_convert_labels_to_ints(self, logical_name: str, record: Dict[str, Any]) -> Dict[str, Any]: """Return a copy of record with any labels converted to option ints. Heuristic: For each string value, attempt to resolve against picklist metadata. @@ -886,7 +851,7 @@ def _convert_labels_to_ints(self, entity_set: str, record: Dict[str, Any]) -> Di for k, v in list(out.items()): if not isinstance(v, str) or not v.strip(): continue - mapping = self._optionset_map(entity_set, k) + mapping = self._optionset_map(logical_name, k) if not mapping: continue norm = self._normalize_picklist_label(v) @@ -1056,7 +1021,6 @@ def _create_table(self, tablename: str, schema: Dict[str, Any]) -> Dict[str, Any "metadata_id": metadata_id, "columns_created": created_cols, } - # ---------------------- Cache maintenance ------------------------- def _flush_cache( self, diff --git a/src/dataverse_sdk/odata_pandas_wrappers.py b/src/dataverse_sdk/odata_pandas_wrappers.py index 06e9e102..3f857cbe 100644 --- a/src/dataverse_sdk/odata_pandas_wrappers.py +++ b/src/dataverse_sdk/odata_pandas_wrappers.py @@ -54,13 +54,13 @@ def __init__(self, odata_client: ODataClient) -> None: self._c = odata_client # ---------------------------- Create --------------------------------- - def create_df(self, entity_set: str, record: pd.Series) -> str: + def create_df(self, logical_name: str, record: pd.Series) -> str: """Create a single record from a pandas Series and return the GUID. Parameters ---------- - entity_set : str - Target Dataverse entity set name (entity set logical plural). + logical_name : str + Logical (singular) entity name, e.g. "account". record : pandas.Series Series whose index labels are field logical names. 
@@ -72,19 +72,19 @@ def create_df(self, entity_set: str, record: pd.Series) -> str: if not isinstance(record, pd.Series): raise TypeError("record must be a pandas Series") payload = {k: v for k, v in record.items()} - created_ids = self._c.create(entity_set, payload) + created_ids = self._c.create(logical_name, payload) if not isinstance(created_ids, list) or len(created_ids) != 1 or not isinstance(created_ids[0], str): raise RuntimeError("Unexpected create return shape (expected single-element list of GUID str)") return created_ids[0] # ---------------------------- Update --------------------------------- - def update(self, entity_set: str, record_id: str, entity_data: pd.Series) -> None: + def update(self, logical_name: str, record_id: str, entity_data: pd.Series) -> None: """Update a single record (returns None). Parameters ---------- - entity_set : str - Target Dataverse entity set name (plural logical name). + logical_name : str + Logical (singular) entity name. record_id : str GUID of the record to update. entity_data : pandas.Series @@ -103,16 +103,39 @@ def update(self, entity_set: str, record_id: str, entity_data: pd.Series) -> Non payload = {k: v for k, v in entity_data.items()} if not payload: return # nothing to send - self._c.update(entity_set, record_id, payload) + self._c.update(logical_name, record_id, payload) # ---------------------------- Delete --------------------------------- - def delete_ids(self, entity_set: str, record_id: str) -> None: - """Delete a collection of record IDs. + def delete_ids(self, logical_name: str, ids: Sequence[str] | pd.Series | pd.Index) -> pd.DataFrame: + """Delete a collection of record IDs and return a summary DataFrame. + + Parameters + ---------- + logical_name : str + Logical (singular) entity name. + ids : sequence[str] | pandas.Series | pandas.Index + Collection of GUIDs to delete. 
+ + Returns + ------- + pandas.DataFrame + Columns: id, success (bool), error (str nullable) """ - self._c.delete(entity_set, record_id) + if isinstance(ids, (pd.Series, pd.Index)): + id_list = [str(x) for x in ids.tolist()] + else: + id_list = [str(x) for x in ids] + results = [] + for rid in id_list: + try: + self._c.delete(logical_name, rid) + results.append({"id": rid, "success": True, "error": None}) + except Exception as e: # noqa: BLE001 + results.append({"id": rid, "success": False, "error": str(e)}) + return pd.DataFrame(results) # ------------------------------ Get ---------------------------------- - def get_ids(self, entity_set: str, ids: Sequence[str] | pd.Series | pd.Index, select: Optional[Iterable[str]] = None) -> pd.DataFrame: + def get_ids(self, logical_name: str, ids: Sequence[str] | pd.Series | pd.Index, select: Optional[Iterable[str]] = None) -> pd.DataFrame: """Fetch multiple records by ID and return a DataFrame. Missing records are included with NaN for fields and an error column entry. @@ -123,9 +146,15 @@ def get_ids(self, entity_set: str, ids: Sequence[str] | pd.Series | pd.Index, se id_list = [str(x) for x in ids] rows = [] any_errors = False + select_arg = None + if select: + # ensure iterable of strings -> list -> join + select_list = [str(c) for c in select] + if select_list: + select_arg = ",".join(select_list) for rec_id in id_list: try: - data = self._c.get(entity_set, rec_id, select=",".join(select) if select else None) + data = self._c.get(logical_name, rec_id, select=select_arg) rows.append(data) except Exception as e: # noqa: BLE001 any_errors = True diff --git a/src/dataverse_sdk/odata_upload_files.py b/src/dataverse_sdk/odata_upload_files.py index 3f3289bb..29458d74 100644 --- a/src/dataverse_sdk/odata_upload_files.py +++ b/src/dataverse_sdk/odata_upload_files.py @@ -43,9 +43,6 @@ def upload_file( Local filesystem path to the file. mode : str | None Upload strategy: "auto" (default), "small", or "chunk". 
- - "auto": Automatically selects based on size - - "small": Single PATCH request (files <128MB only) - - "chunk": Streaming chunked upload (any size) mime_type : str | None Explicit MIME type. If omitted falls back to application/octet-stream. if_none_match : bool diff --git a/tests/test_logical_crud.py b/tests/test_logical_crud.py new file mode 100644 index 00000000..e11cd0b2 --- /dev/null +++ b/tests/test_logical_crud.py @@ -0,0 +1,124 @@ +import types +import pytest +from dataverse_sdk.odata import ODataClient + +class DummyAuth: + def acquire_token(self, scope): + class T: access_token = "x" + return T() + +class DummyHTTPClient: + def __init__(self, responses): + self._responses = responses + self.calls = [] + def request(self, method, url, **kwargs): + self.calls.append((method, url, kwargs)) + # Pop next prepared response tuple + if not self._responses: + raise AssertionError("No more dummy responses configured") + status, headers, body = self._responses.pop(0) + resp = types.SimpleNamespace() + resp.status_code = status + resp.headers = headers + resp.text = body if isinstance(body, str) else (body and "{}") + def raise_for_status(): + if status >= 400: + raise RuntimeError(f"HTTP {status}") + return None + resp.raise_for_status = raise_for_status + def json_func(): + import json as _json + if isinstance(body, dict): + return body + try: + return _json.loads(body) if isinstance(body, str) else {} + except Exception: + return {} + resp.json = json_func + return resp + +class TestableClient(ODataClient): + def __init__(self, responses): + super().__init__(DummyAuth(), "https://org.example", None) + self._http = DummyHTTPClient(responses) + +# Helper metadata response for logical name resolution +MD_ACCOUNT = { + "value": [ + { + "LogicalName": "account", + "EntitySetName": "accounts", + "PrimaryIdAttribute": "accountid" + } + ] +} + +MD_SAMPLE = { + "value": [ + { + "LogicalName": "new_sampleitem", + "EntitySetName": "new_sampleitems", + 
"PrimaryIdAttribute": "new_sampleitemid" + } + ] +} + +def make_entity_create_headers(entity_set, guid): + return {"OData-EntityId": f"https://org.example/api/data/v9.2/{entity_set}({guid})"} + + +def test_single_create_update_delete_get(): + guid = "11111111-2222-3333-4444-555555555555" + # Sequence: metadata lookup, single create, single get, update, delete + responses = [ + (200, {}, MD_ACCOUNT), # metadata for account + (204, make_entity_create_headers("accounts", guid), {}), # create + (200, {}, {"accountid": guid, "name": "Acme"}), # get + (204, {}, {}), # update (no body) + (204, {}, {}), # delete + ] + c = TestableClient(responses) + rid = c._create("account", {"name": "Acme"}) + assert rid == guid + rec = c._get("account", rid, select="accountid,name") + assert rec["accountid"] == guid and rec["name"] == "Acme" + c._update("account", rid, {"telephone1": "555"}) # returns None + c._delete("account", rid) # returns None + + +def test_bulk_create_and_update(): + g1 = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + g2 = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb" + # Sequence: metadata, bulk create, bulk update (broadcast), bulk update (1:1) + responses = [ + (200, {}, MD_ACCOUNT), + (200, {}, {"Ids": [g1, g2]}), # CreateMultiple + (204, {}, {}), # UpdateMultiple broadcast + (204, {}, {}), # UpdateMultiple 1:1 + ] + c = TestableClient(responses) + ids = c._create("account", [{"name": "A"}, {"name": "B"}]) + assert ids == [g1, g2] + c._update_by_ids("account", ids, {"statecode": 1}) # broadcast + c._update_by_ids("account", ids, [{"name": "A1"}, {"name": "B1"}]) # per-record + + +def test_get_multiple_paging(): + # metadata, first page, second page + responses = [ + (200, {}, MD_ACCOUNT), + (200, {}, {"value": [{"accountid": "1"}], "@odata.nextLink": "https://org.example/api/data/v9.2/accounts?$skip=1"}), + (200, {}, {"value": [{"accountid": "2"}]}), + ] + c = TestableClient(responses) + pages = list(c._get_multiple("account", select=["accountid"], page_size=1)) + assert 
pages == [[{"accountid": "1"}], [{"accountid": "2"}]] + + +def test_unknown_logical_name_raises(): + responses = [ + (200, {}, {"value": []}), # metadata lookup returns empty + ] + c = TestableClient(responses) + with pytest.raises(RuntimeError): + c._create("nonexistent", {"x": 1}) \ No newline at end of file