diff --git a/.gitignore b/.gitignore index ce892922..6f1d8a6b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,418 +1,23 @@ -## Ignore Visual Studio temporary files, build results, and -## files generated by popular Visual Studio add-ons. -## -## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore - -# User-specific files -*.rsuser -*.suo -*.user -*.userosscache -*.sln.docstates -*.env - -# User-specific files (MonoDevelop/Xamarin Studio) -*.userprefs - -# Mono auto generated files -mono_crash.* - -# Build results -[Dd]ebug/ -[Dd]ebugPublic/ -[Rr]elease/ -[Rr]eleases/ -x64/ -x86/ -[Ww][Ii][Nn]32/ -[Aa][Rr][Mm]/ -[Aa][Rr][Mm]64/ -[Aa][Rr][Mm]64[Ee][Cc]/ -bld/ -[Oo]bj/ -[Oo]ut/ -[Ll]og/ -[Ll]ogs/ - -# Build results on 'Bin' directories -**/[Bb]in/* -# Uncomment if you have tasks that rely on *.refresh files to move binaries -# (https://github.com/github/gitignore/pull/3736) -#!**/[Bb]in/*.refresh - -# Visual Studio 2015/2017 cache/options directory -.vs/ -# Uncomment if you have tasks that create the project's static files in wwwroot -#wwwroot/ - -# Visual Studio 2017 auto generated files -Generated\ Files/ - -# MSTest test Results -[Tt]est[Rr]esult*/ -[Bb]uild[Ll]og.* -*.trx - -# NUnit -*.VisualState.xml -TestResult.xml -nunit-*.xml - -# Approval Tests result files -*.received.* - -# Build Results of an ATL Project -[Dd]ebugPS/ -[Rr]eleasePS/ -dlldata.c - -# Benchmark Results -BenchmarkDotNet.Artifacts/ - -# .NET Core -project.lock.json -project.fragment.lock.json -artifacts/ - -# ASP.NET Scaffolding -ScaffoldingReadMe.txt - -# StyleCop -StyleCopReport.xml - -# Files built by Visual Studio -*_i.c -*_p.c -*_h.h -*.ilk -*.meta -*.obj -*.idb -*.iobj -*.pch -*.pdb -*.ipdb -*.pgc -*.pgd -*.rsp -# but not Directory.Build.rsp, as it configures directory-level build defaults -!Directory.Build.rsp -*.sbr -*.tlb -*.tli -*.tlh -*.tmp -*.tmp_proj -*_wpftmp.csproj -*.log -*.tlog -*.vspscc -*.vssscc -.builds -*.pidb -*.svclog -*.scc - -# Chutzpah Test files -_Chutzpah* - -# Visual C++ cache files -ipch/ -*.aps -*.ncb -*.opendb -*.opensdf -*.sdf -*.cachefile -*.VC.db -*.VC.VC.opendb - -# Visual Studio profiler -*.psess -*.vsp -*.vspx -*.sap - -# Visual Studio Trace Files -*.e2e - -# TFS 2012 Local Workspace -$tf/ - -# Guidance Automation Toolkit -*.gpState - -# ReSharper is a .NET coding add-in -_ReSharper*/ -*.[Rr]e[Ss]harper -*.DotSettings.user - -# TeamCity is a build add-in -_TeamCity* - -# DotCover is a Code Coverage Tool -*.dotCover - -# AxoCover is a Code Coverage Tool -.axoCover/* -!.axoCover/settings.json - -# Coverlet is a free, cross platform Code Coverage Tool -coverage*.json -coverage*.xml -coverage*.info - -# Visual Studio code coverage results -*.coverage -*.coveragexml - -# NCrunch -_NCrunch_* -.NCrunch_* -.*crunch*.local.xml -nCrunchTemp_* - -# MightyMoose -*.mm.* -AutoTest.Net/ - -# Web workbench (sass) -.sass-cache/ - -# Installshield output folder -[Ee]xpress/ - -# DocProject is a documentation generator add-in -DocProject/buildhelp/ -DocProject/Help/*.HxT -DocProject/Help/*.HxC -DocProject/Help/*.hhc -DocProject/Help/*.hhk -DocProject/Help/*.hhp -DocProject/Help/Html2 -DocProject/Help/html - -# Click-Once directory -publish/ - -# Publish Web Output -*.[Pp]ublish.xml -*.azurePubxml -# Note: Comment the next line if you want to checkin your web deploy settings, -# but database connection strings (with potential passwords) will be unencrypted -*.pubxml -*.publishproj - -# Microsoft Azure Web App publish settings. Comment the next line if you want to -# checkin your Azure Web App publish settings, but sensitive information contained -# in these scripts will be unencrypted -PublishScripts/ - -# NuGet Packages -*.nupkg -# NuGet Symbol Packages -*.snupkg -# The packages folder can be ignored because of Package Restore -**/[Pp]ackages/* -# except build/, which is used as an MSBuild target. -!**/[Pp]ackages/build/ -# Uncomment if necessary however generally it will be regenerated when needed -#!**/[Pp]ackages/repositories.config -# NuGet v3's project.json files produces more ignorable files -*.nuget.props -*.nuget.targets - -# Microsoft Azure Build Output -csx/ -*.build.csdef - -# Microsoft Azure Emulator -ecf/ -rcf/ - -# Windows Store app package directories and files -AppPackages/ -BundleArtifacts/ -Package.StoreAssociation.xml -_pkginfo.txt -*.appx -*.appxbundle -*.appxupload - -# Visual Studio cache files -# files ending in .cache can be ignored -*.[Cc]ache -# but keep track of directories ending in .cache -!?*.[Cc]ache/ - -# Others -ClientBin/ -~$* -*~ -*.dbmdl -*.dbproj.schemaview -*.jfm -*.pfx -*.publishsettings -orleans.codegen.cs - -# Including strong name files can present a security risk -# (https://github.com/github/gitignore/pull/2483#issue-259490424) -#*.snk - -# Since there are multiple workflows, uncomment next line to ignore bower_components -# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) -#bower_components/ - -# RIA/Silverlight projects -Generated_Code/ - -# Backup & report files from converting an old project file -# to a newer Visual Studio version. Backup files are not needed, -# because we have git ;-) -_UpgradeReport_Files/ -Backup*/ -UpgradeLog*.XML -UpgradeLog*.htm -ServiceFabricBackup/ -*.rptproj.bak - -# SQL Server files -*.mdf -*.ldf -*.ndf - -# Business Intelligence projects -*.rdl.data -*.bim.layout -*.bim_*.settings -*.rptproj.rsuser -*- [Bb]ackup.rdl -*- [Bb]ackup ([0-9]).rdl -*- [Bb]ackup ([0-9][0-9]).rdl - -# Microsoft Fakes -FakesAssemblies/ - -# GhostDoc plugin setting file -*.GhostDoc.xml - -# Node.js Tools for Visual Studio -.ntvs_analysis.dat -node_modules/ - -# Visual Studio 6 build log -*.plg - -# Visual Studio 6 workspace options file -*.opt - -# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) -*.vbw - -# Visual Studio 6 auto-generated project file (contains which files were open etc.) -*.vbp - -# Visual Studio 6 workspace and project file (working project files containing files to include in project) -*.dsw -*.dsp - -# Visual Studio 6 technical files -*.ncb -*.aps - -# Visual Studio LightSwitch build output -**/*.HTMLClient/GeneratedArtifacts -**/*.DesktopClient/GeneratedArtifacts -**/*.DesktopClient/ModelManifest.xml -**/*.Server/GeneratedArtifacts -**/*.Server/ModelManifest.xml -_Pvt_Extensions - -# Paket dependency manager -**/.paket/paket.exe -paket-files/ - -# FAKE - F# Make -**/.fake/ - -# CodeRush personal settings -**/.cr/personal - -# Python Tools for Visual Studio (PTVS) -**/__pycache__/ +__pycache__/ *.pyc - -# Cake - Uncomment if you are using it -#tools/** -#!tools/packages.config - -# Tabs Studio -*.tss - -# Telerik's JustMock configuration file -*.jmconfig - -# BizTalk build output -*.btp.cs -*.btm.cs -*.odx.cs -*.xsd.cs - -# OpenCover UI analysis results -OpenCover/ - -# Azure Stream Analytics local run output -ASALocalRun/ - -# MSBuild Binary and Structured Log -*.binlog -MSBuild_Logs/ - -# AWS SAM Build and Temporary Artifacts folder -.aws-sam - -# NVidia Nsight GPU debugger configuration file -*.nvuser - -# MFractors (Xamarin productivity tool) working folder -**/.mfractor/ - -# Local History for Visual Studio -**/.localhistory/ - -# Visual Studio History (VSHistory) files -.vshistory/ - -# BeatPulse healthcheck temp database -healthchecksdb - -# Backup folder for Package Reference Convert tool in Visual Studio 2017 -MigrationBackup/ - -# Ionide (cross platform F# VS Code tools) working folder -**/.ionide/ - -# Fody - auto-generated XML schema -FodyWeavers.xsd - -# VS Code files for those working on multiple tools -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json -!.vscode/*.code-snippets - -# Local History for Visual Studio Code -.history/ - -# Built Visual Studio Code Extensions -*.vsix - -# Windows Installer files from build outputs -*.cab -*.msi -*.msix -*.msm -*.msp +.env +.venv/ +.envrc +.cache/ +.pytest_cache/ + +# Build & packaging artifacts +build/ +dist/ +*.egg-info/ +.eggs/ + +# Coverage +.coverage +htmlcov/ + +# IDE/OS noise +.vscode/ +.idea/ +Thumbs.db +.DS_Store diff --git a/README.md b/README.md index ad2e7bee..0bd8adae 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,157 @@ -# Project +# Dataverse SDK (Python) — Proof of Concept -> This repo has been populated by an initial template to help get you started. Please -> make sure to update the content to build a great experience for community-building. +A minimal Python SDK to use Microsoft Dataverse as a database for Azure AI Foundry–style apps. -As the maintainer of this project, please make a few updates: +- Read (SQL) — Execute read-only T‑SQL via the McpExecuteSqlQuery Custom API. Returns `list[dict]`. +- OData CRUD — Thin wrappers over Dataverse Web API (create/get/update/delete). +- Metadata helpers — Create/inspect/delete simple custom tables (EntityDefinitions + Attributes). +- Pandas helpers — Convenience DataFrame oriented wrappers for quick prototyping/notebooks. +- Auth — Azure Identity (`TokenCredential`) injection. -- Improving this README.MD file to provide a great experience -- Updating SUPPORT.MD with content about this project's support experience -- Understanding the security reporting process in SECURITY.MD -- Remove this section from the README +## Features + +- Simple `DataverseClient` facade for CRUD, SQL (read-only), and table metadata. +- SQL-over-API: T-SQL routed through Custom API endpoint (no ODBC / TDS driver required). +- Table metadata ops: create simple custom tables with primitive columns (string/int/decimal/float/datetime/bool) and delete them. +- Optional pandas integration (`PandasODataClient`) for DataFrame based create / get / query. + +Auth: +- Credential is optional; if omitted, the SDK uses `DefaultAzureCredential`. +- You can pass any `azure.core.credentials.TokenCredential` you prefer; examples use `InteractiveBrowserCredential` for local runs. +- Token scope used by the SDK: `https://.crm.dynamics.com/.default` (derived from `base_url`). + +## Install + +Create and activate a Python 3.13+ environment, then install dependencies: + +```powershell +# from the repo root +python -m pip install -r requirements.txt +``` + +Direct TDS via ODBC is not used; SQL reads are executed via the Custom API over OData. + +## Configuration Notes + +- For Web API (OData), tokens target your Dataverse org URL scope: https://yourorg.crm.dynamics.com/.default. The SDK requests this scope from the provided TokenCredential. +- For complete functionalities, please use one of the PREPROD BAP environments, otherwise McpExecuteSqlQuery might not work. + +### Configuration (DataverseConfig) + +Pass a `DataverseConfig` or rely on sane defaults: + +```python +from dataverse_sdk import DataverseClient +from dataverse_sdk.config import DataverseConfig + +cfg = DataverseConfig() # defaults: language_code=1033, sql_api_name="McpExecuteSqlQuery" +client = DataverseClient(base_url="https://yourorg.crm.dynamics.com", config=cfg) + +# Optional HTTP tunables (timeouts/retries) +# cfg.http_retries, cfg.http_backoff, cfg.http_timeout +``` + +## Quickstart + +Edit `examples/quickstart.py` and run: + +```powershell +python examples/quickstart.py +``` + +The quickstart demonstrates: +- Creating a simple custom table (metadata APIs) +- Creating, reading, updating, and deleting records (OData) +- Executing a read-only SQL query + +## Examples + +### DataverseClient (recommended) + +Tip: You can omit the credential and the SDK will use `DefaultAzureCredential` automatically: + +```python +from dataverse_sdk import DataverseClient + +base_url = "https://yourorg.crm.dynamics.com" +client = DataverseClient(base_url=base_url) # uses DefaultAzureCredential by default +``` + +```python +from azure.identity import DefaultAzureCredential +from dataverse_sdk import DataverseClient + +base_url = "https://yourorg.crm.dynamics.com" +client = DataverseClient(base_url=base_url, credential=DefaultAzureCredential()) + +# Create (returns created record) +created = client.create("accounts", {"name": "Acme, Inc.", "telephone1": "555-0100"}) +account_id = created["accountid"] + +# Read +account = client.get("accounts", account_id) + +# Update (returns updated record) +updated = client.update("accounts", account_id, {"telephone1": "555-0199"}) + +# Delete +client.delete("accounts", account_id) + +# SQL (read-only) via Custom API +rows = client.query_sql("SELECT TOP 3 accountid, name FROM account ORDER BY createdon DESC") +for r in rows: + print(r.get("accountid"), r.get("name")) +``` + +### Custom table (metadata) example + +```python +# Create a simple custom table and a few primitive columns +info = client.create_table( + "SampleItem", # friendly name; defaults to SchemaName new_SampleItem + { + "code": "string", + "count": "int", + "amount": "decimal", + "when": "datetime", + "active": "bool", + }, +) + +entity_set = info["entity_set_name"] # e.g., "new_sampleitems" +logical = info["entity_logical_name"] # e.g., "new_sampleitem" + +# Create a record in the new table +# Set your publisher prefix (used when creating the table). If you used the default, it's "new". +prefix = "new" +name_attr = f"{prefix}_name" +id_attr = f"{logical}id" + +rec = client.create(entity_set, {name_attr: "Sample A"}) + +# Clean up +client.delete(entity_set, rec[id_attr]) # delete record +client.delete_table("SampleItem") # delete the table +``` + +Notes: +- `create/update` return the full record using `Prefer: return=representation`. +- For CRUD methods that take a record id, pass the GUID string (36-char hyphenated). Parentheses around the GUID are accepted but not required. +- SQL is routed through the Custom API named in `DataverseConfig.sql_api_name` (default: `McpExecuteSqlQuery`). + + + +### Pandas helpers + +See `examples/quickstart_pandas.py` for a DataFrame workflow via `PandasODataClient`. + +VS Code Tasks +- Install deps: `Install deps (pip)` +- Run example: `Run Quickstart (Dataverse SDK)` + +## Limitations / Future Work +- No batching, upsert, or association operations yet. +- Minimal retry policy in library (network-error only); examples include additional backoff for transient Dataverse consistency. ## Contributing diff --git a/examples/quickstart.py b/examples/quickstart.py new file mode 100644 index 00000000..7d84ac41 --- /dev/null +++ b/examples/quickstart.py @@ -0,0 +1,341 @@ +import sys +from pathlib import Path + +# Add src to PYTHONPATH for local runs +sys.path.append(str(Path(__file__).resolve().parents[1] / "src")) + +from dataverse_sdk import DataverseClient +from azure.identity import InteractiveBrowserCredential +import traceback +import requests +import time + +base_url = 'https://aurorabapenv0f528.crm10.dynamics.com' +client = DataverseClient(base_url=base_url, credential=InteractiveBrowserCredential()) + +# Small helpers: call logging and step pauses +def log_call(call: str) -> None: + print({"call": call}) + +def pause(next_step: str) -> None: + # No-op (env-free quickstart) + return + +def plan_call(call: str) -> None: + print({"plan": call}) + +# Small generic backoff helper used only in this quickstart +# Include common transient statuses like 429/5xx to improve resilience. +def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403, 404, 409, 412, 429, 500, 502, 503, 504), retry_if=None): + last_exc = None + for delay in delays: + if delay: + time.sleep(delay) + try: + return op() + except Exception as ex: + print(f'Request failed: {ex}') + last_exc = ex + if retry_if and retry_if(ex): + continue + if isinstance(ex, requests.exceptions.HTTPError): + code = getattr(getattr(ex, 'response', None), 'status_code', None) + if code in retry_http_statuses: + continue + break + if last_exc: + raise last_exc + +print("Ensure custom table exists (Metadata):") +table_info = None +created_this_run = False + +# First check for existing table +log_call("client.get_table_info('SampleItem')") +existing = client.get_table_info("SampleItem") +if existing: + table_info = existing + created_this_run = False + print({ + "table": table_info.get("entity_schema"), + "existed": True, + "entity_set": table_info.get("entity_set_name"), + "logical": table_info.get("entity_logical_name"), + "metadata_id": table_info.get("metadata_id"), + }) + +else: + # Create it since it doesn't exist + try: + log_call("client.create_table('SampleItem', schema={code,count,amount,when,active})") + table_info = client.create_table( + "SampleItem", + { + "code": "string", + "count": "int", + "amount": "decimal", + "when": "datetime", + "active": "bool", + }, + ) + created_this_run = True if table_info and table_info.get("columns_created") else False + print({ + "table": table_info.get("entity_schema") if table_info else None, + "existed": False, + "entity_set": table_info.get("entity_set_name") if table_info else None, + "logical": table_info.get("entity_logical_name") if table_info else None, + "metadata_id": table_info.get("metadata_id") if table_info else None, + }) + except Exception as e: + # Print full stack trace and any HTTP response details if present + print("Create table failed:") + traceback.print_exc() + resp = getattr(e, 'response', None) + if resp is not None: + try: + print({ + "status": resp.status_code, + "url": getattr(resp, 'url', None), + "body": resp.text[:2000] if getattr(resp, 'text', None) else None, + }) + except Exception: + pass + # Fail fast: all operations must use the custom table + sys.exit(1) +entity_set = table_info.get("entity_set_name") +logical = table_info.get("entity_logical_name") or entity_set.rstrip("s") + +# Derive attribute logical name prefix from the entity logical name (segment before first underscore) +attr_prefix = logical.split("_", 1)[0] if "_" in logical else logical +code_key = f"{attr_prefix}_code" +count_key = f"{attr_prefix}_count" +amount_key = f"{attr_prefix}_amount" +when_key = f"{attr_prefix}_when" +id_key = f"{logical}id" + +def summary_from_record(rec: dict) -> dict: + return { + "code": rec.get(code_key), + "count": rec.get(count_key), + "amount": rec.get(amount_key), + "when": rec.get(when_key), + } + +def print_line_summaries(label: str, summaries: list[dict]) -> None: + print(label) + for s in summaries: + print( + f" - id={s.get('id')} code={s.get('code')} " + f"count={s.get('count')} amount={s.get('amount')} when={s.get('when')}" + ) + +# 2) Create a record in the new table +print("Create records (OData):") +# Show planned creates before executing +for _ in range(3): + plan_call(f"client.create('{entity_set}', payload)") +pause("Execute Create") +record_ids: list[str] = [] +created_recs: list[dict] = [] +create_payloads = [ + { + f"{attr_prefix}_name": "Sample A", + code_key: "X001", + count_key: 42, + amount_key: 123.45, + when_key: "2025-01-01", + f"{attr_prefix}_active": True, + }, + { + f"{attr_prefix}_name": "Sample B", + code_key: "X002", + count_key: 7, + amount_key: 987.65, + when_key: "2025-01-02", + f"{attr_prefix}_active": True, + }, + { + f"{attr_prefix}_name": "Sample C", + code_key: "X003", + count_key: 100, + amount_key: 222.22, + when_key: "2025-01-03", + f"{attr_prefix}_active": False, + }, +] + +try: + for payload in create_payloads: + log_call(f"client.create('{entity_set}', payload)") + rec = backoff_retry(lambda p=payload: client.create(entity_set, p)) + created_recs.append(rec) + rid = rec.get(id_key) + if rid: + record_ids.append(rid) + print({"entity": logical, "created_ids": record_ids}) + # Summarize the created records from the returned payloads + summaries = [] + for rec in created_recs: + summaries.append({"id": rec.get(id_key), **summary_from_record(rec)}) + print_line_summaries("Created record summaries:", summaries) +except Exception as e: + print(f"Create failed: {e}") + sys.exit(1) + +pause("Next: Read record") + +# 3) Read record via OData +print("Read (OData):") +# Show planned reads before executing +if 'record_ids' in locals() and record_ids: + for rid in record_ids: + plan_call(f"client.get('{entity_set}', '{rid}')") +pause("Execute Read") +try: + if record_ids: + summaries = [] + for rid in record_ids: + log_call(f"client.get('{entity_set}', '{rid}')") + rec = backoff_retry(lambda r=rid: client.get(entity_set, r)) + summaries.append({"id": rid, **summary_from_record(rec)}) + print_line_summaries("Read record summaries:", summaries) + else: + raise RuntimeError("No record created; skipping read.") +except Exception as e: + print(f"Get failed: {e}") +# 3.5) Update record, then read again and verify +print("Update (OData) and verify:") +# Show what will be updated and planned update calls, then pause +try: + if not record_ids: + raise RuntimeError("No record created; skipping update.") + + update_data = { + f"{attr_prefix}_code": "X002", + f"{attr_prefix}_count": 99, + f"{attr_prefix}_amount": 543.21, + f"{attr_prefix}_when": "2025-02-02", + f"{attr_prefix}_active": False, + } + expected_checks = { + f"{attr_prefix}_code": "X002", + f"{attr_prefix}_count": 99, + f"{attr_prefix}_active": False, + } + amount_key = f"{attr_prefix}_amount" + + # Describe what is changing + print( + { + "updating_to": { + code_key: update_data[code_key], + count_key: update_data[count_key], + amount_key: update_data[amount_key], + when_key: update_data[when_key], + } + } + ) + + # Choose a single target to update to keep other records different + target_id = record_ids[0] + plan_call(f"client.update('{entity_set}', '{target_id}', update_data)") + pause("Execute Update") + + # Update only the chosen record and summarize + log_call(f"client.update('{entity_set}', '{target_id}', update_data)") + new_rec = backoff_retry(lambda: client.update(entity_set, target_id, update_data)) + # Verify string/int/bool fields + for k, v in expected_checks.items(): + assert new_rec.get(k) == v, f"Field {k} expected {v}, got {new_rec.get(k)}" + # Verify decimal with tolerance + got = new_rec.get(amount_key) + got_f = float(got) if got is not None else None + assert got_f is not None and abs(got_f - 543.21) < 1e-6, f"Field {amount_key} expected 543.21, got {got}" + print({"entity": logical, "updated": True}) + print_line_summaries("Updated record summary:", [{"id": target_id, **summary_from_record(new_rec)}]) +except Exception as e: + print(f"Update/verify failed: {e}") + sys.exit(1) +# 4) Query records via SQL Custom API +print("Query (SQL via Custom API):") +try: + # Try singular logical name first, then plural entity set, with short backoff + import time + + candidates = [logical] + if entity_set and entity_set != logical: + candidates.append(entity_set) + + # Show planned SQL queries before executing + for name in candidates: + plan_call(f"client.query_sql(\"SELECT TOP 2 * FROM {name} ORDER BY {attr_prefix}_amount DESC\")") + pause("Execute SQL Query") + + rows = [] + for name in candidates: + def _run_query(): + log_call(f"client.query_sql(\"SELECT TOP 2 * FROM {name} ORDER BY {attr_prefix}_amount DESC\")") + return client.query_sql(f"SELECT TOP 2 * FROM {name} ORDER BY {attr_prefix}_amount DESC") + def _retry_if(ex: Exception) -> bool: + msg = str(ex) if ex else "" + return ("Invalid table name" in msg) or ("Invalid object name" in msg) + try: + rows = backoff_retry(_run_query, delays=(0, 2, 5), retry_http_statuses=(), retry_if=_retry_if) + logical_for_ids = logical + id_key = f"{logical_for_ids}id" + ids = [r.get(id_key) for r in rows if isinstance(r, dict) and r.get(id_key)] + print({"entity": name, "rows": len(rows) if isinstance(rows, list) else 0, "ids": ids}) + # Print TDS summaries for clarity + tds_summaries = [] + for row in rows if isinstance(rows, list) else []: + tds_summaries.append( + { + "id": row.get(id_key), + "code": row.get(code_key), + "count": row.get(count_key), + "amount": row.get(amount_key), + "when": row.get(when_key), + } + ) + print_line_summaries("TDS record summaries (top 2 by amount):", tds_summaries) + raise SystemExit + except Exception: + continue +except SystemExit: + pass +except Exception as e: + print(f"SQL via Custom API failed: {e}") +# 5) Delete record +print("Delete (OData):") +# Show planned deletes before executing +if 'record_ids' in locals() and record_ids: + for rid in record_ids: + plan_call(f"client.delete('{entity_set}', '{rid}')") +pause("Execute Delete") +try: + if record_ids: + for rid in record_ids: + log_call(f"client.delete('{entity_set}', '{rid}')") + backoff_retry(lambda r=rid: client.delete(entity_set, r)) + print({"entity": logical, "deleted_ids": record_ids}) + else: + raise RuntimeError("No record created; skipping delete.") +except Exception as e: + print(f"Delete failed: {e}") + +pause("Next: Cleanup table") + +# 6) Cleanup: delete the custom table if it exists +print("Cleanup (Metadata):") +try: + # Delete if present, regardless of whether it was created in this run + log_call("client.get_table_info('SampleItem')") + info = client.get_table_info("SampleItem") + if info: + log_call("client.delete_table('SampleItem')") + client.delete_table("SampleItem") + print({"table_deleted": True}) + else: + print({"table_deleted": False, "reason": "not found"}) +except Exception as e: + print(f"Delete table failed: {e}") diff --git a/examples/quickstart_pandas.py b/examples/quickstart_pandas.py new file mode 100644 index 00000000..a2f5ee4c --- /dev/null +++ b/examples/quickstart_pandas.py @@ -0,0 +1,230 @@ +import sys +from pathlib import Path + +# Add src to PYTHONPATH for local runs +sys.path.append(str(Path(__file__).resolve().parents[1] / "src")) + +from dataverse_sdk import DataverseClient +from dataverse_sdk.odata_pandas_wrappers import PandasODataClient +from azure.identity import InteractiveBrowserCredential +import traceback +import requests +import time +import pandas as pd + +base_url = 'https://aurorabapenv0f528.crm10.dynamics.com' +client = DataverseClient(base_url=base_url, credential=InteractiveBrowserCredential()) +# Use the internal OData client for pandas helpers +PANDAS = PandasODataClient(client._get_odata()) + +# Small generic backoff helper used only in this quickstart +# Include common transient statuses like 429/5xx to improve resilience. +def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403, 404, 409, 412, 429, 500, 502, 503, 504), retry_if=None): + last_exc = None + for delay in delays: + if delay: + time.sleep(delay) + try: + return op() + except Exception as ex: + print(f'Request failed: {ex}') + last_exc = ex + if retry_if and retry_if(ex): + continue + if isinstance(ex, requests.exceptions.HTTPError): + code = getattr(getattr(ex, 'response', None), 'status_code', None) + if code in retry_http_statuses: + continue + break + if last_exc: + raise last_exc + +print("(Pandas) Ensure custom table exists (Metadata):") +table_info = None +created_this_run = False + +# First check for existing table +existing = client.get_table_info("SampleItem") +if existing: + table_info = existing + created_this_run = False + print({ + "table": table_info.get("entity_schema"), + "existed": True, + "entity_set": table_info.get("entity_set_name"), + "logical": table_info.get("entity_logical_name"), + "metadata_id": table_info.get("metadata_id"), + }) + +else: + # Create it since it doesn't exist + try: + table_info = client.create_table( + "SampleItem", + { + "code": "string", + "count": "int", + "amount": "decimal", + "when": "datetime", + "active": "bool", + }, + ) + created_this_run = True if table_info and table_info.get("columns_created") else False + print({ + "table": table_info.get("entity_schema") if table_info else None, + "existed": False, + "entity_set": table_info.get("entity_set_name") if table_info else None, + "logical": table_info.get("entity_logical_name") if table_info else None, + "metadata_id": table_info.get("metadata_id") if table_info else None, + }) + except Exception as e: + # Print full stack trace and any HTTP response details if present + print("Create table failed:") + traceback.print_exc() + resp = getattr(e, 'response', None) + if resp is not None: + try: + print({ + "status": resp.status_code, + "url": getattr(resp, 'url', None), + "body": resp.text[:2000] if getattr(resp, 'text', None) else None, + }) + except Exception: + pass + # Fail fast: all operations must use the custom table + sys.exit(1) + +entity_set = table_info.get("entity_set_name") +logical = table_info.get("entity_logical_name") or entity_set.rstrip("s") +# Derive attribute logical name prefix from the entity logical name +attr_prefix = logical.split("_", 1)[0] if "_" in logical else logical +record_data = { + f"{attr_prefix}_name": "Sample X", + f"{attr_prefix}_code": "X001", + f"{attr_prefix}_count": 42, + f"{attr_prefix}_amount": 123.45, + f"{attr_prefix}_when": "2025-01-01", + f"{attr_prefix}_active": True, +} + +# 2) Create a record in the new table +print("(Pandas) Create record (OData via Pandas wrapper):") +record_id = None +try: + record_id = backoff_retry(lambda: PANDAS.create_df(entity_set, pd.Series(record_data))) + print({"entity": logical, "created_id": record_id}) +except Exception as e: + print(f"Create failed: {e}") + sys.exit(1) + +# 3) Read record via OData +print("(Pandas) Read (OData via Pandas wrapper):") +try: + if record_id: + # get_ids returns a DataFrame; fetch single row + df = backoff_retry(lambda: PANDAS.get_ids(entity_set, pd.Series([record_id]))) + print(df.head()) + id_key = f"{logical}id" + rid = df.iloc[0].get(id_key) if not df.empty else None + print({"entity": logical, "read": True, "id": rid}) + else: + raise RuntimeError("No record created; skipping read.") +except Exception as e: + print(f"Get failed: {e}") + +# 3.5) Update record, then read again and verify +print("(Pandas) Update (OData via Pandas wrapper) and verify:") +try: + if not record_id: + raise RuntimeError("No record created; skipping update.") + + update_data = { + f"{attr_prefix}_code": "X002", + f"{attr_prefix}_count": 99, + f"{attr_prefix}_amount": 543.21, + f"{attr_prefix}_when": "2025-02-02", + f"{attr_prefix}_active": False, + } + expected_checks = { + f"{attr_prefix}_code": "X002", + f"{attr_prefix}_count": 99, + f"{attr_prefix}_active": False, + } + amount_key = f"{attr_prefix}_amount" + + # Perform update via Pandas wrapper (returns None), then re-fetch to verify + backoff_retry(lambda: PANDAS.update(entity_set, record_id, pd.Series(update_data))) + print({"entity": logical, "updated": True}) + + # Re-read and verify from DataFrame + after_df = backoff_retry(lambda: PANDAS.get_ids(entity_set, pd.Series([record_id]))) + row = after_df.iloc[0] if not after_df.empty else {} + + # Verify string/int/bool fields + for k, v in expected_checks.items(): + gv = row.get(k) if hasattr(row, 'get') else None + assert gv == v, f"Field {k} expected {v}, got {gv}" + + # Verify decimal with tolerance + got = row.get(amount_key) if hasattr(row, 'get') else None + got_f = float(got) if got is not None else None + assert got_f is not None and abs(got_f - 543.21) < 1e-6, f"Field {amount_key} expected 543.21, got {got}" + + print({"entity": logical, "verified": True}) +except Exception as e: + print(f"Update/verify failed: {e}") + sys.exit(1) + +# 4) Query records via SQL Custom API +print("(Pandas) Query (SQL via Custom API):") +try: + # Try singular logical name first, then plural entity set, with short backoff + import time + + candidates = [logical] + if entity_set and entity_set != logical: + candidates.append(entity_set) + + df_rows = None + for name in candidates: + def _run_query(): + return PANDAS.query_sql_df(f"SELECT TOP 3 * FROM {name} ORDER BY createdon DESC") + def _retry_if(ex: Exception) -> bool: + msg = str(ex) if ex else "" + return ("Invalid table name" in msg) or ("Invalid object name" in msg) + try: + df_rows = backoff_retry(_run_query, delays=(0, 2, 5), retry_http_statuses=(), retry_if=_retry_if) + id_key = f"{logical}id" + ids = df_rows[id_key].dropna().tolist() if (df_rows is not None and id_key in df_rows.columns) else [] + print({"entity": name, "rows": (0 if df_rows is None else len(df_rows)), "ids": ids}) + raise SystemExit + except Exception: + continue +except SystemExit: + pass +except Exception as e: + print(f"SQL via Custom API failed: {e}") + +# 5) Delete record +print("(Pandas) Delete (OData via Pandas wrapper):") +try: + if record_id: + backoff_retry(lambda: PANDAS.delete_ids(entity_set, record_id)) + print({"entity": logical, "deleted": True}) + else: + raise RuntimeError("No record created; skipping delete.") +except Exception as e: + print(f"Delete failed: {e}") + +# 6) Cleanup: delete the custom table if it exists +print("Cleanup (Metadata):") +try: + # Delete if present, regardless of whether it was created in this run + info = client.get_table_info("SampleItem") + if info: + client.delete_table("SampleItem") + print({"table_deleted": True}) + else: + print({"table_deleted": False, "reason": "not found"}) +except Exception as e: + print(f"Delete table failed: {e}") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..a342347b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,32 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "dataverse-sdk-poc" +version = "0.1.0" +description = "POC: Dataverse Python SDK with TDS reads and OData CRUD via SQL router" +authors = [{ name = "POC" }] +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "azure-identity>=1.17.0", + "azure-core>=1.30.2", + "requests>=2.32.0", + "pytest>=8.3.1", + "pandas>=2.2.0", +] + +[tool.setuptools] +# Use the src/ layout +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +# Discover packages under src/, include our package, and exclude non-package folders +where = ["src"] +include = ["dataverse_sdk*"] +exclude = ["tests*", "examples*"] + +[tool.pytest.ini_options] +addopts = "-q" +pythonpath = ["src"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..c2dcf554 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +msal>=1.28.0 +requests>=2.32.0 +pyodbc>=5.1.0 +pytest>=8.3.1 diff --git a/src/dataverse_sdk/__init__.py b/src/dataverse_sdk/__init__.py new file mode 100644 index 00000000..1fced982 --- /dev/null +++ b/src/dataverse_sdk/__init__.py @@ -0,0 +1,3 @@ +from .client import DataverseClient + +__all__ = ["DataverseClient"] diff --git a/src/dataverse_sdk/auth.py b/src/dataverse_sdk/auth.py new file mode 100644 index 00000000..30931cfe --- /dev/null +++ b/src/dataverse_sdk/auth.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional + +from azure.identity import DefaultAzureCredential +from azure.core.credentials import TokenCredential + + +@dataclass +class TokenPair: + resource: str + access_token: str + + +class AuthManager: + """Azure Identity-based authentication helper for Dataverse. + + Uses DefaultAzureCredential by default, or a provided TokenCredential. + """ + + def __init__(self, credential: Optional[TokenCredential] = None) -> None: + # Let callers inject any azure.identity credential; default to DAC + self.credential: TokenCredential = credential or DefaultAzureCredential() + + def acquire_token(self, scope: str) -> TokenPair: + """Acquire an access token for the given scope using Azure Identity.""" + token = self.credential.get_token(scope) + return TokenPair(resource=scope, access_token=token.token) diff --git a/src/dataverse_sdk/client.py b/src/dataverse_sdk/client.py new file mode 100644 index 00000000..4d8eaaae --- /dev/null +++ b/src/dataverse_sdk/client.py @@ -0,0 +1,201 @@ +from __future__ import annotations + +from typing import Any, Dict, Optional + +from azure.core.credentials import TokenCredential + +from .auth import AuthManager +from .config import DataverseConfig +from .odata import ODataClient + + +class DataverseClient: + """High-level client for Dataverse operations. + + This client exposes a simple, stable surface for: + - OData CRUD: create, get, update, delete records + - SQL (read-only): execute T-SQL via Dataverse Custom API (no ODBC/TDS driver) + - Table metadata: create, inspect, and delete simple custom tables + + The client owns authentication (Azure Identity) and configuration, and delegates + requests to an internal OData client responsible for HTTP calls and URL shaping. + + Parameters + ---------- + base_url : str + Your Dataverse environment URL, for example: + ``"https://.crm.dynamics.com"``. A trailing slash is ignored. + credential : azure.core.credentials.TokenCredential | None, optional + Any Azure Identity credential. If omitted, the SDK uses + ``DefaultAzureCredential`` internally. + config : DataverseConfig | None, optional + Optional configuration (language code, SQL API name, HTTP timeouts/retries). + + Raises + ------ + ValueError + If ``base_url`` is missing or empty after trimming. + """ + + def __init__( + self, + base_url: str, + credential: Optional[TokenCredential] = None, + config: Optional[DataverseConfig] = None, + ) -> None: + self.auth = AuthManager(credential) + self._base_url = (base_url or "").rstrip("/") + if not self._base_url: + raise ValueError("base_url is required.") + self._config = config or DataverseConfig.from_env() + self._odata: Optional[ODataClient] = None + + def _get_odata(self) -> ODataClient: + """Get or create the internal OData client instance. + + Returns + ------- + ODataClient + The lazily-initialized low-level client used to perform requests. + """ + if self._odata is None: + self._odata = ODataClient(self.auth, self._base_url, self._config) + return self._odata + + # CRUD + def create(self, entity: str, record_data: dict) -> dict: + """Create a record and return its full representation. + + Parameters + ---------- + entity : str + Entity set name (plural logical name), e.g., ``"accounts"``. + record_data : dict + Field-value pairs to set on the new record. + + Returns + ------- + dict + The created record as returned by the Web API (``Prefer: return=representation``). + + Raises + ------ + requests.exceptions.HTTPError + If the request fails (via ``raise_for_status`` in the underlying client). + """ + return self._get_odata().create(entity, record_data) + + def update(self, entity: str, record_id: str, record_data: dict) -> dict: + """Update a record and return its full representation. + + Parameters + ---------- + entity : str + Entity set name (plural logical name). + record_id : str + The record GUID (with or without parentheses). + record_data : dict + Field-value pairs to update. + + Returns + ------- + dict + The updated record payload. + """ + return self._get_odata().update(entity, record_id, record_data) + + def delete(self, entity: str, record_id: str) -> None: + """Delete a record by ID. + + Parameters + ---------- + entity : str + Entity set name (plural logical name). + record_id : str + The record GUID (with or without parentheses). + """ + self._get_odata().delete(entity, record_id) + + def get(self, entity: str, record_id: str) -> dict: + """Fetch a record by ID. + + Parameters + ---------- + entity : str + Entity set name (plural logical name). + record_id : str + The record GUID (with or without parentheses). + + Returns + ------- + dict + The record JSON payload. + """ + return self._get_odata().get(entity, record_id) + + # SQL via Custom API + def query_sql(self, tsql: str): + """Execute a read-only SQL query via the configured Custom API. + + Parameters + ---------- + tsql : str + A SELECT-only T-SQL statement (e.g., ``"SELECT TOP 3 * FROM account"``). + + Returns + ------- + list[dict] + Rows as a list of dictionaries. + """ + return self._get_odata().query_sql(tsql) + + # Table metadata helpers + def get_table_info(self, tablename: str) -> Optional[Dict[str, Any]]: + """Get basic metadata for a custom table if it exists. + + Parameters + ---------- + tablename : str + Friendly name (e.g., ``"SampleItem"``) or full schema name + (e.g., ``"new_SampleItem"``). + + Returns + ------- + dict | None + Dict with keys like ``entity_schema``, ``entity_logical_name``, + ``entity_set_name``, and ``metadata_id``; ``None`` if not found. + """ + return self._get_odata().get_table_info(tablename) + + def create_table(self, tablename: str, schema: Dict[str, str]) -> Dict[str, Any]: + """Create a simple custom table. + + Parameters + ---------- + tablename : str + Friendly name (``"SampleItem"``) or a full schema name (``"new_SampleItem"``). + schema : dict[str, str] + Column definitions mapping logical names (without prefix) to types. + Supported: ``string``, ``int``, ``decimal``, ``float``, ``datetime``, ``bool``. + + Returns + ------- + dict + Metadata summary including ``entity_schema``, ``entity_set_name``, + ``entity_logical_name``, ``metadata_id``, and ``columns_created``. + """ + return self._get_odata().create_table(tablename, schema) + + def delete_table(self, tablename: str) -> None: + """Delete a custom table by name. + + Parameters + ---------- + tablename : str + Friendly name (``"SampleItem"``) or a full schema name (``"new_SampleItem"``). + """ + self._get_odata().delete_table(tablename) + + +__all__ = ["DataverseClient"] + diff --git a/src/dataverse_sdk/config.py b/src/dataverse_sdk/config.py new file mode 100644 index 00000000..8c9dae87 --- /dev/null +++ b/src/dataverse_sdk/config.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional + + +@dataclass(frozen=True) +class DataverseConfig: + language_code: int = 1033 + sql_api_name: str = "McpExecuteSqlQuery" + + # Optional HTTP tuning (not yet wired everywhere; reserved for future use) + http_retries: Optional[int] = None + http_backoff: Optional[float] = None + http_timeout: Optional[float] = None + + @classmethod + def from_env(cls) -> "DataverseConfig": + # Environment-free defaults + return cls( + language_code=1033, + sql_api_name="McpExecuteSqlQuery", + http_retries=None, + http_backoff=None, + http_timeout=None, + ) diff --git a/src/dataverse_sdk/http.py b/src/dataverse_sdk/http.py new file mode 100644 index 00000000..223e05d8 --- /dev/null +++ b/src/dataverse_sdk/http.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import time +from typing import Any, Optional + +import requests + + +class HttpClient: + def __init__( + self, + *, + retries: Optional[int] = None, + backoff: Optional[float] = None, + timeout: Optional[float] = None, + ) -> None: + self.max_attempts = retries if retries is not None else 5 + self.base_delay = backoff if backoff is not None else 0.5 + self.default_timeout: Optional[float] = timeout + + def request(self, method: str, url: str, **kwargs: Any) -> requests.Response: + # Apply per-method default timeouts if not provided + # Apply default timeout if not provided; fall back to per-method defaults + if "timeout" not in kwargs: + if self.default_timeout is not None: + kwargs["timeout"] = self.default_timeout + else: + m = (method or "").lower() + kwargs["timeout"] = 120 if m in ("post", "delete") else 10 + + # Small backoff retry on network errors only + for attempt in range(self.max_attempts): + try: + return requests.request(method, url, **kwargs) + except requests.exceptions.RequestException: + if attempt == self.max_attempts - 1: + raise + delay = self.base_delay * (2 ** attempt) + time.sleep(delay) + continue diff --git a/src/dataverse_sdk/odata.py b/src/dataverse_sdk/odata.py new file mode 100644 index 00000000..5bf39cf6 --- /dev/null +++ b/src/dataverse_sdk/odata.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +from typing import Any, Dict, Optional, List +import re +import json + +from .http import HttpClient + + +class ODataClient: + """Dataverse Web API client: CRUD, SQL-over-API, and table metadata helpers.""" + + def __init__(self, auth, base_url: str, config=None) -> None: + self.auth = auth + self.base_url = (base_url or "").rstrip("/") + if not self.base_url: + raise ValueError("base_url is required.") + self.api = f"{self.base_url}/api/data/v9.2" + self.config = config or __import__("dataverse_sdk.config", fromlist=["DataverseConfig"]).DataverseConfig.from_env() + self._http = HttpClient( + retries=self.config.http_retries, + backoff=self.config.http_backoff, + timeout=self.config.http_timeout, + ) + + def _headers(self) -> Dict[str, str]: + """Build standard OData headers with bearer auth.""" + scope = f"{self.base_url}/.default" + token = self.auth.acquire_token(scope).access_token + return { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + "Content-Type": "application/json", + "OData-MaxVersion": "4.0", + "OData-Version": "4.0", + } + + def _request(self, method: str, url: str, **kwargs): + return self._http.request(method, url, **kwargs) + + # ----------------------------- CRUD --------------------------------- + def create(self, entity_set: str, data: Dict[str, Any]) -> Dict[str, Any]: + url = f"{self.api}/{entity_set}" + headers = self._headers().copy() + headers["Prefer"] = "return=representation" + r = self._request("post", url, headers=headers, json=data) + r.raise_for_status() + return r.json() + + def _format_key(self, key: str) -> str: + k = key.strip() + if k.startswith("(") and k.endswith(")"): + return k + if len(k) == 36 and "-" in k: + return f"({k})" + return f"({k})" + + def update(self, entity_set: str, key: str, data: Dict[str, Any]) -> Dict[str, Any]: + url = f"{self.api}/{entity_set}{self._format_key(key)}" + headers = self._headers().copy() + headers["If-Match"] = "*" + headers["Prefer"] = "return=representation" + r = self._request("patch", url, headers=headers, json=data) + r.raise_for_status() + return r.json() + + def delete(self, entity_set: str, key: str) -> None: + url = f"{self.api}/{entity_set}{self._format_key(key)}" + headers = self._headers().copy() + headers["If-Match"] = "*" + r = self._request("delete", url, headers=headers) + r.raise_for_status() + + def get(self, entity_set: str, key: str, select: Optional[str] = None) -> Dict[str, Any]: + params = {} + if select: + params["$select"] = select + url = f"{self.api}/{entity_set}{self._format_key(key)}" + r = self._request("get", url, headers=self._headers(), params=params) + r.raise_for_status() + return r.json() + + # --------------------------- SQL Custom API ------------------------- + def query_sql(self, tsql: str) -> list[dict[str, Any]]: + payload = {"querytext": tsql} + headers = self._headers() + api_name = self.config.sql_api_name + url = f"{self.api}/{api_name}" + r = self._request("post", url, headers=headers, json=payload) + r.raise_for_status() + data = r.json() + if "queryresult" not in data: + raise RuntimeError(f"{api_name} response missing 'queryresult'.") + q = data["queryresult"] + if q is None: + parsed = [] + elif isinstance(q, str): + s = q.strip() + parsed = [] if not s else json.loads(s) + else: + raise RuntimeError(f"Unexpected queryresult type: {type(q)}") + return parsed + + # ---------------------- Table metadata helpers ---------------------- + def _label(self, text: str) -> Dict[str, Any]: + lang = int(self.config.language_code) + return { + "@odata.type": "Microsoft.Dynamics.CRM.Label", + "LocalizedLabels": [ + { + "@odata.type": "Microsoft.Dynamics.CRM.LocalizedLabel", + "Label": text, + "LanguageCode": lang, + } + ], + } + + def _to_pascal(self, name: str) -> str: + parts = re.split(r"[^A-Za-z0-9]+", name) + return "".join(p[:1].upper() + p[1:] for p in parts if p) + + def _get_entity_by_schema(self, schema_name: str) -> Optional[Dict[str, Any]]: + url = f"{self.api}/EntityDefinitions" + params = { + "$select": "MetadataId,LogicalName,SchemaName,EntitySetName", + "$filter": f"SchemaName eq '{schema_name}'", + } + r = self._request("get", url, headers=self._headers(), params=params) + r.raise_for_status() + items = r.json().get("value", []) + return items[0] if items else None + + def _create_entity(self, schema_name: str, display_name: str, attributes: List[Dict[str, Any]]) -> str: + url = f"{self.api}/EntityDefinitions" + payload = { + "@odata.type": "Microsoft.Dynamics.CRM.EntityMetadata", + "SchemaName": schema_name, + "DisplayName": self._label(display_name), + "DisplayCollectionName": self._label(display_name + "s"), + "Description": self._label(f"Custom entity for {display_name}"), + "OwnershipType": "UserOwned", + "HasActivities": False, + "HasNotes": True, + "IsActivity": False, + "Attributes": attributes, + } + headers = self._headers() + r = self._request("post", url, headers=headers, json=payload) + r.raise_for_status() + ent = self._wait_for_entity_ready(schema_name) + if not ent or not ent.get("EntitySetName"): + raise RuntimeError( + f"Failed to create or retrieve entity '{schema_name}' (EntitySetName not available)." + ) + return ent["MetadataId"] + + def _wait_for_entity_ready(self, schema_name: str, delays: Optional[List[int]] = None) -> Optional[Dict[str, Any]]: + import time + delays = delays or [0, 2, 5, 10, 20, 30] + ent: Optional[Dict[str, Any]] = None + for idx, delay in enumerate(delays): + if idx > 0 and delay > 0: + time.sleep(delay) + ent = self._get_entity_by_schema(schema_name) + if ent and ent.get("EntitySetName"): + return ent + return ent + + def _attribute_payload(self, schema_name: str, dtype: str, *, is_primary_name: bool = False) -> Optional[Dict[str, Any]]: + dtype_l = dtype.lower().strip() + label = schema_name.split("_")[-1] + if dtype_l in ("string", "text"): + return { + "@odata.type": "Microsoft.Dynamics.CRM.StringAttributeMetadata", + "SchemaName": schema_name, + "DisplayName": self._label(label), + "RequiredLevel": {"Value": "None"}, + "MaxLength": 200, + "FormatName": {"Value": "Text"}, + "IsPrimaryName": bool(is_primary_name), + } + if dtype_l in ("int", "integer"): + return { + "@odata.type": "Microsoft.Dynamics.CRM.IntegerAttributeMetadata", + "SchemaName": schema_name, + "DisplayName": self._label(label), + "RequiredLevel": {"Value": "None"}, + "Format": "None", + "MinValue": -2147483648, + "MaxValue": 2147483647, + } + if dtype_l in ("decimal", "money"): + return { + "@odata.type": "Microsoft.Dynamics.CRM.DecimalAttributeMetadata", + "SchemaName": schema_name, + "DisplayName": self._label(label), + "RequiredLevel": {"Value": "None"}, + "MinValue": -100000000000.0, + "MaxValue": 100000000000.0, + "Precision": 2, + } + if dtype_l in ("float", "double"): + return { + "@odata.type": "Microsoft.Dynamics.CRM.DoubleAttributeMetadata", + "SchemaName": schema_name, + "DisplayName": self._label(label), + "RequiredLevel": {"Value": "None"}, + "MinValue": -100000000000.0, + "MaxValue": 100000000000.0, + "Precision": 2, + } + if dtype_l in ("datetime", "date"): + return { + "@odata.type": "Microsoft.Dynamics.CRM.DateTimeAttributeMetadata", + "SchemaName": schema_name, + "DisplayName": self._label(label), + "RequiredLevel": {"Value": "None"}, + "Format": "DateOnly", + "ImeMode": "Inactive", + } + if dtype_l in ("bool", "boolean"): + return { + "@odata.type": "Microsoft.Dynamics.CRM.BooleanAttributeMetadata", + "SchemaName": schema_name, + "DisplayName": self._label(label), + "RequiredLevel": {"Value": "None"}, + "OptionSet": { + "@odata.type": "Microsoft.Dynamics.CRM.BooleanOptionSetMetadata", + "TrueOption": { + "Value": 1, + "Label": self._label("True"), + }, + "FalseOption": { + "Value": 0, + "Label": self._label("False"), + }, + "IsGlobal": False, + }, + } + return None + + def get_table_info(self, tablename: str) -> Optional[Dict[str, Any]]: + # Accept tablename as a display/logical root; infer a default schema using 'new_' if not provided. + # If caller passes a full SchemaName, use it as-is. + schema_name = tablename if "_" in tablename else f"new_{self._to_pascal(tablename)}" + entity_schema = schema_name + ent = self._get_entity_by_schema(entity_schema) + if not ent: + return None + return { + "entity_schema": ent.get("SchemaName") or entity_schema, + "entity_logical_name": ent.get("LogicalName"), + "entity_set_name": ent.get("EntitySetName"), + "metadata_id": ent.get("MetadataId"), + "columns_created": [], + } + + def delete_table(self, tablename: str) -> None: + schema_name = tablename if "_" in tablename else f"new_{self._to_pascal(tablename)}" + entity_schema = schema_name + ent = self._get_entity_by_schema(entity_schema) + if not ent or not ent.get("MetadataId"): + raise RuntimeError(f"Table '{entity_schema}' not found.") + metadata_id = ent["MetadataId"] + url = f"{self.api}/EntityDefinitions({metadata_id})" + headers = self._headers() + r = self._request("delete", url, headers=headers) + r.raise_for_status() + + def create_table(self, tablename: str, schema: Dict[str, str]) -> Dict[str, Any]: + # Accept a friendly name and construct a default schema under 'new_'. + # If a full SchemaName is passed (contains '_'), use as-is. + entity_schema = tablename if "_" in tablename else f"new_{self._to_pascal(tablename)}" + + ent = self._get_entity_by_schema(entity_schema) + if ent: + raise RuntimeError(f"Table '{entity_schema}' already exists. No update performed.") + + created_cols: List[str] = [] + primary_attr_schema = "new_Name" if "_" not in entity_schema else f"{entity_schema.split('_',1)[0]}_Name" + attributes: List[Dict[str, Any]] = [] + attributes.append(self._attribute_payload(primary_attr_schema, "string", is_primary_name=True)) + for col_name, dtype in schema.items(): + # Use same publisher prefix segment as entity_schema if present; else default to 'new_'. + publisher = entity_schema.split("_", 1)[0] if "_" in entity_schema else "new" + attr_schema = f"{publisher}_{self._to_pascal(col_name)}" + payload = self._attribute_payload(attr_schema, dtype) + if not payload: + raise ValueError(f"Unsupported column type '{dtype}' for '{col_name}'.") + attributes.append(payload) + created_cols.append(attr_schema) + + metadata_id = self._create_entity(entity_schema, tablename, attributes) + ent2: Dict[str, Any] = self._wait_for_entity_ready(entity_schema) or {} + logical_name = ent2.get("LogicalName") + + return { + "entity_schema": entity_schema, + "entity_logical_name": logical_name, + "entity_set_name": ent2.get("EntitySetName") if ent2 else None, + "metadata_id": metadata_id, + "columns_created": created_cols, + } diff --git a/src/dataverse_sdk/odata_pandas_wrappers.py b/src/dataverse_sdk/odata_pandas_wrappers.py new file mode 100644 index 00000000..300e3586 --- /dev/null +++ b/src/dataverse_sdk/odata_pandas_wrappers.py @@ -0,0 +1,184 @@ +"""Pandas-friendly wrappers around the low-level `ODataClient`. + +These helpers allow using pandas DataFrames / Series / Indexes as inputs and +outputs for common CRUD + query operations. + +Design notes: +* All methods are thin convenience wrappers that iterate row-by-row; no OData + batch requests are issued (future enhancement opportunity). +* create_df: creates one record per row, returning a new DataFrame with an + added id column (default name 'id'). +* update_df: updates records based on an id column; returns a DataFrame with + per-row success booleans and optional error messages. +* delete_ids: deletes a collection of ids (Series, list, or Index) returning a + DataFrame summarizing success/failure. +* get_ids: fetches a set of ids returning a DataFrame of the merged JSON + objects (outer union of keys). Missing keys are NaN. +* query_sql_df: runs a SQL query via Custom API and returns the result rows as + a DataFrame (empty DataFrame if no rows). + +Edge cases & behaviors: +* Empty inputs return empty DataFrames without calling the API. +* Errors on individual rows are captured instead of aborting the whole batch. +* The default id column name is 'id' but can be overridden. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Iterable, List, Optional, Sequence, Any +import re +import json + +import pandas as pd + +from .odata import ODataClient + + +@dataclass +class RowError: + index: int + message: str + + +class PandasODataClient: + """High-level convenience wrapper exposing pandas-friendly methods. + + Parameters + ---------- + odata_client : ODataClient + An initialized low-level client (token acquisition & base URL ready). + """ + + def __init__(self, odata_client: ODataClient) -> None: + self._c = odata_client + + # ---------------------------- Create --------------------------------- + def create_df(self, entity_set: str, record: pd.Series) -> str: + """Create a single record from a pandas Series and return the GUID. + + Parameters + ---------- + entity_set : str + Target Dataverse entity set name (entity set logical plural). + record : pandas.Series + Series whose index labels are field logical names. + + Returns + ------- + str + The created record's GUID. + """ + if not isinstance(record, pd.Series): + raise TypeError("record must be a pandas Series") + payload = {k: v for k, v in record.items()} + created = self._c.create(entity_set, payload) + # Extract primary id from returned representation (first '*id' that looks like a GUID) + if isinstance(created, dict): + for k, v in created.items(): + if isinstance(k, str) and k.lower().endswith("id") and isinstance(v, (str,)): + if re.fullmatch(r"[0-9a-fA-F-]{36}", v.strip() or ""): + return v + raise RuntimeError("Could not determine created record id from returned representation") + + # ---------------------------- Update --------------------------------- + def update(self, entity_set: str, record_id: str, entity_data: pd.Series) -> None: + """Update a single record. + + Parameters + ---------- + entity_set : str + Target Dataverse entity set name (plural logical name). + record_id : str + GUID of the record to update. + entity_data : pandas.Series + Series whose index labels are field logical names; any null (NaN) values + are ignored (not sent). An 'id' key, if present, is ignored. + + Raises + ------ + TypeError + If entity_data is not a Series. + Exception + Propagates underlying HTTP errors from the OData client. + """ + if not isinstance(entity_data, pd.Series): + raise TypeError("entity_data must be a pandas Series") + payload = {k: v for k, v in entity_data.items()} + if not payload: + return # nothing to send + self._c.update(entity_set, record_id, payload) + + # ---------------------------- Delete --------------------------------- + def delete_ids(self, entity_set: str, record_id: str) -> None: + """Delete a collection of record IDs. + """ + self._c.delete(entity_set, record_id) + + # ------------------------------ Get ---------------------------------- + def get_ids(self, entity_set: str, ids: Sequence[str] | pd.Series | pd.Index, select: Optional[Iterable[str]] = None) -> pd.DataFrame: + """Fetch multiple records by ID and return a DataFrame. + + Missing records are included with NaN for fields and an error column entry. + """ + if isinstance(ids, (pd.Series, pd.Index)): + id_list = [str(x) for x in ids.tolist()] + else: + id_list = [str(x) for x in ids] + rows = [] + any_errors = False + for rec_id in id_list: + try: + data = self._c.get(entity_set, rec_id, select=",".join(select) if select else None) + rows.append(data) + except Exception as e: # noqa: BLE001 + any_errors = True + rows.append({"id": rec_id, "error": str(e)}) + if not rows: + return pd.DataFrame(columns=["id"]) + return pd.DataFrame(rows) + + # --------------------------- Query SQL ------------------------------- + def query_sql_df(self, tsql: str) -> pd.DataFrame: + """Execute a SQL query via Custom API and return a DataFrame. + + Empty result -> empty DataFrame (columns inferred only if rows present). + """ + rows: Any = self._c.query_sql(tsql) + + # If API returned a JSON string, parse it + if isinstance(rows, str): + try: + rows = json.loads(rows) + except json.JSONDecodeError as e: # noqa: BLE001 + raise ValueError("query_sql returned a string that is not valid JSON") from e + + # If a dict wrapper came back, try common shapes + if isinstance(rows, dict): + # Shape: {"rows": [...], "columns": [...]} (some APIs) + if "rows" in rows and "columns" in rows and isinstance(rows["rows"], list): + return pd.DataFrame(rows["rows"], columns=rows.get("columns")) + # Shape: {"value": [...]} + if "value" in rows and isinstance(rows["value"], list): + rows = rows["value"] + else: + # Treat single dict payload as one-row result + rows = [rows] + + # Now rows should ideally be a list + if not rows: + return pd.DataFrame() + + if isinstance(rows, list): + if len(rows) == 0: + return pd.DataFrame() + # All dicts -> normal tabular expansion + if all(isinstance(r, dict) for r in rows): + return pd.DataFrame(rows) + # Mixed or scalar list -> single column DataFrame + return pd.DataFrame({"value": rows}) + + # Fallback: wrap anything else + return pd.DataFrame({"value": [rows]}) + +__all__ = ["PandasODataClient"]