From f0853722f390b95d1c4f92d1a38bb440dd28c02a Mon Sep 17 00:00:00 2001
From: Nishant Patel
Date: Thu, 23 Oct 2025 15:35:49 +0530
Subject: [PATCH 1/4] Copy Files recursively to AIDP from Databricks Folder.

---
 .../recursive-files-to-aidp.ipynb             | 88 +++++++++++++++++++
 .../databricks_to_aidp/requirements.txt       |  1 +
 2 files changed, 89 insertions(+)
 create mode 100644 aidp_migration/databricks_to_aidp/recursive-files-to-aidp.ipynb
 create mode 100644 aidp_migration/databricks_to_aidp/requirements.txt

diff --git a/aidp_migration/databricks_to_aidp/recursive-files-to-aidp.ipynb b/aidp_migration/databricks_to_aidp/recursive-files-to-aidp.ipynb
new file mode 100644
index 0000000..106196c
--- /dev/null
+++ b/aidp_migration/databricks_to_aidp/recursive-files-to-aidp.ipynb
@@ -0,0 +1,88 @@
+{
+  "metadata": {
+    "kernelspec": {
+      "name": "notebook"
+    },
+    "language_info": {
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python"
+    },
+    "Last_Active_Cell_Index": 5
+  },
+  "nbformat_minor": 5,
+  "nbformat": 4,
+  "cells": [
+    {
+      "id": "aedb2989-04d1-4e7d-894d-aff632ce0297",
+      "cell_type": "markdown",
+      "source": "Oracle AI Data Platform v1.0\n\nCopyright © 2025, Oracle and/or its affiliates.\n\nLicensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/",
+      "metadata": {
+        "type": "markdown"
+      }
+    },
+    {
+      "id": "a7f00219-2c74-4cba-a12b-67d2eb829168",
+      "cell_type": "markdown",
+      "source": "### Sample Code: Exporting Databricks Files to AIDP\n\nThis example demonstrates how to export files recursively from a Databricks workspace using the `databricks-sdk` library and write them to **AIDP**.\n\n**Note:**\n\n- Replace all placeholders (e.g., `DATABRICKS_WORKSPACE_URL`, `DATABRICKS_TOKEN`, `DATABRICKS_PATH`, `AIDP_PATH`) with values specific to your environment before running the notebook.\n- Optionally provide source-to-target string replacements to apply while importing to AIDP.\n- Use with caution: the notebook is designed for exporting notebooks and code-related files only.",
+      "metadata": {
+        "type": "markdown"
+      }
+    },
+    {
+      "id": "6df91459-a48a-4920-9d16-973c61bee150",
+      "cell_type": "code",
+      "source": "import os\nimport base64\nfrom databricks.sdk import WorkspaceClient\nfrom databricks.sdk.service import workspace",
+      "metadata": {
+        "type": "python",
+        "trusted": true
+      },
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "id": "4a233ed1-458c-47bd-9444-63622ea8cf6b",
+      "cell_type": "code",
+      "source": "# Databricks workspace URL\ndatabricks_workspace_url = \"DATABRICKS_WORKSPACE_URL\"\n# Databricks token\ndatabricks_token = \"DATABRICKS_TOKEN\"\n# Define the Databricks folder you want to export\ndatabricks_path = \"DATABRICKS_PATH\"\n# Define the local AIDP directory to write the exported content\naidp_path = \"AIDP_PATH\"",
+      "metadata": {
+        "type": "python",
+        "trusted": true
+      },
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "id": "4b8ab52e-0885-4ae2-8864-5b7563d20b79",
+      "cell_type": "code",
+      "source": "# Provide a mapping to replace source strings with target strings.\n# These are plain string replacements, so define the mapping carefully.\ndbx_to_aidp_replacement_mappings = {\n    \"SOURCE_STR_1\": \"TARGET_STR_1\",\n    \"SOURCE_STR_2\": \"TARGET_STR_2\"\n}",
+      "metadata": {
+        "type": "python",
+        "trusted": true
+      },
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "id": "c9ae41ae-4e57-4fd9-8a59-f360a3cb60ad",
+      "cell_type": "code",
+      "source": "# Recursively exports a Databricks workspace folder to a local directory,\n# preserving the nested folder structure and exporting notebooks as .ipynb files.\n\ndef export_folder_recursively(databricks_path: str, aidp_path: str, w: WorkspaceClient):\n\n    try:\n        # List contents of the current workspace path\n        contents = w.workspace.list(path=databricks_path)\n    except Exception as e:\n        print(f\"Failed to list contents of Databricks path {databricks_path}: {e}\")\n        return\n\n    for item in contents:\n        dbx_item_path = item.path\n\n        # Determine the relative path to maintain the nested structure\n        dbx_relative_path = os.path.relpath(dbx_item_path, databricks_path)\n        aidp_full_path = os.path.join(aidp_path, dbx_relative_path)\n\n        if item.object_type == workspace.ObjectType.DIRECTORY:\n            # Create the local directory and recurse into it\n            os.makedirs(aidp_full_path, exist_ok=True)\n            print(f\"Created local directory: {aidp_full_path}\")\n            export_folder_recursively(dbx_item_path, aidp_full_path, w)\n        elif item.object_type in (workspace.ObjectType.FILE, workspace.ObjectType.NOTEBOOK):\n            file_name = os.path.basename(dbx_item_path)\n            if item.object_type == workspace.ObjectType.NOTEBOOK:\n                local_file_path = os.path.join(os.path.dirname(aidp_full_path), f\"{file_name}.ipynb\")\n                export_format = workspace.ExportFormat.JUPYTER\n            else:\n                local_file_path = os.path.join(os.path.dirname(aidp_full_path), file_name)\n                export_format = workspace.ExportFormat.SOURCE\n\n            try:\n                # Export the file/notebook content\n                print(f\"Exporting file/notebook: {dbx_item_path} to {local_file_path}\")\n                dbx_file_content = w.workspace.export(\n                    path=dbx_item_path,\n                    format=export_format\n                )\n\n                binary_content = base64.b64decode(dbx_file_content.content)\n                code_string = binary_content.decode('utf-8')\n\n                # Iterate through the mapping and replace content\n                for dbx_str, aidp_str in dbx_to_aidp_replacement_mappings.items():\n                    code_string = code_string.replace(dbx_str, aidp_str)\n\n                modified_binary_content = code_string.encode('utf-8')\n\n                with open(local_file_path, \"wb\") as f:\n                    f.write(modified_binary_content)\n\n                print(f\"Downloaded file: {file_name} as {local_file_path}\")\n\n            except Exception as export_error:\n                print(f\"Failed to export {dbx_item_path}: {export_error}\")\n\n        else:\n            print(f\"Skipping unsupported object type: {item.object_type} at {dbx_item_path}\")",
+      "metadata": {
+        "type": "python",
+        "trusted": true
+      },
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "id": "adaeed13-c355-4503-90bc-9aa8262c30cb",
+      "cell_type": "code",
+      "source": "# Initialize the WorkspaceClient\nw = WorkspaceClient(\n    host=databricks_workspace_url,\n    token=databricks_token,\n)\n\nprint(f\"Starting export from Databricks path '{databricks_path}' to local path '{aidp_path}'\")\n\n# Create the AIDP local directory if it does not exist.\nos.makedirs(aidp_path, exist_ok=True)\n\n# Start the recursive export\nexport_folder_recursively(databricks_path, aidp_path, w)\n\nprint(\"\\nExport process finished.\")",
+      "metadata": {
+        "type": "python",
+        "trusted": true
+      },
+      "outputs": [],
+      "execution_count": null
+    }
+  ]
+}
\ No newline at end of file
diff --git a/aidp_migration/databricks_to_aidp/requirements.txt b/aidp_migration/databricks_to_aidp/requirements.txt
new file mode 100644
index 0000000..32c54fe
--- /dev/null
+++ b/aidp_migration/databricks_to_aidp/requirements.txt
@@ -0,0 +1 @@
+databricks-sdk
\ No newline at end of file
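Before running the full export in [PATCH 1/4], it can help to sanity-check the token and the source path. Below is a minimal sketch, reusing the notebook's placeholder values (`DATABRICKS_WORKSPACE_URL`, `DATABRICKS_TOKEN`, `DATABRICKS_PATH`), that lists only the immediate children of the source folder:

```python
# Minimal connectivity check for the export notebook above.
# Replace the placeholder strings with real values, as in the notebook.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient(host="DATABRICKS_WORKSPACE_URL", token="DATABRICKS_TOKEN")

# List the immediate children of the source folder; a successful call
# confirms the token is valid and the path exists before a full export.
for item in w.workspace.list(path="DATABRICKS_PATH"):
    print(item.object_type, item.path)
```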
From bdf7a54a2e118cb7fc4a0a7e105b0e45725b302c Mon Sep 17 00:00:00 2001
From: Nishant Patel
Date: Thu, 23 Oct 2025 21:10:08 +0530
Subject: [PATCH 2/4] Adding display_name to notebook recursive-files-to-aidp.ipynb

Adding display_name to notebook recursive-files-to-aidp.ipynb.
---
 .../databricks_to_aidp/recursive-files-to-aidp.ipynb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aidp_migration/databricks_to_aidp/recursive-files-to-aidp.ipynb b/aidp_migration/databricks_to_aidp/recursive-files-to-aidp.ipynb
index 106196c..7881295 100644
--- a/aidp_migration/databricks_to_aidp/recursive-files-to-aidp.ipynb
+++ b/aidp_migration/databricks_to_aidp/recursive-files-to-aidp.ipynb
@@ -1,7 +1,8 @@
 {
   "metadata": {
     "kernelspec": {
-      "name": "notebook"
+      "name": "notebook",
+      "display_name": "Python 3"
     },
     "language_info": {
       "file_extension": ".py",

From cf5d0a224f539b58d55fb0f4ce1758056b03ac69 Mon Sep 17 00:00:00 2001
From: Nishant Patel
Date: Fri, 24 Oct 2025 15:39:58 +0530
Subject: [PATCH 3/4] Adding readme file for databricks to aidp migration module.

---
 aidp_migration/databricks_to_aidp/README.md | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 aidp_migration/databricks_to_aidp/README.md

diff --git a/aidp_migration/databricks_to_aidp/README.md b/aidp_migration/databricks_to_aidp/README.md
new file mode 100644
index 0000000..ebef5e3
--- /dev/null
+++ b/aidp_migration/databricks_to_aidp/README.md
@@ -0,0 +1,49 @@
+# Databricks to AIDP migration utility
+
+This utility provides a sample implementation for exporting files and notebooks from a Databricks workspace to Oracle AI Data Platform (AIDP). It preserves the folder structure and converts notebooks to .ipynb format while supporting optional string replacement during the export process.
+
+
+## Running the Samples
+
+Before running the notebook, replace the following placeholders with your environment-specific values.
+
+Required parameters:
+
+DATABRICKS_WORKSPACE_URL: Your Databricks workspace URL
+
+DATABRICKS_TOKEN: Your Databricks personal access token
+
+DATABRICKS_PATH: Source path in Databricks workspace to export
+
+AIDP_PATH: Target directory path in AIDP
+
+dbx_to_aidp_replacement_mappings: Optional string replacement mappings if you need to modify content during export.
+
+## Documentation
+
+### Recursive Export
+Traverses nested directory structures in the Databricks workspace
+### Format Preservation
+Exports notebooks as Jupyter .ipynb files
+### String Replacement
+Supports source-to-target string mapping during export
+### Structure Maintenance
+Recreates the original folder hierarchy in AIDP
+### Multiple File Types
+Handles both notebooks and regular files
+
+## Get Support
+
+
+## Security
+
+Please consult the [security guide](/SECURITY.md) for our responsible security vulnerability disclosure process.
+
+## Contributing
+
+This project welcomes contributions from the community. Before submitting a pull request, please [review our contribution guide](/CONTRIBUTING.md).
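The string replacement documented above is a plain, order-dependent find/replace: the notebook simply loops over the mapping and calls `str.replace` for each pair, with no parsing of the source. A short illustration of that behavior, using hypothetical mapping values:

```python
# Hypothetical mapping values; the paths below are examples only.
dbx_to_aidp_replacement_mappings = {
    "/Workspace/Users/team": "/aidp/projects/team",  # rewrite a workspace path prefix
    "dbfs:/mnt/raw": "/aidp/data/raw",               # rewrite a storage URI
}

code_string = 'df = spark.read.csv("dbfs:/mnt/raw/input.csv")'

# Same loop the notebook uses: each pair is applied in dict order,
# with no tokenization, so overlapping keys can interact.
for dbx_str, aidp_str in dbx_to_aidp_replacement_mappings.items():
    code_string = code_string.replace(dbx_str, aidp_str)

print(code_string)  # -> df = spark.read.csv("/aidp/data/raw/input.csv")
```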
+
+## License
+
+See [LICENSE](/LICENSE.txt)
+

From cb61262ff430c5e7d54a13038205e7679c9a899b Mon Sep 17 00:00:00 2001
From: Nishant Patel
Date: Mon, 27 Oct 2025 12:17:46 +0530
Subject: [PATCH 4/4] Updated readme files based on review comments.

Updated readme files based on review comments.
---
 aidp_migration/databricks_to_aidp/README.md | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/aidp_migration/databricks_to_aidp/README.md b/aidp_migration/databricks_to_aidp/README.md
index ebef5e3..9841ce0 100644
--- a/aidp_migration/databricks_to_aidp/README.md
+++ b/aidp_migration/databricks_to_aidp/README.md
@@ -1,7 +1,13 @@
 # Databricks to AIDP migration utility
 
-This utility provides a sample implementation for exporting files and notebooks from a Databricks workspace to Oracle AI Data Platform (AIDP). It preserves the folder structure and converts notebooks to .ipynb format while supporting optional string replacement during the export process.
+Utility to export Databricks files (non-data) and notebooks to Oracle AI Data Platform (AIDP).
+* Preserves folder structure
+* Converts notebooks to .ipynb
+* No code translation; files are moved as-is
+* Optional plain string replacement from a provided mapping; replacement is simple find/replace (no parsing)
+* Not intended for data files
 
+Run this notebook from AIDP. The user must have read permission on the Databricks path and write permission on the AIDP destination path.
 
 ## Running the Samples
 
@@ -16,20 +22,24 @@ DATABRICKS_PATH: Source path in Databricks workspace to export
 
 AIDP_PATH: Target directory path in AIDP
 
-dbx_to_aidp_replacement_mappings: Optional string replacement mappings if you need to modify content during export.
+dbx_to_aidp_replacement_mappings: Optional string-replacement map applied during export. A basic example is rewriting path prefixes of referenced files/notebooks.
 
 ## Documentation
 
 ### Recursive Export
 Traverses nested directory structures in the Databricks workspace
 ### Format Preservation
-Exports notebooks as Jupyter .ipynb files
+Exports notebooks as Jupyter (.ipynb) files and all other files as-is.
 ### String Replacement
 Supports source-to-target string mapping during export
 ### Structure Maintenance
 Recreates the original folder hierarchy in AIDP
 ### Multiple File Types
-Handles both notebooks and regular files
+Handles both notebooks and regular files.
+### No Code Conversion
+No code conversion is performed; files are exported as-is.
+### Permissions
+Requires read permission on Databricks and write permission on AIDP.
 
 ## Get Support
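Since the notebook stores the token in a plain variable, one alternative worth noting: the `databricks-sdk` can also resolve credentials from the environment, so the token never has to appear in the notebook itself. A sketch, assuming `DATABRICKS_HOST` and `DATABRICKS_TOKEN` are set in the AIDP session environment:

```python
# Sketch: resolve credentials from environment variables instead of
# hard-coding them. The databricks-sdk reads DATABRICKS_HOST and
# DATABRICKS_TOKEN when WorkspaceClient() is constructed with no arguments.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()  # picks up DATABRICKS_HOST / DATABRICKS_TOKEN

# Confirm authentication by listing the first entry at the workspace root.
for item in w.workspace.list(path="/"):
    print(item.path)
    break
```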