Oracle AI Data Platform v1.0

Copyright © 2025, Oracle and/or its affiliates.

Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

### Sample Code: Exporting Databricks Files to AIDP.

This example demonstrates how to recursively export files from a Databricks workspace using the `databricks-sdk` library and write them to **AIDP**.

**Note:** 

- Replace all placeholder values (e.g., `DATABRICKS_WORKSPACE_URL`, `DATABRICKS_TOKEN`, `DATABRICKS_PATH`, `AIDP_PATH`, etc.) with values specific to your environment before running the notebook.
- Optionally provide source-to-target string replacements to apply to the exported content while importing it into AIDP.
- Use with caution: the notebook is designed for exporting notebooks and code-related files only.

In [None]:
import os
import base64
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import workspace

In [None]:
# Databricks workspace URL; passed as `host` when the WorkspaceClient is created.
databricks_workspace_url = "DATABRICKS_WORKSPACE_URL"
# Databricks access token; passed as `token` when the WorkspaceClient is created.
# NOTE(review): replace the placeholder locally and avoid committing a real token.
databricks_token = "DATABRICKS_TOKEN"
# Databricks workspace folder to export; this is the root path handed to the recursive export.
databricks_path = "DATABRICKS_PATH"
# Local AIDP directory that receives the exported content; it is created if it does not exist.
aidp_path = "AIDP_PATH"

In [None]:
# Source-string -> target-string replacements applied to the text of every exported file/notebook.
# Provide the entries as comma-separated "SOURCE": "TARGET" pairs.
# These are plain substring replacements (str.replace), applied one after another in the order
# listed here, so choose the mappings carefully to avoid unintended matches.
dbx_to_aidp_replacement_mappings = {
    "SOURCE_STR_1": "TARGET_STR_1",
    "SOURCE_STR_2": "TARGET_STR_2"
}

In [None]:
# Recursively exports a Databricks workspace folder to a local directory, preserving the nested folder structure and exporting notebooks as .ipynb files.

def export_folder_recursively(databricks_path: str , aidp_path: str , w: WorkspaceClient):
    """Export the contents of a Databricks workspace folder into a local directory.

    Directories are mirrored locally and exported recursively. Notebooks are
    exported in Jupyter format with an ``.ipynb`` suffix appended to their
    name; workspace files are exported in source format under their own name.
    Every entry of the module-level ``dbx_to_aidp_replacement_mappings`` is
    applied as a plain string replacement to the exported text before it is
    written. Content that is not valid UTF-8 is written unchanged.

    Failures are reported with ``print`` and never raised: a listing failure
    ends the export of that folder, an export failure skips that single item.

    Args:
        databricks_path: Workspace path of the folder to export.
        aidp_path: Local directory that receives the exported items. It must
            already exist when the folder contains files or notebooks.
        w: Authenticated Databricks ``WorkspaceClient``.
    """
    try:
        # workspace.list() returns a lazy iterator, so the API request (and any
        # error it raises) only happens once it is consumed. Materialise it here
        # so that listing failures are actually caught by this handler.
        contents = list(w.workspace.list(path=databricks_path))
    except Exception as e:
        print(f"Failed to list contents of Databricks path {databricks_path}: {e}")
        return

    exportable_types = (workspace.ObjectType.FILE , workspace.ObjectType.NOTEBOOK)

    for item in contents:
        dbx_item_path = item.path

        # Determine the relative path to maintain the nested structure
        dbx_relative_path = os.path.relpath(dbx_item_path , databricks_path)
        aidp_full_path = os.path.join(aidp_path , dbx_relative_path)

        if item.object_type == workspace.ObjectType.DIRECTORY:
            # Create the local directory and recurse into it
            os.makedirs(aidp_full_path , exist_ok=True)
            print(f"Created local directory: {aidp_full_path}")
            export_folder_recursively(dbx_item_path , aidp_full_path , w)
            continue

        if item.object_type not in exportable_types:
            print(f"Skipping unsupported object type: {item.object_type} at {dbx_item_path}")
            continue

        file_name = os.path.basename(dbx_item_path)
        target_dir = os.path.dirname(aidp_full_path)
        if item.object_type == workspace.ObjectType.NOTEBOOK:
            # Notebooks have no extension in the workspace; add one for the Jupyter export.
            local_file_path = os.path.join(target_dir , f"{file_name}.ipynb")
            export_format = workspace.ExportFormat.JUPYTER
        else:
            local_file_path = os.path.join(target_dir , file_name)
            export_format = workspace.ExportFormat.SOURCE

        try:
            # Export the file/notebook content
            print(f"Exporting File/Notebook: {dbx_item_path} to {local_file_path}")
            dbx_file_content = w.workspace.export(
                path=dbx_item_path ,
                format=export_format
            )

            # The export payload is base64 encoded; a missing payload is treated as an empty file.
            binary_content = base64.b64decode(dbx_file_content.content or "")

            try:
                code_string = binary_content.decode('utf-8')
            except UnicodeDecodeError:
                # Not text: string replacement is meaningless, so keep the bytes as they are
                # instead of failing the export of this item.
                print(f"Skipping string replacement for non-UTF-8 file: {dbx_item_path}")
                output_bytes = binary_content
            else:
                # Iterate through the mapping and replace content
                for dbx_str, aidp_str in dbx_to_aidp_replacement_mappings.items():
                    code_string = code_string.replace(dbx_str, aidp_str)
                output_bytes = code_string.encode('utf-8')

            with open(local_file_path , "wb") as f:
                f.write(output_bytes)

            print(f"Downloaded File: {file_name} as {local_file_path}")

        except Exception as export_error:
            print(f"Failed to export notebook {dbx_item_path}: {export_error}")

In [None]:
# Build the Databricks client from the workspace URL and token configured earlier in the notebook.
w = WorkspaceClient(host=databricks_workspace_url, token=databricks_token)

print(f"Starting export from Databricks path '{databricks_path}' to local path '{aidp_path}'")

# The export writes files directly into aidp_path, so make sure the directory is there first.
os.makedirs(aidp_path, exist_ok=True)

# Walk the Databricks folder and write every supported item below aidp_path.
export_folder_recursively(databricks_path, aidp_path, w)

print("\nExport process finished.")