In [None]:
Oracle AI Data Platform v1.0

Copyright © 2025, Oracle and/or its affiliates.

Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

# Integrate with ALH
 **Loading Data in ALH**
 
 This notebook demonstrates writing data into ALH in AI Data Platform. It covers:
 
 1. **Create ALH External Catalog**
 2. **Inserting data into a table in ALH using PySpark `insertInto`**
 3. **Inserting data into a table in ALH using SQL `INSERT`**
 
 **Parameters**
 - Workspace location for the wallet file - you must upload your ADB wallet
 - User name for your ADB
 - Password for your ADB
 - TNS name to use
 - Wallet password

 **Prerequisites**

Before you begin, ensure you have:
 - An ALH accessible from your AI Data Platform workspace (if it's private, you need to create a workspace to use private connectivity)
 - A configured AI Data Platform environment with a compute cluster created.

 - Upload your ADB wallet into the workspace and set the parameter for the wallet file location
 - Create the table in your ALH using the ALH OCI Console or another IDE of your choice
 - create table gold.patient_data("patient_id" varchar2(30), "heart_rate_bpm" number, "blood_pressure"  varchar2(30), "oxygen_saturation" number, "room_number"  varchar2(30), "status"  varchar2(30));

 **Next Steps**

Follow the step-by-step instructions in this notebook to implement each approach. You can also refer to the following resources for additional guidance:
 - Creating external catalogs in AI Data Platform

By the end of this notebook, you’ll be able to load data into an ALH table from AI Data Platform.

In [None]:
# Notebook parameters. Each one is looked up by name through
# oidlUtils.parameters.getParameter, so a workflow job can supply the value;
# the second argument is presumably the fallback used when none is supplied.
user = oidlUtils.parameters.getParameter("USER", "replace_with_your_db_user")
passwd = oidlUtils.parameters.getParameter("PASSWD", "replace_with_your_db_password")
tns = oidlUtils.parameters.getParameter("TNS", "replace_with_your_tns")
wallet_passwd = oidlUtils.parameters.getParameter("WALLET_PASSWD", "replace_with_your_wallet_password")
wallet_path = oidlUtils.parameters.getParameter("WALLET_PATH", "replace_with_your_wallet_path")
# End of the parameters that need to be set; the remaining cells can be run as-is.

In [None]:
import base64

# Read the wallet archive from `wallet_path` so it can be embedded, base64-encoded,
# in the external catalog definition.
# Fail fast on any read problem: continuing without wallet bytes would only
# surface later as an unrelated encoding error and hide the real cause.
try:
    with open(wallet_path, 'rb') as wallet_file:
        byte_array = bytearray(wallet_file.read())
except FileNotFoundError:
    print(f"Error: File not found: {wallet_path}")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise

# An empty file cannot be a valid wallet; stop before creating a broken catalog.
if not byte_array:
    raise ValueError(f"Wallet file is empty: {wallet_path}")

wt = base64.b64encode(byte_array).decode('utf-8')

# NOTE(review): the parameter values are interpolated into the SQL text as-is,
# so a value containing a single quote will break the statement.
create_sql = f"create external catalog if not exists catalog_ALH options ('wallet.content' = '{wt}', 'type' = 'ORACLE_ALH', 'user.name' = '{user}', 'tns' = '{tns}', 'password' = '{passwd}','wallet.password' = '{wallet_passwd}')"

# Run the statement and display its result without truncating column values.
spark.sql(create_sql).show(1000, False)

### Alternate approach 1
#### Copy to clipboard

#### cat wallet.zip | base64 | pbcopy

### Alternate approach 2
#### Write the output to a text file and copy it manually

#### cat wallet.zip | base64 > wallet.txt

### Copying from a text file can help avoid any unintended characters that might get introduced when copying directly from the terminal.

In [None]:
# Column names for the sample rows, in the same order as the target table's columns.
patient_columns = [
    "patient_id",
    "heart_rate_bpm",
    "blood_pressure",
    "oxygen_saturation",
    "room_number",
    "status",
]

# Two sample patient records; each tuple follows the column order above.
patient_rows = [
    ("P001", 72, "120/80", 98, "302A", "stable"),
    ("P002", 95, "145/95", 90, "215B", "critical"),
]

df = spark.createDataFrame(patient_rows, patient_columns)

# insertInto resolves columns by position, not by name, so the DataFrame's
# column order has to match the table definition.
df.write.insertInto("catalog_ALH.gold.patient_data")


In [None]:
# Read the table back through Spark SQL and display the rows.
df = spark.sql("select * from catalog_ALH.gold.patient_data")
df.show()

In [None]:
# Same read as the SQL query, this time through the DataFrameReader API
# (spark.read.table) using the fully qualified catalog.schema.table name.
df = spark.read.table("catalog_ALH.gold.patient_data")
df.show()

In [None]:
# Build a one-row DataFrame and register it as the temp view "src_data",
# so that it can be referenced from SQL as the source of an INSERT.
src_columns = [
    "patient_id",
    "heart_rate_bpm",
    "blood_pressure",
    "oxygen_saturation",
    "room_number",
    "status",
]
src_rows = [
    ("P003", 88, "125/95", 92, "215B", "critical"),
]

df = spark.createDataFrame(src_rows, src_columns)
df.createOrReplaceTempView("src_data")

# Display the view's contents to confirm it was registered.
spark.sql("select * from src_data").show()

In [None]:
%sql
INSERT INTO catalog_ALH.gold.patient_data SELECT * FROM src_data

In [None]:
# Read the table once more with spark.read.table and display it,
# so the row added by the SQL INSERT can be checked.
df = spark.read.table("catalog_ALH.gold.patient_data")
df.show()

# Spark Read Examples with Wallet Path Options

These samples show how to configure Spark to read from **Oracle Autonomous AI Lakehouse (ALH)** in AI Data Platform using different `wallet.path` settings.


### Using Workspace Wallet Path

This option uses the wallet stored inside your workspace.


In [None]:

# Read an ALH table through the "aidataplatform" data source, pointing
# "wallet.path" at a wallet zip stored in the workspace.
#
# The user name, password and TNS alias come from the notebook parameters
# (user, passwd, tns) instead of being typed into the cell, so credentials are
# not stored in the notebook. The wallet referenced here must belong to the
# same database as those parameters.
# Replace the wallet path and the "SCHEMA" / "TABLE" placeholders with your own values.
df = (
    spark.read.format("aidataplatform")
    .option("type", "ORACLE_ALH")
    .option("wallet.path", "/Workspace/Wallet_customer_alh.zip")
    .option("tns", tns)
    .option("user.name", user)
    .option("password", passwd)
    .option("schema", "SCHEMA")
    .option("table", "TABLE")
    .load()
)

df.show()

### Using Volumes Wallet Path
This option uses the wallet stored in a mounted volume.

In [None]:
# Read an ALH table through the "aidataplatform" data source, pointing
# "wallet.path" at a wallet zip stored under /Volumes.
#
# The user name, password and TNS alias come from the notebook parameters
# (user, passwd, tns) instead of being typed into the cell, so credentials are
# not stored in the notebook. The wallet referenced here must belong to the
# same database as those parameters.
# Replace the wallet path and the "SCHEMA" / "TABLE" placeholders with your own values.
df = (
    spark.read.format("aidataplatform")
    .option("type", "ORACLE_ALH")
    .option("wallet.path", "/Volumes/default/test/Wallet_customer_alh.zip")
    .option("tns", tns)
    .option("user.name", user)
    .option("password", passwd)
    .option("schema", "SCHEMA")
    .option("table", "TABLE")
    .load()
)

df.show()