In [None]:
Oracle AI Data Platform v1.0

Copyright © 2025, Oracle and/or its affiliates.

Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

# Delta Schema Evolution
**This notebook illustrates how you can configure your AI Data Platform to allow for a schema that evolves over time.**
 
It covers:
 
 1. **Direct Write to OCI Object Storage**
 2. **Accessing OCI Object Storage via an External Table**
    - Creating an external table
    - Querying an external table
 3. **Writing new data with an additional column to OCI Object Storage**
    - Writing to object storage
    - Reading data
 
# **Parameters**
 - OCI Object Storage bucket name and namespace name (combined as `oci://<bucket>@<namespace>`)
 - Catalog name
 - Schema name
 - External table name
 - Folder name

In [None]:
# Notebook parameters -- change these values to match your environment.
# Each call passes a parameter name and the placeholder value written next to
# it (presumably the fallback used when the parameter is not supplied --
# confirm against the oidlUtils documentation).

# Base object storage URI, in the form oci://<bucket>@<namespace>.
oci_bucket = oidlUtils.parameters.getParameter(
    "OCI_BUCKET", "oci://replace_bucket_name@replace_namespace"
)

# Catalog, schema and table name used to build the fully qualified
# external table name (catalog.schema.table) in the SQL cells.
p_catalog_name = oidlUtils.parameters.getParameter("CATALOG_NAME", "default")
p_schema_name = oidlUtils.parameters.getParameter("SCHEMA_NAME", "default")
p_table_name = oidlUtils.parameters.getParameter("TABLE_NAME", "deltatab")

# Folder under the bucket URI where the Delta files are written.
p_folder_name = oidlUtils.parameters.getParameter("FOLDER_NAME", "mydata")


In [1]:
import pyspark
from pyspark.sql.types import *
from pyspark.sql.functions import *

In [1]:
# Create a Spark DataFrame with the initial five-column schema and write it
# to object storage in Delta format.
data = [
    ("Robert", "Baratheon", "Baratheon", "Storms End", 48),
    ("Eddard", "Stark", "Stark", "Winterfell", 46),
    ("Jamie", "Lannister", "Lannister", "Casterly Rock", 29),
]
schema = StructType([
    StructField("firstname", StringType(), True),
    StructField("lastname", StringType(), True),
    StructField("house", StringType(), True),
    StructField("location", StringType(), True),
    StructField("age", IntegerType(), True),
])

sample_dataframe = spark.createDataFrame(data=data, schema=schema)

# A plain overwrite replaces the rows but keeps the schema already stored at
# the target path. The files written by the later schema-evolution cell stay
# in object storage between runs, so on a re-run the table would still carry
# the 'salary' column before the evolution step. overwriteSchema resets the
# table to the five columns defined above, so the notebook demonstrates the
# same before/after on every "Restart & Run All".
(
    sample_dataframe.write
    .mode(saveMode="overwrite")
    .format("delta")
    .option("overwriteSchema", "true")
    .save(oci_bucket + '/' + p_folder_name + '/')
)

In [1]:
# Register the Delta folder written to object storage as an external table.
# "if not exists" makes this a no-op when the table is already registered;
# the trailing semicolon suppresses the cell's output.
spark.sql(
    f"create table if not exists {p_catalog_name}.{p_schema_name}.{p_table_name} "
    f"USING DELTA LOCATION '{oci_bucket}/{p_folder_name}/'"
);

In [1]:
# Display the rows currently visible through the external Delta table.
spark.sql(
    f"select * from {p_catalog_name}.{p_schema_name}.{p_table_name}"
).show()

In [1]:
# Evolve the schema: the new rows carry an extra integer 'salary' column
# in addition to the five columns written initially.
data = [
    ("Jim", "Benson", "Hillmount", "Glasgow", 34, 100),
    ("Jen", "Oliver", "Sleepy Hollow", "Cheddar", 37, 200),
]
schema = (
    StructType()
    .add("firstname", StringType(), True)
    .add("lastname", StringType(), True)
    .add("house", StringType(), True)
    .add("location", StringType(), True)
    .add("age", IntegerType(), True)
    .add("salary", IntegerType(), True)
)

sample_dataframe = spark.createDataFrame(data=data, schema=schema)

# Append to the same Delta folder; mergeSchema allows the write to add the
# new column to the existing table schema instead of being rejected.
(
    sample_dataframe.write
    .format("delta")
    .mode("append")
    .option("mergeSchema", "true")
    .save(f"{oci_bucket}/{p_folder_name}/")
)

In [1]:
# Query the Delta table again: the new 'salary' column should now appear,
# with null values for the rows written before the column existed.
spark.sql(
    f"select * from {p_catalog_name}.{p_schema_name}.{p_table_name}"
).show()

In [1]:
# Clean up: remove the table from the catalog. "if exists" keeps this cell
# safe to re-run (or to run when the table was never created), mirroring the
# "create table if not exists" used when the table is registered.
# NOTE(review): the table was created with an explicit LOCATION, so this is
# expected to drop only the catalog entry and leave the Delta files in object
# storage -- confirm for your platform and delete the folder separately if
# a full cleanup is needed.
spark.sql(f"drop table if exists {p_catalog_name}.{p_schema_name}.{p_table_name}");