Incremental Merge

In [0]:
from delta.tables import DeltaTable
from pyspark.sql.functions import col, to_timestamp

# Open the existing Delta table through its metastore name (not a volume path).
deltaTable = DeltaTable.forName(spark, "default.ecommerce_events")

# Read the incremental CSV batch: the first row supplies the column names and
# Spark infers the column types. event_time is then passed through
# to_timestamp() because it is one of the columns compared in the merge
# condition below.
nov_df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("/Volumes/workspace/ecommerce/ecommerce_data/2019-Nov.csv")
    .withColumn("event_time", to_timestamp(col("event_time")))
)

# A target row ("t") and a source row ("s") are treated as the same event when
# user, product and event time all agree.
merge_condition = """
    t.user_id = s.user_id AND
    t.product_id = s.product_id AND
    t.event_time = s.event_time
    """

# Upsert: matched target rows are overwritten with every source column,
# unmatched source rows are inserted as new rows.
# NOTE(review): Delta MERGE raises an error when more than one source row
# matches the same target row -- confirm that (user_id, product_id, event_time)
# is unique within the CSV before relying on re-runs of this cell.
(
    deltaTable.alias("t")
    .merge(nov_df.alias("s"), merge_condition)
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)


In [0]:
# Time travel by version: read the table as it was at commit version 0.
# NOTE(review): the variable name suggests version 0 holds the October load --
# confirm against DESCRIBE HISTORY.
version_reader = spark.read.format("delta").option("versionAsOf", 0)
oct_version_df = version_reader.table("default.ecommerce_events")




In [0]:
# Preview the first five rows of the version-0 snapshot.
oct_version_df.show(n=5)

In [0]:
# Time travel by timestamp: read the table as it was at the given instant.
# NOTE(review): the timestamp is hard-coded and must fall within the table's
# commit history -- check DESCRIBE HISTORY for valid values before running.
timestamp_reader = spark.read.format("delta").option(
    "timestampAsOf", "2026-01-15 09:20:00"
)
events_as_of_time = timestamp_reader.table("default.ecommerce_events")

# Preview the first five rows of that snapshot.
events_as_of_time.show(n=5)


In [0]:
# (Optional) List the table's commit history; its versions and timestamps are
# the values accepted by the versionAsOf / timestampAsOf read options.
history_df = spark.sql("DESCRIBE HISTORY default.ecommerce_events")
history_df.show()


In [0]:
# Compact the table and Z-order it by the same three columns the merge
# condition compares. The table is addressed by its schema-qualified name.
optimize_sql = """
OPTIMIZE default.ecommerce_events
ZORDER BY (event_time, user_id, product_id)
"""
spark.sql(optimize_sql)
