# Incremental MERGE (Upsert)

In [0]:
from delta.tables import DeltaTable

In [0]:
# Load the November 2019 CSV extract; it is the source batch for the MERGE below.
# The first line of the file supplies the column names, and column types are
# inferred from the data rather than declared up front.
incremental_df = spark.read.csv(
    "/Volumes/workspace/ecommerce/ecommerce_data/2019-Nov.csv",
    header="true",
    inferSchema="true",
)

# MERGE into Delta Table

In [0]:
# Upsert the incremental batch into the Delta table stored at the path below.
# Rows are matched on (user_session, event_time, event_type): a match refreshes
# price and brand on the existing row, a non-match is inserted as a new row.
merge_keys = ["user_session", "event_time", "event_type"]

# Delta MERGE raises an error when more than one source row matches the same
# target row, so collapse the batch to a single row per merge key first.
# NOTE(review): dropDuplicates keeps an arbitrary row for each key. If two
# genuine events can share all three key columns, the key should be widened
# (e.g. with product_id) -- confirm against the data before relying on this.
deduped_source_df = incremental_df.dropDuplicates(merge_keys)

delta_table = DeltaTable.forPath(spark, "/Volumes/workspace/ecommerce/ecommerce_data/Delta/event_nov/")
(
    delta_table.alias("target")
    .merge(
        deduped_source_df.alias("source"),
        """
        target.user_session = source.user_session
        AND target.event_time = source.event_time
        AND target.event_type = source.event_type
        """
    )
    # Matched rows: only the mutable attributes are overwritten.
    .whenMatchedUpdate(set={
        "price": "source.price",
        "brand": "source.brand"
    })
    # Unmatched rows: insert the full event record from the source.
    .whenNotMatchedInsert(values={
        "event_time": "source.event_time",
        "event_type": "source.event_type",
        "product_id": "source.product_id",
        "category_id": "source.category_id",
        "category_code": "source.category_code",
        "brand": "source.brand",
        "price": "source.price",
        "user_id": "source.user_id",
        "user_session": "source.user_session"
    })
    .execute()
)

# Query Historical Versions (Time Travel)
## Use this when data has been overwritten or corrupted.
## Time travel lets you inspect or recover an older state of the table.

# View Table History

In [0]:
%sql
-- Show the commit history of the Delta table stored at this path.
DESCRIBE HISTORY
  delta.`/Volumes/workspace/ecommerce/ecommerce_data/Delta/event_nov/`;

# Query Specific Version

In [0]:
%sql
-- Time travel by version: read the table as it was at commit version 1.
SELECT
  *
FROM
  delta.`/Volumes/workspace/ecommerce/ecommerce_data/Delta/event_nov/` VERSION AS OF 1

# Query by Timestamp

In [0]:
%sql
-- Time travel by timestamp: read the table as it was at the given point in time.
SELECT
  *
FROM
  delta.`/Volumes/workspace/ecommerce/ecommerce_data/Delta/event_nov/` TIMESTAMP AS OF '2026-01-21 08:54:45'


# Optimize Tables

In [0]:
%sql
-- Run OPTIMIZE on the Delta table at this path (no Z-ordering).
OPTIMIZE
  delta.`/Volumes/workspace/ecommerce/ecommerce_data/Delta/event_nov/`;

# Optimize with Z-Ordering

In [0]:
%sql
-- Run OPTIMIZE again, this time Z-ordering the data by event_type and category_code.
OPTIMIZE
  delta.`/Volumes/workspace/ecommerce/ecommerce_data/Delta/event_nov/`
  ZORDER BY (event_type, category_code);

# Clean Old Files (VACUUM)
## MERGE and overwrite operations leave behind data files that the current table version no longer references.

# Default Cleanup

In [0]:
%sql
-- VACUUM the Delta table at this path; no RETAIN clause is given, so the
-- table's default retention threshold applies.
VACUUM
  delta.`/Volumes/workspace/ecommerce/ecommerce_data/Delta/event_nov/`;