![](https://indiandataclub.notion.site/image/attachment%3A5b91d1ae-a34d-4ce2-bdac-00b5555344d3%3A5.png?table=block&id=2e39b695-c411-8027-b146-f4e37fba5517&spaceId=80a9b695-c411-8172-892e-0003dced8fec&width=2000&userId=&cache=v2)

**🔹 Task 1: Incremental MERGE (Upserts)**

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number

# Load the raw event table that feeds the MERGE in the next cell.
source_df = spark.table("workspace.ecommerce.ecommerce_sql")

# Columns that together identify one event; the MERGE condition joins on
# this same set, so the source must contain at most one row per combination.
dedup_keys = [
    "event_time",
    "event_type",
    "product_id",
    "user_id",
    "user_session",
]

# Keep exactly one row per key. The earlier window-based version ordered each
# partition by "event_time", which is itself a partition column, so every row
# in a partition tied and the surviving row was arbitrary anyway.
# dropDuplicates expresses the same intent without the extra sort.
source_dedup = source_df.dropDuplicates(dedup_keys)


In [0]:
from delta.tables import DeltaTable

# Target Delta table that receives the upsert.
delta_table = DeltaTable.forName(
    spark,
    "workspace.ecommerce.ecommerce_pyspark"
)

# Columns that identify an event in both the target (t) and the source (s).
merge_keys = [
    "event_time",
    "event_type",
    "product_id",
    "user_id",
    "user_session",
]

# `<=>` is Spark SQL's null-safe equality. With plain `=`, a NULL in any key
# column makes the whole condition NULL, so that source row never matches and
# would be inserted again on every run, breaking idempotence of the upsert.
merge_condition = " AND ".join(f"t.{key} <=> s.{key}" for key in merge_keys)

# Upsert: overwrite all columns of matching target rows, insert the rest.
(
    delta_table.alias("t")
    .merge(source_dedup.alias("s"), merge_condition)
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)


**🔹 Task 2: Query Historical Versions (Time Travel)**

_1️⃣ View table history_

In [0]:
%sql
-- List the commit history of the MERGE target table; the version numbers and
-- timestamps shown here are the values usable for time travel below.
DESCRIBE HISTORY workspace.ecommerce.ecommerce_pyspark;


_2️⃣ Read a specific version_

In [0]:
# Time travel by version number: load the table as it was at version 0
# (version numbers are listed by DESCRIBE HISTORY).
df_v0 = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .table("workspace.ecommerce.ecommerce_pyspark")
)

df_v0.display()


_3️⃣ Read by timestamp_

In [0]:
# Time travel by timestamp: load the table as of the given point in time.
# The timestamp must fall within the history shown by DESCRIBE HISTORY.
df_old = (
    spark.read
    .format("delta")
    .option("timestampAsOf", "2026-01-28 13:10:00")
    .table("workspace.ecommerce.ecommerce_pyspark")
)

df_old.display()


**🔹 Task 3: OPTIMIZE & ZORDER (Performance)**

In [0]:
%sql
-- Compact the table's data files and co-locate rows by event_type and user_id
-- so that queries filtering on those columns can skip more files.
OPTIMIZE workspace.ecommerce.ecommerce_pyspark
ZORDER BY (event_type, user_id);


**🔹 Task 4: VACUUM (Clean Old Files)**

In [0]:
%sql
-- Delete data files no longer referenced by the table and older than the
-- retention window of 168 hours (7 days).
-- NOTE: per the Delta Lake docs, time travel to versions that depend on the
-- removed files stops working once they are vacuumed.
VACUUM workspace.ecommerce.ecommerce_pyspark RETAIN 168 HOURS;
