In [0]:
from pyspark.sql import types as T

# Three sample orders as (order_id, customer, country, amount) tuples.
data = [
    (1, "Asha", "IN", 100.0),
    (2, "Bob", "US", 200.0),
    (3, "Chen", "CN", 150.0),
]

# Declare the column types explicitly instead of letting Spark infer them.
# order_id is the only column marked non-nullable.
schema = (
    T.StructType()
    .add("order_id", T.IntegerType(), False)
    .add("customer", T.StringType(), True)
    .add("country", T.StringType(), True)
    .add("amount", T.DoubleType(), True)
)

# Build the DataFrame from the sample rows and render it in the notebook.
df = spark.createDataFrame(data, schema=schema)
display(df)


order_id,customer,country,amount
1,Asha,IN,100.0
2,Bob,US,200.0
3,Chen,CN,150.0


In [0]:
# Save the sample orders as the Delta table `orders_delta`.
# Overwrite mode replaces whatever the table held before, so this cell can be
# re-run to reset the table to the three seed rows.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("orders_delta")
)


In [0]:
%sql
-- Read the table back to check the rows written by the previous cell.
SELECT * FROM orders_delta;

order_id,customer,country,amount
1,Asha,IN,100.0
2,Bob,US,200.0
3,Chen,CN,150.0


In [0]:
%sql
-- Change the amount of order_id 2 to 250 (its seed value was 200.0).
-- The commit shows up as its own entry in the table history.
UPDATE orders_delta
SET amount = 250
WHERE order_id = 2;

num_affected_rows
1


In [0]:
%sql
-- Append a fourth order to the table.
-- The target columns are listed explicitly so each value is bound to a named
-- column instead of relying on the table's current column order; a positional
-- insert would silently misplace values if the table layout ever changed.
INSERT INTO orders_delta (order_id, customer, country, amount)
VALUES (4, 'Diana', 'US', 300.0);

num_affected_rows,num_inserted_rows
1,1


In [0]:
%sql
-- List the table's commit log: one row per table version, newest first,
-- with the operation that produced it and that operation's metrics.
DESCRIBE HISTORY orders_delta;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
3,2026-01-13T05:01:25.000Z,77838657344022,parthpatoliya001@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> true, partitionBy -> [])",,List(3660372887555348),0113-044657-w8zxg0uu-v2n,2.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1186)",,Databricks-Runtime/17.3.x-aarch64-photon-scala2.13
2,2026-01-13T05:01:11.000Z,77838657344022,parthpatoliya001@gmail.com,OPTIMIZE,"Map(predicate -> [], auto -> true, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(3660372887555348),0113-044657-w8zxg0uu-v2n,1.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 2, numRemovedBytes -> 2447, p25FileSize -> 1257, numDeletionVectorsRemoved -> 1, minFileSize -> 1257, numAddedFiles -> 1, maxFileSize -> 1257, p75FileSize -> 1257, p50FileSize -> 1257, numAddedBytes -> 1257)",,Databricks-Runtime/17.3.x-aarch64-photon-scala2.13
1,2026-01-13T05:01:09.000Z,77838657344022,parthpatoliya001@gmail.com,UPDATE,"Map(predicate -> [""(order_id#13530 = 2)""])",,List(3660372887555348),0113-044657-w8zxg0uu-v2n,0.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 3519, numDeletionVectorsUpdated -> 0, scanTimeMs -> 1642, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1200, rewriteTimeMs -> 1856)",,Databricks-Runtime/17.3.x-aarch64-photon-scala2.13
0,2026-01-13T05:00:29.000Z,77838657344022,parthpatoliya001@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(3660372887555348),0113-044657-w8zxg0uu-v2n,,WriteSerializable,False,"Map(numFiles -> 1, numRemovedFiles -> 0, numRemovedBytes -> 0, numDeletionVectorsRemoved -> 0, numOutputRows -> 3, numOutputBytes -> 1247)",,Databricks-Runtime/17.3.x-aarch64-photon-scala2.13


In [0]:
%sql
-- Time travel: query the table as of version 0, i.e. the initial write,
-- before the UPDATE and INSERT cells changed it.
SELECT * FROM orders_delta VERSION AS OF 0;

order_id,customer,country,amount
1,Asha,IN,100.0
2,Bob,US,200.0
3,Chen,CN,150.0


In [0]:
# Change set for the upsert demo, laid out like the orders table:
#   order_id 2 already exists in the table -> meant to be updated
#   order_id 5 is not in the table yet     -> meant to be inserted
updates = [
    (2, "Bob", "US", 220.0),
    (5, "Eva", "UK", 180.0),
]

# Reuse the orders schema so the change set lines up column-for-column
# with the target table, then render it in the notebook.
df_updates = spark.createDataFrame(updates, schema=schema)
display(df_updates)

order_id,customer,country,amount
2,Bob,US,220.0
5,Eva,UK,180.0


In [0]:
# Expose the change-set DataFrame to SQL cells under the view name `df_updates`.
# createOrReplace makes the cell safe to re-run.
df_updates.createOrReplaceTempView("df_updates")

In [0]:
%sql
-- Upsert the change set into the orders table, matching rows on order_id:
--   * an order already in the table has all its columns overwritten from the source
--   * an order missing from the table is inserted as a new row
MERGE INTO orders_delta AS tgt
USING df_updates AS src
  ON tgt.order_id = src.order_id
WHEN MATCHED THEN UPDATE SET *
WHEN NOT MATCHED THEN INSERT *;

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,1,0,1


In [0]:
%sql
-- Inspect the table after the MERGE: order 2 should carry the merged amount
-- and order 5 should now be present.
-- There is no ORDER BY, so rows come back in whatever order the engine returns them.
SELECT * FROM orders_delta;

order_id,customer,country,amount
1,Asha,IN,100.0
3,Chen,CN,150.0
4,Diana,US,300.0
2,Bob,US,220.0
5,Eva,UK,180.0


In [0]:
%sql
-- Run Delta's file compaction on the table.
-- NOTE(review): the table history already records an automatic OPTIMIZE commit,
-- so on a table this small this manual run may have little to do; confirm.
OPTIMIZE orders_delta;

path,metrics
,"List(0, 0, List(null, null, 0.0, 0, 0), List(null, null, 0.0, 0, 0), 0, null, null, 0, 0, 1, 1, true, 0, 0, 1768281116346, 1768281116911, 8, 0, null, List(0, 0), null, 4, 4, 0, 0, null)"


In [0]:
%sql
-- Run OPTIMIZE again, this time asking Delta to Z-order the data by `country`.
-- NOTE(review): Z-ordering targets large multi-file tables; on this handful of
-- rows it is presumably for demonstration only; confirm intent.
OPTIMIZE orders_delta
ZORDER BY (country);

path,metrics
,"List(0, 0, List(null, null, 0.0, 0, 0), List(null, null, 0.0, 0, 0), 0, List(minCubeSize(107374182400), List(0, 0), List(1, 1326), 0, List(0, 0), 0, null), null, 0, 0, 1, 1, false, 0, 0, 1768281138674, 1768281139176, 8, 0, null, List(0, 0), null, 4, 4, 0, 0, null)"


In [0]:
%sql
-- Clean up data files the table no longer references.
-- No RETAIN clause is given, so the table's default retention period applies.
-- NOTE(review): once old files are vacuumed, time travel to the versions that
-- needed them stops working; confirm against the Delta VACUUM documentation.
VACUUM orders_delta;

path


In [0]:
%sql
-- Final check: the OPTIMIZE / VACUUM maintenance cells should leave the
-- table's rows unchanged.
SELECT * FROM orders_delta;

order_id,customer,country,amount
1,Asha,IN,100.0
3,Chen,CN,150.0
4,Diana,US,300.0
2,Bob,US,220.0
5,Eva,UK,180.0
