In [1]:
# Register a catalog named "iceberg": Iceberg's SparkCatalog implementation,
# catalog type "hadoop", with its warehouse rooted at the gs:// path below.
iceberg_catalog_settings = (
    ("spark.sql.catalog.iceberg", "org.apache.iceberg.spark.SparkCatalog"),
    ("spark.sql.catalog.iceberg.type", "hadoop"),
    ("spark.sql.catalog.iceberg.warehouse", "gs://tpch-source/iceberg_warehouse"),
)
for setting_name, setting_value in iceberg_catalog_settings:
    spark.conf.set(setting_name, setting_value)

In [2]:
# Display the catalogs the session currently reports.
catalog_listing = spark.sql("SHOW CATALOGS")
catalog_listing.show()

+-------------+
|      catalog|
+-------------+
|spark_catalog|
+-------------+



In [6]:
# Create the tpch namespace inside the iceberg catalog unless it already exists.
create_namespace_stmt = "CREATE NAMESPACE IF NOT EXISTS iceberg.tpch"
spark.sql(create_namespace_stmt)


DataFrame[]

In [10]:
# Define the demo table: integer id, string name and a DECIMAL(10,2) price,
# stored as an Iceberg table. IF NOT EXISTS makes the cell safe to re-run.
create_products_ddl = """
CREATE TABLE IF NOT EXISTS iceberg.tpch.products (
    product_id INT,
    name STRING,
    price DECIMAL(10,2)
) USING ICEBERG
"""
spark.sql(create_products_ddl)


DataFrame[]

In [11]:
# Seed the table with three rows.
# Re-running this cell appends the same three rows again (plain INSERT INTO).
insert_products_stmt = """
INSERT INTO iceberg.tpch.products VALUES
(1, 'Laptop', 999.99),
(2, 'Phone', 499.99),
(3, 'Tablet', 299.99)
"""
spark.sql(insert_products_stmt)


                                                                                

DataFrame[]

In [12]:
# Show the table contents after the initial insert.
products_after_insert = spark.sql("SELECT * FROM iceberg.tpch.products")
products_after_insert.show()


[Stage 4:>                                                          (0 + 1) / 1]

+----------+------+------+
|product_id|  name| price|
+----------+------+------+
|         1|Laptop|999.99|
|         2| Phone|499.99|
|         3|Tablet|299.99|
+----------+------+------+



                                                                                

In [13]:
# Row-level update through MERGE: the one-row source matches product_id 1 and
# sets its price to 1099.99. There is no WHEN NOT MATCHED clause, so nothing
# is inserted.
merge_price_stmt = """
MERGE INTO iceberg.tpch.products t
USING (SELECT 1 AS product_id, 1099.99 AS price) s
ON t.product_id = s.product_id
WHEN MATCHED THEN UPDATE SET t.price = s.price
"""
spark.sql(merge_price_stmt)


                                                                                

DataFrame[]

In [14]:
# Show the table contents after the MERGE.
products_after_merge = spark.sql("SELECT * FROM iceberg.tpch.products")
products_after_merge.show()


+----------+------+-------+
|product_id|  name|  price|
+----------+------+-------+
|         1|Laptop|1099.99|
|         2| Phone| 499.99|
|         3|Tablet| 299.99|
+----------+------+-------+



In [15]:
# Row-level delete: remove every row whose name is 'Tablet'.
delete_tablet_stmt = "DELETE FROM iceberg.tpch.products WHERE name = 'Tablet'"
spark.sql(delete_tablet_stmt)


DataFrame[]

In [16]:
# Show the table contents after the DELETE.
products_after_delete = spark.sql("SELECT * FROM iceberg.tpch.products")
products_after_delete.show()


+----------+------+-------+
|product_id|  name|  price|
+----------+------+-------+
|         1|Laptop|1099.99|
|         2| Phone| 499.99|
+----------+------+-------+



In [17]:
# Query the table's `history` metadata table; truncate=False prints the long
# snapshot ids in full.
history_rows = spark.sql("SELECT * FROM iceberg.tpch.products.history")
history_rows.show(truncate=False)


+-----------------------+-------------------+-------------------+-------------------+
|made_current_at        |snapshot_id        |parent_id          |is_current_ancestor|
+-----------------------+-------------------+-------------------+-------------------+
|2025-02-02 19:13:01.589|5030089142580687202|NULL               |true               |
|2025-02-02 19:13:39.836|3249960666204203601|5030089142580687202|true               |
|2025-02-02 19:14:55.659|6327152097578355969|3249960666204203601|true               |
+-----------------------+-------------------+-------------------+-------------------+



In [19]:
# List each snapshot id with its commit timestamp. These are the values that
# the VERSION AS OF / TIMESTAMP AS OF time-travel queries take as input.
snapshot_ids = spark.sql("SELECT snapshot_id, committed_at FROM iceberg.tpch.products.snapshots")
snapshot_ids.show(truncate=False)


+-------------------+-----------------------+
|snapshot_id        |committed_at           |
+-------------------+-----------------------+
|5030089142580687202|2025-02-02 19:13:01.589|
|3249960666204203601|2025-02-02 19:13:39.836|
|6327152097578355969|2025-02-02 19:14:55.659|
+-------------------+-----------------------+



In [22]:
# Time travel by snapshot id with VERSION AS OF.
# NOTE(review): 6327152097578355969 is the newest snapshot in the listing, so
# this returns the same rows as the current table.
version_as_of_query = """
SELECT * FROM iceberg.tpch.products 
VERSION AS OF 6327152097578355969
"""
spark.sql(version_as_of_query).show()

+----------+------+-------+
|product_id|  name|  price|
+----------+------+-------+
|         1|Laptop|1099.99|
|         2| Phone| 499.99|
+----------+------+-------+



In [25]:
# Time travel by wall-clock time with TIMESTAMP AS OF.
# The literal is a committed_at value copied by hand from the
# products.snapshots query output (here: the first snapshot).
timestamp_as_of_query = """
SELECT * FROM iceberg.tpch.products 
TIMESTAMP AS OF TIMESTAMP '2025-02-02 19:13:01.589'
"""
spark.sql(timestamp_as_of_query).show()


[Stage 24:>                                                         (0 + 1) / 1]

+----------+------+------+
|product_id|  name| price|
+----------+------+------+
|         1|Laptop|999.99|
|         2| Phone|499.99|
|         3|Tablet|299.99|
+----------+------+------+



                                                                                

In [26]:
# Dump every column of the `snapshots` metadata table without truncation.
all_snapshot_columns = spark.sql("SELECT * FROM iceberg.tpch.products.snapshots")
all_snapshot_columns.show(truncate=False)


+-----------------------+-------------------+-------------------+---------+------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|committed_at           |snapshot_id        |parent_id          |operation|manifest_list                                                                                                                 |summary                                   

In [27]:
# Re-check the `history` metadata table (untruncated).
history_recheck = spark.sql("SELECT * FROM iceberg.tpch.products.history")
history_recheck.show(truncate=False)


+-----------------------+-------------------+-------------------+-------------------+
|made_current_at        |snapshot_id        |parent_id          |is_current_ancestor|
+-----------------------+-------------------+-------------------+-------------------+
|2025-02-02 19:13:01.589|5030089142580687202|NULL               |true               |
|2025-02-02 19:13:39.836|3249960666204203601|5030089142580687202|true               |
|2025-02-02 19:14:55.659|6327152097578355969|3249960666204203601|true               |
+-----------------------+-------------------+-------------------+-------------------+



In [28]:
# Show the current table contents without truncating cell values.
current_products = spark.sql("SELECT * FROM iceberg.tpch.products")
current_products.show(truncate=False)


+----------+------+-------+
|product_id|name  |price  |
+----------+------+-------+
|1         |Laptop|1099.99|
|2         |Phone |499.99 |
+----------+------+-------+



In [30]:
# List snapshot ids and commit timestamps again, to pick an id for the
# DataFrame-reader time travel.
snapshot_choices = spark.sql("SELECT snapshot_id, committed_at FROM iceberg.tpch.products.snapshots")
snapshot_choices.show(truncate=False)


+-------------------+-----------------------+
|snapshot_id        |committed_at           |
+-------------------+-----------------------+
|5030089142580687202|2025-02-02 19:13:01.589|
|3249960666204203601|2025-02-02 19:13:39.836|
|6327152097578355969|2025-02-02 19:14:55.659|
+-------------------+-----------------------+



In [31]:
# Load the table as of one snapshot through the DataFrame reader's
# "snapshot-id" option, then display it.
# NOTE(review): this id is the newest one in the snapshots listing, so despite
# its name df_old holds the current rows — confirm an earlier id was not intended.
snapshot_reader = spark.read.format("iceberg").option("snapshot-id", 6327152097578355969)
df_old = snapshot_reader.load("iceberg.tpch.products")
df_old.show()


+----------+------+-------+
|product_id|  name|  price|
+----------+------+-------+
|         2| Phone| 499.99|
|         1|Laptop|1099.99|
+----------+------+-------+



In [None]:
# Emulate a rollback by writing the snapshot DataFrame (df_old) back over the
# table in overwrite mode.
overwrite_writer = df_old.write.format("iceberg").mode("overwrite")
overwrite_writer.save("iceberg.tpch.products")

In [29]:
# Roll the table back with Iceberg's stored procedure. Procedures are addressed
# as <catalog>.system.<procedure>, so the path is iceberg.system.* and the
# namespace belongs only in the table argument ('tpch.products').
# This will not work in the current session because the Iceberg SQL extensions
# are not enabled: CALL is rejected with a ParseException.
# NOTE(review): 6327152097578355969 is the newest snapshot in the history
# output, so this targets the current state — use an earlier snapshot_id to
# actually undo a change.
spark.sql("CALL iceberg.system.rollback_to_snapshot('tpch.products', 6327152097578355969)")


ParseException: 
[PARSE_SYNTAX_ERROR] Syntax error at or near 'CALL'.(line 1, pos 0)

== SQL ==
CALL iceberg.tpch.system.rollback_to_snapshot('tpch.products', 6327152097578355969)
^^^


In [32]:
# Must be set at the beginning: spark.sql.extensions is a static config, so it
# has to be supplied when the Spark session is created. Setting it on the
# running session raises AnalysisException, as this cell's output shows.
spark.conf.set("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")


AnalysisException: Cannot modify the value of a static config: spark.sql.extensions.

In [33]:
# Shut down the Spark session used by this notebook.
spark.stop() 