In [1]:
from pyspark.sql import SparkSession
# Configure a builder with the application name "Jupyter", then obtain the session.
# getOrCreate() hands back an already-running session when there is one, which is
# what the warning in this cell's output reports.
session_builder = SparkSession.builder.appName("Jupyter")
spark = session_builder.getOrCreate()

# Bare expression so the session object is the cell's display value.
spark

25/01/13 03:16:17 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [5]:
%%sql

-- Create the `playground` database used by every later cell; IF NOT EXISTS
-- makes the statement safe to run again when the database is already there.
CREATE DATABASE IF NOT EXISTS playground

In [8]:
# DDL for the demo table: two columns (id, data), declared with USING iceberg.
# IF NOT EXISTS keeps the statement from failing when the table already exists.
create_table_ddl = """
CREATE TABLE IF NOT EXISTS playground.sample_table (
    id BIGINT,
    data STRING
)
USING iceberg
"""

# Left as the last expression so the (empty) result DataFrame is displayed.
spark.sql(create_table_ddl)

DataFrame[]

In [9]:
# Seed rows as (id, data) tuples.
data = [(1, "Hello World"), (2, "Apache Iceberg"), (3, "PySpark Demo")]

# Only column names are supplied here; no explicit column types are given.
df = spark.createDataFrame(data, schema=["id", "data"])

# Append (not overwrite) the rows to the Iceberg table.
table_writer = df.writeTo("playground.sample_table")
table_writer.append()

                                                                                

In [13]:
# Read the table back with a plain SQL query and print the result.
initial_rows = spark.sql("SELECT * FROM playground.sample_table")
initial_rows.show()


+---+--------------+
| id|          data|
+---+--------------+
|  1|   Hello World|
|  2|Apache Iceberg|
|  3|  PySpark Demo|
+---+--------------+



In [11]:
# Read the same table through the DataFrame reader API instead of SQL.
iceberg_reader = spark.read.format("iceberg")
iceberg_df = iceberg_reader.load("playground.sample_table")

iceberg_df.show()

+---+--------------+
| id|          data|
+---+--------------+
|  1|   Hello World|
|  2|Apache Iceberg|
|  3|  PySpark Demo|
+---+--------------+



In [14]:
# Schema change: add a third column, extra_info, to the existing table.
add_column_ddl = "ALTER TABLE playground.sample_table ADD COLUMN extra_info STRING"

# Left as the last expression so the (empty) result DataFrame is displayed.
spark.sql(add_column_ddl)

DataFrame[]

In [15]:
# Column names for the widened table, including the newly added extra_info.
new_columns = ["id", "data", "extra_info"]

# Two more rows, each carrying a value for extra_info.
data_new = [
    (4, "Another record", "This is extra info"),
    (5, "Yet another record", "Additional details"),
]

df_new = spark.createDataFrame(data_new, schema=new_columns)

# Append (not overwrite) the new rows to the same Iceberg table.
df_new.writeTo("playground.sample_table").append()

In [16]:
# Print the table after the second append. The query has no ORDER BY, so the
# rows are shown in whatever order Spark returns them (not sorted by id).
rows_after_append = spark.sql("SELECT * FROM playground.sample_table")
rows_after_append.show()


+---+------------------+------------------+
| id|              data|        extra_info|
+---+------------------+------------------+
|  4|    Another record|This is extra info|
|  1|       Hello World|              NULL|
|  5|Yet another record|Additional details|
|  2|    Apache Iceberg|              NULL|
|  3|      PySpark Demo|              NULL|
+---+------------------+------------------+



In [17]:
%%sql
-- List the table's snapshots by querying its `snapshots` metadata table.
-- The output has one row per snapshot with columns committed_at, snapshot_id,
-- parent_id, operation, manifest_list and summary.
-- NOTE(review): in the recorded output parent_id is rendered as a float
-- (6.203286527097907e+17), which is not the exact snapshot_id it refers to
-- (620328652709790722). Presumably a display-layer conversion; confirm before
-- copying IDs from that column.
SELECT * 
FROM playground.sample_table.snapshots

committed_at,snapshot_id,parent_id,operation,manifest_list,summary
2025-01-13 03:22:55.035000,620328652709790722,,append,s3://warehouse/playground/sample_table/metadata/snap-620328652709790722-1-5218b887-b4a8-4e53-b50f-bd52d78ce895.avro,"{'spark.app.id': 'local-1736738107160', 'changed-partition-count': '1', 'added-data-files': '3', 'total-equality-deletes': '0', 'added-records': '3', 'total-position-deletes': '0', 'added-files-size': '2167', 'total-delete-files': '0', 'total-files-size': '2167', 'total-records': '3', 'total-data-files': '3'}"
2025-01-13 03:25:08.072000,2157568210429522517,6.203286527097907e+17,append,s3://warehouse/playground/sample_table/metadata/snap-2157568210429522517-1-7e00d570-bdf1-420c-9f65-92808b323de5.avro,"{'spark.app.id': 'local-1736738107160', 'changed-partition-count': '1', 'added-data-files': '2', 'total-equality-deletes': '0', 'added-records': '2', 'total-position-deletes': '0', 'added-files-size': '2200', 'total-delete-files': '0', 'total-files-size': '4367', 'total-records': '5', 'total-data-files': '5'}"


In [18]:
%%sql
-- Query the table's `history` metadata table. The output columns are
-- made_current_at, snapshot_id, parent_id and is_current_ancestor.
-- NOTE(review): parent_id is rendered as a float in the recorded output
-- (6.203286527097907e+17), so it does not match the exact snapshot_id
-- 620328652709790722. Presumably a display-layer conversion; confirm.
SELECT * 
FROM playground.sample_table.history

made_current_at,snapshot_id,parent_id,is_current_ancestor
2025-01-13 03:22:55.035000,620328652709790722,,True
2025-01-13 03:25:08.072000,2157568210429522517,6.203286527097907e+17,True


In [21]:
# Roll the table back to its first snapshot (the state after the initial append).
#
# The snapshot ID is read from the table's `snapshots` metadata table instead of
# being pasted in as a literal copied from an earlier output. A pasted ID only
# matches this particular table instance, so the cell would fail when the notebook
# is run against a freshly created table.
first_snapshot = spark.sql("""
SELECT snapshot_id
FROM playground.sample_table.snapshots
ORDER BY committed_at
LIMIT 1
""").first()

# first() returns None for an empty result; fail with a clear message in that case.
if first_snapshot is None:
    raise ValueError("playground.sample_table has no snapshots to roll back to")

rollback_target_id = first_snapshot["snapshot_id"]

# The ID is interpolated into the statement text, as the original cell did with a
# literal. The value is a BIGINT read from table metadata, not user-provided text.
rollback_result = spark.sql(
    f"CALL system.rollback_to_snapshot('playground.sample_table', {rollback_target_id})"
)

# Prints previous_snapshot_id and current_snapshot_id.
rollback_result.show()

previous_snapshot_id,current_snapshot_id
2157568210429522517,620328652709790722


In [22]:
# Print the table after the rollback. In the recorded output the extra_info
# column is still present, while the rows with id 4 and 5 are gone.
rows_after_rollback = spark.sql("SELECT * FROM playground.sample_table")
rows_after_rollback.show()


+---+--------------+----------+
| id|          data|extra_info|
+---+--------------+----------+
|  1|   Hello World|      NULL|
|  2|Apache Iceberg|      NULL|
|  3|  PySpark Demo|      NULL|
+---+--------------+----------+

