In [None]:
# Uncomment to install the notebook's dependencies into the kernel's environment.
#%pip install -r requirements.txt

In [None]:
# Versions that together select the Iceberg Spark runtime artifact
# (iceberg-spark-runtime-<spark>_<scala>:<iceberg>) requested further down.
iceberg_version = "1.7.0"
spark_version = "3.5"
scala_version = "2.12"

from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date, rand, floor, expr

catalog_name = "iceberg"
warehouse_path = "./icehouse"

# Every catalog-scoped Spark setting hangs off this key prefix.
catalog_conf = f"spark.sql.catalog.{catalog_name}"
# Maven coordinate of the Iceberg runtime, assembled from the version
# constants defined earlier in this cell.
iceberg_runtime = f"org.apache.iceberg:iceberg-spark-runtime-{spark_version}_{scala_version}:{iceberg_version}"

# Build (or reuse) a session with the Iceberg SQL extensions enabled and a
# "hadoop"-type Iceberg catalog whose warehouse is `warehouse_path`.
spark = (
    SparkSession.builder
    .appName("iceberg_timetravel_stuff")
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config(catalog_conf, "org.apache.iceberg.spark.SparkCatalog")
    .config(f"{catalog_conf}.type", "hadoop")
    .config(f"{catalog_conf}.warehouse", warehouse_path)
    .config("spark.jars.packages", iceberg_runtime)
    # Keep the driver on the loopback interface.
    .config("spark.driver.bindAddress", "127.0.0.1")
    .config("spark.driver.host", "localhost")
    .getOrCreate()
)

namespace = "stock_stuff"
# Qualify the namespace with the catalog so it is created in the Iceberg
# catalog that every later cell addresses; an unqualified name resolves
# against Spark's current catalog instead. IF NOT EXISTS keeps the cell
# re-runnable within the same session and against an existing warehouse.
spark.sql(f"create namespace if not exists {catalog_name}.{namespace}")

In [None]:
# (Re)create the Iceberg table with two seed rows; this commit becomes the
# table's initial snapshot for the walkthrough below.
create_tickers_sql = f"""create or replace table {catalog_name}.{namespace}.tickers
        using iceberg
        as
        select 'MSFT' as ticker_symbol, 23.99 as price
        union all
        select 'SNOW', 34.99 as price
        """
spark.sql(create_tickers_sql)


In [5]:
# Show the table as seeded by the cell above.
tickers_query = f"select * from {catalog_name}.{namespace}.tickers"
spark.sql(tickers_query).show()

+-------------+-----+
|ticker_symbol|price|
+-------------+-----+
|         MSFT|23.99|
|         SNOW|34.99|
+-------------+-----+



In [6]:
# One-row change set (a new MSFT price), registered as the temp view "data"
# so that SQL statements can refer to it by name.
incoming_sql = """
    select 'MSFT' as ticker_symbol, 21.45 as price
"""
df = spark.sql(incoming_sql)
df.createOrReplaceTempView("data")

In [7]:
# Upsert the "data" view into tickers: rows matching on ticker_symbol get the
# new price, unmatched rows are inserted as-is.
merge_sql = f"""
    MERGE INTO {catalog_name}.{namespace}.tickers as tgt
        USING data as src
            on tgt.ticker_symbol = src.ticker_symbol
        WHEN MATCHED THEN UPDATE set tgt.price = src.price
        WHEN NOT MATCHED THEN INSERT *
"""
spark.sql(merge_sql)

DataFrame[]

In [8]:
# Show the table again now that the MERGE has been applied.
tickers_query = f"select * from {catalog_name}.{namespace}.tickers"
spark.sql(tickers_query).show()

+-------------+-----+
|ticker_symbol|price|
+-------------+-----+
|         MSFT|21.45|
|         SNOW|34.99|
+-------------+-----+



In [29]:
# List the table's snapshot lineage from the `history` metadata table;
# truncate=False keeps the long snapshot ids readable.
history_query = f"select * from {catalog_name}.{namespace}.tickers.history"
spark.sql(history_query).show(truncate=False)

+-----------------------+-------------------+-------------------+-------------------+
|made_current_at        |snapshot_id        |parent_id          |is_current_ancestor|
+-----------------------+-------------------+-------------------+-------------------+
|2025-04-28 08:50:01.7  |7391578844974642703|NULL               |true               |
|2025-04-28 08:50:10.306|8879485710628370466|7391578844974642703|true               |
+-----------------------+-------------------+-------------------+-------------------+



In [31]:
# Time travel: read MSFT as it was before and after the MERGE.
#
# The snapshot ids are looked up at run time. Timestamps pasted from an
# earlier run's history output do not exist in a fresh run, so hard-coding
# them breaks Restart & Run All.
tickers_table = f"{catalog_name}.{namespace}.tickers"

# Assumes the MERGE is the most recent commit on the table; its parent is
# then the snapshot the MERGE was applied on top of.
latest_commit = spark.sql(f"""
    select snapshot_id, parent_id
    from {tickers_table}.history
    order by made_current_at desc
    limit 1
""").first()
if latest_commit is None or latest_commit["parent_id"] is None:
    raise RuntimeError(
        "Expected the MERGE commit on top of the initial load; re-run the cells above."
    )
after_merge_id = latest_commit["snapshot_id"]
before_merge_id = latest_commit["parent_id"]

spark.sql(f"select * from {tickers_table} VERSION AS OF {before_merge_id} WHERE ticker_symbol = 'MSFT'").show()
spark.sql(f"select * from {tickers_table} VERSION AS OF {after_merge_id} WHERE ticker_symbol = 'MSFT'").show()

+-------------+-----+
|ticker_symbol|price|
+-------------+-----+
|         MSFT|23.99|
+-------------+-----+

+-------------+-----+
|ticker_symbol|price|
+-------------+-----+
|         MSFT|21.45|
+-------------+-----+



In [None]:
# Restore the table to its pre-MERGE contents by replacing it with a
# time-travel read of itself.
#
# The target snapshot id is resolved at run time; a timestamp pasted from an
# earlier run does not exist in a fresh run and breaks Restart & Run All.
tickers_table = f"{catalog_name}.{namespace}.tickers"

# Assumes the MERGE is still the most recent commit (i.e. this cell has not
# already been run); its parent is the snapshot to restore.
merge_commit = spark.sql(f"""
    select snapshot_id, parent_id
    from {tickers_table}.history
    order by made_current_at desc
    limit 1
""").first()
if merge_commit is None or merge_commit["parent_id"] is None:
    raise RuntimeError(
        "Expected the MERGE commit on top of the initial load; re-run the cells above."
    )
pre_merge_snapshot_id = merge_commit["parent_id"]

spark.sql(f"""
   create or replace table {tickers_table}
   using iceberg
   AS
   SELECT *
   FROM {tickers_table}
   VERSION AS OF {pre_merge_snapshot_id}
""")


DataFrame[]

In [33]:
# Confirm the table contents after the restore above.
tickers_query = f"select * from {catalog_name}.{namespace}.tickers"
spark.sql(tickers_query).show()

+-------------+-----+
|ticker_symbol|price|
+-------------+-----+
|         MSFT|23.99|
|         SNOW|34.99|
+-------------+-----+

