In [None]:
"""
    Author: Matt Martin
    Date: 2023-11-07
    Desc: Pyspark Iceberg Demo
"""

import pyspark
from pyspark.sql import SparkSession
import os

# Build (or reuse) a single-node Spark session configured for Apache Iceberg.
# NOTE(review): assumes the Iceberg Spark runtime jar is already on the
# classpath; nothing here pulls it in -- confirm for your environment.
spark = (
    SparkSession.builder
    .master("local")
    # Register Iceberg's Spark SQL session extensions.
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    # Back the built-in `spark_catalog` with Iceberg's session catalog (hive type).
    .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
    .config("spark.sql.catalog.spark_catalog.type", "hive")
    # Define a catalog named `local`: hadoop type, warehouse at ./warehouse
    # (a relative path, so it depends on the current working directory).
    .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.local.type", "hadoop")
    .config("spark.sql.catalog.local.warehouse", "./warehouse")
    # Make `local` the default catalog for this session.
    .config("spark.sql.defaultCatalog", "local")
    .getOrCreate()
)


In [None]:
# Create (or replace) the Iceberg table `local.b1.employees`; it is the
# target of the MERGE further down.
create_employees_ddl = """
CREATE or replace TABLE local.b1.employees (first_name string, last_name string, age int) using iceberg
"""
create_employees_result = spark.sql(create_employees_ddl)
create_employees_result.show()

In [None]:
# Create (or replace) `local.b1.employees2` with the same three columns;
# it is the source side of the MERGE further down.
create_employees2_ddl = """
CREATE or replace TABLE local.b1.employees2 (first_name string, last_name string, age int) using iceberg
"""
create_employees2_result = spark.sql(create_employees2_ddl)
create_employees2_result.show()

In [None]:
# Seed both tables with sample rows.
# `employees` gets one row; `employees2` gets two, one of which shares the
# same first/last name as the `employees` row but carries a different age
# (44 vs 45).
seed_employees_sql = """
INSERT INTO local.b1.employees VALUES ('Tom', 'Brady', 44)
"""
spark.sql(seed_employees_sql)

seed_employees2_sql = """
INSERT INTO local.b1.employees2 
    VALUES 
         ('Aaron', 'Rodgers', 39)
        ,('Tom', 'Brady', 45)
"""
# Left as a bare expression so it remains the cell's displayed output.
spark.sql(seed_employees2_sql)

In [None]:
# MERGE demo: upsert `employees2` (src) into `employees` (tgt).
#   - rows are matched on first_name + last_name
#   - matched rows have their age overwritten with the source age
#   - unmatched source rows are inserted as-is
merge_employees_sql = """
MERGE INTO local.b1.employees as tgt
    using local.b1.employees2 as src
        on tgt.first_name = src.first_name and tgt.last_name = src.last_name
    when matched then update set tgt.age = src.age
    when not matched then 
        insert *
"""
spark.sql(merge_employees_sql)

In [None]:
# Check the result: print the current contents of the merge target table.
employees_snapshot = spark.sql("select * from local.b1.employees")
employees_snapshot.show()

In [None]:
# DELETE demo: remove every row in `employees` whose age is greater than 41.
delete_over_41_sql = "delete from local.b1.employees where age > 41"
spark.sql(delete_over_41_sql)

In [None]:
# Check again: print the table contents after the delete.
employees_remaining = spark.sql("select * from local.b1.employees")
employees_remaining.show()