In [66]:
# Define the partitioned Iceberg table via SQL DDL.
# `IF NOT EXISTS` keeps this cell safe to re-run: an existing table is left as-is.
create_partitioned_table_sql = """
CREATE TABLE IF NOT EXISTS local.db.partitioned_table (
  id INT,
  category STRING,
  value DOUBLE
)
USING iceberg
PARTITIONED BY (category)
"""
spark.sql(create_partitioned_table_sql)

DataFrame[]

In [67]:
# Append two new rows to the existing Iceberg table, then display the table.

from pyspark.sql import Row

new_data = [
    Row(id=3, category="B", value=55.5),
    Row(id=4, category="C", value=77.0),
]
df_new = spark.createDataFrame(new_data)

# v2 writer append: adds these rows without touching rows already in the table.
df_new.writeTo("local.db.partitioned_table").append()

# Read the table back to confirm the rows landed.
spark.table("local.db.partitioned_table").show()


+---+--------+-----+
| id|category|value|
+---+--------+-----+
|  3|       B| 55.5|
|  4|       C| 77.0|
+---+--------+-----+



In [68]:
# Overwrite the entire content of the Iceberg table while keeping its definition.
#
# The v1 call `df.write.mode("overwrite").saveAsTable(...)` is planned by
# Spark 3 as a table *replacement* for v2 catalogs: the existing table would be
# swapped for one derived from the DataFrame, dropping
# `PARTITIONED BY (category)` and the declared column types.
# The v2 writer's `overwrite(condition)` replaces only the rows matching the
# condition, so an always-true condition swaps out all data and leaves the
# table's schema and partition spec untouched.

from pyspark.sql.functions import lit

overwrite_df = spark.createDataFrame([(10, "X", 100.0)], ["id", "category", "value"])

# lit(True) matches every row -> full-table data overwrite, same table definition.
overwrite_df.writeTo("local.db.partitioned_table").overwrite(lit(True))

# Verify updated content
spark.read.table("local.db.partitioned_table").show()


+---+--------+-----+
| id|category|value|
+---+--------+-----+
| 10|       X|100.0|
+---+--------+-----+



In [69]:
# Upsert with MERGE INTO (original author's note: requires Iceberg 0.12+).
# The source is a single inline row; target rows with the same `id` are
# updated from it, and if no target row matches, the source row is inserted.
upsert_sql = """
MERGE INTO local.db.partitioned_table t
USING (SELECT 10 AS id, 'X' AS category, 200.0 AS value) s
ON t.id = s.id
WHEN MATCHED THEN UPDATE SET *
WHEN NOT MATCHED THEN INSERT *
"""
spark.sql(upsert_sql)

# Show the table after the merge.
spark.read.table("local.db.partitioned_table").show()


+---+--------+-----+
| id|category|value|
+---+--------+-----+
| 10|       X|200.0|
+---+--------+-----+



In [70]:
# Row-level delete: remove every row whose category equals 'X'.
delete_sql = """
DELETE FROM local.db.partitioned_table
WHERE category = 'X'
"""
spark.sql(delete_sql)

# Show what remains in the table after the delete.
spark.table("local.db.partitioned_table").show()


+---+--------+-----+
| id|category|value|
+---+--------+-----+
+---+--------+-----+

