# Spark Local Setup for Apache Iceberg
This notebook sets up a local Spark session with Iceberg support.

In [7]:
from pyspark.sql import SparkSession
import os

# Ensure the warehouse path exists
warehouse_path = "/home/jovyan/iceberg/warehouse"
os.makedirs(warehouse_path, exist_ok=True)

# Initialize SparkSession with HadoopCatalog
spark = SparkSession.builder \
    .appName("IcebergLocalSetup") \
    .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") \
    .config("spark.sql.catalog.local.type", "hadoop") \
    .config("spark.sql.catalog.local.warehouse", warehouse_path) \
    .getOrCreate()

spark.sparkContext.setLogLevel("WARN")
print("✅ SparkSession initialized with local Iceberg Hadoop catalog")


✅ SparkSession initialized with local Iceberg Hadoop catalog


In [8]:
# Optional: List available catalogs and configurations
for k, v in spark.sparkContext.getConf().getAll():
    if 'catalog' in k:
        print(f"{k} = {v}")

spark.sql.catalog.local.warehouse = /home/jovyan/iceberg/warehouse
spark.sql.catalog.local.type = hadoop
spark.sql.catalog.local = org.apache.iceberg.spark.SparkCatalog


In [None]:
spark.sql("""
    CREATE TABLE IF NOT EXISTS local.db.sample_table (
        id INT,
        name STRING,
        created_at TIMESTAMP
    ) USING iceberg
""")
print("✅ Sample table 'local.db.sample_table' created successfully")

✅ Sample table 'local.db.sample_table' created successfully


In [None]:
spark.sql("INSERT INTO local.db.sample_table VALUES (1, 'Alice', current_timestamp())")
df = spark.sql("SELECT * FROM local.db.sample_table")
df.show()

+---+-----+--------------------+
| id| name|          created_at|
+---+-----+--------------------+
|  1|Alice|2025-07-10 07:26:...|
|  1|Alice|2025-07-10 07:49:...|
+---+-----+--------------------+

