In [None]:
# Optional one-time setup: uncomment to install the dependencies into the kernel's environment.
# NOTE(review): the install is unpinned, while the next cell selects an Iceberg runtime built
# for Spark 3.5 / Scala 2.12 -- confirm the installed pyspark matches (e.g. "pyspark==3.5.*").
#%pip install pyspark duckdb

In [None]:
# Versions that together select the Iceberg Spark runtime artifact requested below.
spark_version = "3.5"
scala_version = "2.12"
iceberg_version = "1.7.0"

from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date, rand, floor, expr

# Name under which the Iceberg catalog is registered, and the directory used as its warehouse.
catalog_name = "iceberg"
warehouse_path = "./icehouse"

# Maven coordinates handed to spark.jars.packages.
iceberg_runtime_pkg = (
    f"org.apache.iceberg:iceberg-spark-runtime-{spark_version}_{scala_version}:{iceberg_version}"
)
# Common prefix of every catalog-specific configuration key.
catalog_conf_key = f"spark.sql.catalog.{catalog_name}"

spark = (
    SparkSession.builder
    .appName("local_iceberg_example")
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config(catalog_conf_key, "org.apache.iceberg.spark.SparkCatalog")
    .config(f"{catalog_conf_key}.type", "hadoop")
    .config(f"{catalog_conf_key}.warehouse", warehouse_path)
    .config("spark.jars.packages", iceberg_runtime_pkg)
    .getOrCreate()
)

In [2]:
# Synthetic dataset: one row per generated id, stamped with the current date,
# floor(rand() * 100) as a sample value, and a uuid() string as the key.
row_cnt = 5_000
df = (
    spark.range(0, row_cnt)
    .withColumnRenamed("id", "row_id")  # rename first; column order is unaffected
    .withColumn("rpt_dt", current_date())
    .withColumn("some_val", floor(rand() * 100))
    .withColumn("txn_key", expr("uuid()"))
)

In [3]:
# Create the namespace inside the Iceberg catalog: an unqualified name would be
# resolved against Spark's current catalog instead of the one the table is
# written to. "if not exists" keeps the cell re-runnable once the namespace
# (or its warehouse directory) is already present.
spark.sql(f"create namespace if not exists {catalog_name}.dummy_ns")

DataFrame[]

In [4]:
namespace = "dummy_ns"
table_name = "dummy_data"

# Fully qualified identifier: <catalog>.<namespace>.<table>.
table_ident = ".".join([catalog_name, namespace, table_name])

# Configure the write, then create the table or replace it if it already exists.
table_writer = (
    df.writeTo(table_ident)
    .using("iceberg")
    .tableProperty("write.format.default", "parquet")
)
table_writer.createOrReplace()

                                                                                

In [5]:
import duckdb

# Open a DuckDB connection with default arguments.
cn = duckdb.connect()

# Install and load the iceberg extension on this connection so iceberg_scan() is available.
extension_setup_sql = """
INSTALL iceberg;
LOAD iceberg;
"""
cn.execute(extension_setup_sql)

<duckdb.duckdb.DuckDBPyConnection at 0x10e4539f0>

In [6]:
# The table is read straight from its directory under the warehouse:
# <warehouse>/<namespace>/<table>.
table_path = f"{warehouse_path}/{namespace}/{table_name}"

# Preview the first ten rows returned by the scan.
preview_sql = f"""
    select *
    from iceberg_scan('{table_path}')
    limit 10
"""
cn.sql(preview_sql).show()

┌────────┬────────────┬──────────┬──────────────────────────────────────┐
│ row_id │   rpt_dt   │ some_val │               txn_key                │
│ int64  │    date    │  int64   │               varchar                │
├────────┼────────────┼──────────┼──────────────────────────────────────┤
│      0 │ 2024-11-29 │       92 │ 26248dcd-e7c6-441c-ab1c-ced851b7b49d │
│      1 │ 2024-11-29 │        5 │ d43e6d75-68d6-456a-912c-8ff6edc4eb3c │
│      2 │ 2024-11-29 │       74 │ 27d60a81-a162-4985-8459-fa83a12ea6d2 │
│      3 │ 2024-11-29 │       62 │ 8cd3e1ed-bb4c-4cb0-84ff-06c6ddb3d89a │
│      4 │ 2024-11-29 │       58 │ 8cefb363-e8f1-4fc2-9592-b6cef9cba3d7 │
│      5 │ 2024-11-29 │       25 │ 896313f1-6eb3-47c8-98db-8e28d521de5f │
│      6 │ 2024-11-29 │       64 │ 9e50253c-ceef-4d51-aa36-c8d6ec81d038 │
│      7 │ 2024-11-29 │        7 │ 19fc3860-5746-4c85-b52f-49bde67879c6 │
│      8 │ 2024-11-29 │       63 │ dba3df3b-6c36-4593-bbac-049db8ad4b81 │
│      9 │ 2024-11-29 │       50 │ dae