# In [None]:
# Import required libraries
from pyspark.sql import SparkSession

# Configure a Spark session with an Iceberg REST catalog named "demo" whose
# data files are stored in a MinIO (S3-compatible) bucket.
#
# NOTE: Iceberg's S3FileIO talks to S3 through the AWS SDK and does not read
# the Hadoop `fs.s3a.*` options, so the credentials are also passed as
# catalog-level `s3.*` properties. The `fs.s3a.*` entries are kept for any
# code path that still goes through the Hadoop S3A filesystem.
# NOTE(review): credentials are hard-coded for the local demo stack; load them
# from the environment or a secret store anywhere else.
spark = (
    SparkSession.builder
    .appName("IcebergTableCreation")
    # Iceberg catalog "demo", backed by the REST catalog service.
    .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.demo.type", "rest")
    .config("spark.sql.catalog.demo.uri", "http://iceberg-rest:8181")
    # Object storage access used by the catalog for data and metadata files.
    .config("spark.sql.catalog.demo.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
    .config("spark.sql.catalog.demo.warehouse", "s3://warehouse/")
    .config("spark.sql.catalog.demo.s3.endpoint", "http://minio:9000")
    .config("spark.sql.catalog.demo.s3.access-key-id", "admin")
    .config("spark.sql.catalog.demo.s3.secret-access-key", "password")
    # MinIO is addressed as <endpoint>/<bucket>; virtual-hosted-style requests
    # would need DNS entries of the form <bucket>.<host>.
    .config("spark.sql.catalog.demo.s3.path-style-access", "true")
    # Enables Iceberg-specific SQL (procedures, MERGE/UPDATE extensions, ...).
    .config(
        "spark.sql.extensions",
        "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
    )
    # Hadoop S3A settings (not consulted by S3FileIO, kept for compatibility).
    .config("spark.hadoop.fs.s3a.access.key", "admin")
    .config("spark.hadoop.fs.s3a.secret.key", "password")
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
    .getOrCreate()
)
# Create the namespace and the Iceberg table. Both statements use
# IF NOT EXISTS so the script can be re-run without failing on objects that
# were created by a previous run.
spark.sql("CREATE DATABASE IF NOT EXISTS demo.nyc")

# Taxi trips table, partitioned by vendor so each vendor's rows are written to
# their own partition under the explicit table location.
spark.sql("""
CREATE TABLE IF NOT EXISTS demo.nyc.taxis (
    vendor_id BIGINT,
    trip_id BIGINT,
    trip_distance FLOAT,
    fare_amount DOUBLE,
    store_and_fwd_flag STRING
)
USING iceberg
PARTITIONED BY (vendor_id)
LOCATION 's3://warehouse/nyc/taxis'
""")
# Seed the table with four sample trips (two rows for each of vendor 1 and 2).
# Column order: vendor_id, trip_id, trip_distance, fare_amount,
# store_and_fwd_flag.
insert_sample_rows_sql = """
INSERT INTO demo.nyc.taxis
VALUES 
    (1, 1000371, 1.8, 15.32, 'N'),
    (2, 1000372, 2.5, 22.15, 'N'),
    (2, 1000373, 0.9, 9.01, 'N'),
    (1, 1000374, 8.4, 42.13, 'Y')
"""
spark.sql(insert_sample_rows_sql)
# Read the table back and print its rows to confirm the insert.
taxis_df = spark.sql("SELECT * FROM demo.nyc.taxis")
taxis_df.show()

# Shut down the Spark session now that the demo is finished.
spark.stop()