In [3]:
import os

import findspark
findspark.init()

from pyspark.sql import SparkSession

# Build the Spark session with an Iceberg catalog named "iceberg".
#
# The MinIO credentials are read from the environment so that real secrets do
# not have to be stored in the notebook. The fallbacks are the local
# development values this notebook used before, so it still runs unchanged
# when the variables are not set.
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "minio")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "minio123")

# NOTE(review): getOrCreate() reuses a session that is already active in this
# kernel; confirm the settings below take effect after a re-run (restart the
# kernel if in doubt).
spark = (SparkSession.builder
    .appName("Read-Existing-Iceberg")
    .master("spark://spark-nb:7077")
    # 1. Enable the Iceberg SQL extensions
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    # 2. Configure the catalog named 'iceberg' (Hive metastore type, S3A warehouse)
    .config("spark.sql.catalog.iceberg", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.iceberg.type", "hive")
    .config("spark.sql.catalog.iceberg.uri", "thrift://hive-metastore:9083")
    .config("spark.sql.catalog.iceberg.warehouse", "s3a://iceberg/lakehouse")
    .config("spark.sql.catalog.iceberg.io-impl", "org.apache.iceberg.hadoop.HadoopFileIO")
    # 3. S3A settings for MinIO: custom endpoint, path-style access, SSL disabled
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
    .config("spark.hadoop.fs.s3a.access.key", MINIO_ACCESS_KEY)
    .config("spark.hadoop.fs.s3a.secret.key", MINIO_SECRET_KEY)
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
    # Hostname advertised for the driver
    .config("spark.driver.host", "spark-nb")
    .getOrCreate())

# Try to read the existing table and report whether it worked.
print("Membaca tabel yang sudah ada...")
sample_query = "SELECT * FROM iceberg.bronze.taxi_dataset LIMIT 10"
try:
    # Switch to the catalog first; this fails early if it cannot be resolved.
    spark.sql("USE iceberg")

    # Run the sample query and render the rows.
    df = spark.sql(sample_query)
    df.show()

    print("✅ Berhasil membaca tabel Iceberg!")
except Exception as err:
    # Any failure above is reported as a message instead of a traceback.
    print(f"❌ Masih error: {err}")

Membaca tabel yang sudah ada...


[Stage 0:>                                                          (0 + 1) / 1]

+--------+--------------------+---------------------+---------------+-------------+----------+------------------+------------+------------+------------+-----------+-----+-------+----------+------------+---------------------+------------+--------------------+-----------+
|VendorID|tpep_pickup_datetime|tpep_dropoff_datetime|passenger_count|trip_distance|RatecodeID|store_and_fwd_flag|PULocationID|DOLocationID|payment_type|fare_amount|extra|mta_tax|tip_amount|tolls_amount|improvement_surcharge|total_amount|congestion_surcharge|airport_fee|
+--------+--------------------+---------------------+---------------+-------------+----------+------------------+------------+------------+------------+-----------+-----+-------+----------+------------+---------------------+------------+--------------------+-----------+
|       2| 2023-01-01 00:32:10|  2023-01-01 00:40:36|            1.0|         0.97|       1.0|                 N|         161|         141|           2|        9.3|  1.0|    0.5|       0.

                                                                                

In [4]:
# Stop the Spark session once the notebook is done with it.
spark.stop()