In [9]:
from pyspark.sql import SparkSession

# Initialize Spark session with Iceberg configurations
spark = SparkSession.builder \
  .appName("IcebergLocalDevelopment") \
  .config('spark.jars.packages', 'org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1') \
  .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") \
  .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") \
  .config("spark.sql.catalog.local.type", "hadoop") \
  .config("spark.sql.catalog.local.warehouse", "spark-warehouse/iceberg") \
  .getOrCreate()

In [10]:
spark

In [11]:
df = spark.read.option("header", True).csv("../../data/listings.csv")

df = df.selectExpr(
    "id as listing_id",
    "host_id",
    "host_name",
    "neighbourhood as city",
    "room_type as property_type",
    "cast(price as double) as price"
)

df.show()


+----------+-------+--------------------+---------------+---------------+------+
|listing_id|host_id|           host_name|           city|  property_type| price|
+----------+-------+--------------------+---------------+---------------+------+
|     17878|  68997|            Matthias|     Copacabana|Entire home/apt| 254.0|
|     25026| 102840|             Viviane|     Copacabana|Entire home/apt| 252.0|
|     35764| 153691|Patricia Miranda ...|     Copacabana|Entire home/apt| 190.0|
|     48305|  70933|             Goitaca|        Ipanema|Entire home/apt|2239.0|
|     48901| 222884|              Marcio|     Copacabana|Entire home/apt| 743.0|
|     49179| 224192|               David|     Copacabana|Entire home/apt| 189.0|
|     50759| 233554|              Felipe|Barra da Tijuca|Entire home/apt|2800.0|
|     51703| 238091|               Dalia|     Copacabana|Entire home/apt| 201.0|
|     53533| 249439|      Sherri & Andre|            Joá|Entire home/apt|1467.0|
|     64795|  93005|        

In [12]:
spark.sql("""
CREATE TABLE local.db_listings (
    listing_id STRING,
    host_id STRING,
    host_name STRING,
    city STRING,
    property_type STRING,
    price DOUBLE
) USING iceberg
""")


AnalysisException: [TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create table or view `db_listings` because it already exists.
Choose a different name, drop or replace the existing object, or add the IF NOT EXISTS clause to tolerate pre-existing objects.

In [17]:
spark.sql("SELECT * FROM local.db_listings").show()

+----------+-------+--------------------+---------------+---------------+------+
|listing_id|host_id|           host_name|           city|  property_type| price|
+----------+-------+--------------------+---------------+---------------+------+
|     17878|  68997|            Matthias|     Copacabana|Entire home/apt| 254.0|
|     25026| 102840|             Viviane|     Copacabana|Entire home/apt| 252.0|
|     35764| 153691|Patricia Miranda ...|     Copacabana|Entire home/apt| 190.0|
|     48305|  70933|             Goitaca|        Ipanema|Entire home/apt|2239.0|
|     48901| 222884|              Marcio|     Copacabana|Entire home/apt| 743.0|
|     49179| 224192|               David|     Copacabana|Entire home/apt| 189.0|
|     50759| 233554|              Felipe|Barra da Tijuca|Entire home/apt|2800.0|
|     51703| 238091|               Dalia|     Copacabana|Entire home/apt| 201.0|
|     53533| 249439|      Sherri & Andre|            Joá|Entire home/apt|1467.0|
|     64795|  93005|        

In [18]:
spark.sql(
    """
    alter table local.db_listings add column email STRING, phone INT
    """
)

DataFrame[]

In [19]:
spark.sql("SELECT * FROM local.db_listings").show()

+----------+-------+--------------------+---------------+---------------+------+-----+-----+
|listing_id|host_id|           host_name|           city|  property_type| price|email|phone|
+----------+-------+--------------------+---------------+---------------+------+-----+-----+
|     17878|  68997|            Matthias|     Copacabana|Entire home/apt| 254.0| NULL| NULL|
|     25026| 102840|             Viviane|     Copacabana|Entire home/apt| 252.0| NULL| NULL|
|     35764| 153691|Patricia Miranda ...|     Copacabana|Entire home/apt| 190.0| NULL| NULL|
|     48305|  70933|             Goitaca|        Ipanema|Entire home/apt|2239.0| NULL| NULL|
|     48901| 222884|              Marcio|     Copacabana|Entire home/apt| 743.0| NULL| NULL|
|     49179| 224192|               David|     Copacabana|Entire home/apt| 189.0| NULL| NULL|
|     50759| 233554|              Felipe|Barra da Tijuca|Entire home/apt|2800.0| NULL| NULL|
|     51703| 238091|               Dalia|     Copacabana|Entire home/a

In [22]:
spark.sql('update local.db_listings set email = "goiaba@email.com", phone = 48954652 where listing_id = 25026   ')

DataFrame[]

In [20]:
spark.sql('update local.db_listings set email = "fulano123@email.com", phone = 8726512 where listing_id = 35764')

DataFrame[]

In [24]:
spark.sql("SELECT * FROM local.db_listings where listing_id = 35764").show()

+----------+-------+--------------------+----------+---------------+-----+-------------------+-------+
|listing_id|host_id|           host_name|      city|  property_type|price|              email|  phone|
+----------+-------+--------------------+----------+---------------+-----+-------------------+-------+
|     35764| 153691|Patricia Miranda ...|Copacabana|Entire home/apt|190.0|fulano123@email.com|8726512|
|     35764| 153691|Patricia Miranda ...|Copacabana|Entire home/apt|190.0|fulano123@email.com|8726512|
+----------+-------+--------------------+----------+---------------+-----+-------------------+-------+



In [25]:
spark.sql("SELECT * FROM local.db_listings WHERE listing_id = '35764'").show()

+----------+-------+--------------------+----------+---------------+-----+-------------------+-------+
|listing_id|host_id|           host_name|      city|  property_type|price|              email|  phone|
+----------+-------+--------------------+----------+---------------+-----+-------------------+-------+
|     35764| 153691|Patricia Miranda ...|Copacabana|Entire home/apt|190.0|fulano123@email.com|8726512|
|     35764| 153691|Patricia Miranda ...|Copacabana|Entire home/apt|190.0|fulano123@email.com|8726512|
+----------+-------+--------------------+----------+---------------+-----+-------------------+-------+



In [11]:
spark.sql("""
DELETE FROM local.db_listings
WHERE listing_id = '35764'
""")

DataFrame[]

In [12]:
spark.sql("SELECT * FROM local.db_listings WHERE listing_id = '35764'").show()

+----------+-------+---------+----+-------------+-----+
|listing_id|host_id|host_name|city|property_type|price|
+----------+-------+---------+----+-------------+-----+
+----------+-------+---------+----+-------------+-----+



In [13]:
spark.sql("SELECT * FROM local.db_listings WHERE listing_id = '17878'").show()

+----------+-------+---------+----------+---------------+-----+
|listing_id|host_id|host_name|      city|  property_type|price|
+----------+-------+---------+----------+---------------+-----+
|     17878|  68997| Matthias|Copacabana|Entire home/apt|254.0|
+----------+-------+---------+----------+---------------+-----+



In [14]:
spark.sql("""
UPDATE local.db_listings
SET price = 500
WHERE listing_id = '17878'
""")

DataFrame[]

In [15]:
spark.sql("SELECT * FROM local.db_listings WHERE listing_id = '17878'").show()

+----------+-------+---------+----------+---------------+-----+
|listing_id|host_id|host_name|      city|  property_type|price|
+----------+-------+---------+----------+---------------+-----+
|     17878|  68997| Matthias|Copacabana|Entire home/apt|500.0|
+----------+-------+---------+----------+---------------+-----+



In [16]:
spark.sql("DROP TABLE local.db_listings")

DataFrame[]