In [20]:
# Partitioned table: identity partition on `region`.
#
# CREATE OR REPLACE keeps this cell re-runnable. A plain CREATE TABLE fails
# on the second run because the table already exists in the catalog, and
# re-running only the INSERT would append the same rows again. Replacing the
# table resets it to empty first, so the SELECT below shows exactly the
# three rows inserted here.
# NOTE: this discards whatever local.db.sales_by_region currently holds.
spark.sql("""
CREATE OR REPLACE TABLE local.db.sales_by_region (
    id INT,
    region STRING,
    amount DOUBLE
)
USING ICEBERG
PARTITIONED BY (region)
TBLPROPERTIES ('format-version' = '2')
""")

# Seed rows spanning two distinct partition values ('North', 'South').
spark.sql("""
INSERT INTO local.db.sales_by_region VALUES
(1, 'North', 250.75),
(2, 'South', 300.25),
(3, 'North', 150.00)
""")

# No ORDER BY here, so the row order of the displayed result is not guaranteed.
spark.sql("""SELECT * FROM local.db.sales_by_region""").show()


+---+------+------+
| id|region|amount|
+---+------+------+
|  2| South|300.25|
|  1| North|250.75|
|  3| North| 150.0|
+---+------+------+



In [22]:
# Table partitioned by the `months(sale_date)` transform, so the partition
# value is derived from the DATE column rather than stored as its own column.
#
# CREATE OR REPLACE keeps this cell re-runnable. A plain CREATE TABLE fails
# on the second run because the table already exists in the catalog, and
# re-running only the INSERT would append the same rows again. Replacing the
# table resets it to empty first, so the SELECT below shows exactly the
# three rows inserted here.
# NOTE: this discards whatever local.db.monthly_sales currently holds.
spark.sql("""
CREATE OR REPLACE TABLE local.db.monthly_sales (
    id INT,
    amount DOUBLE,
    sale_date DATE
)
USING ICEBERG
PARTITIONED BY (months(sale_date))
TBLPROPERTIES ('format-version' = '2')
""")

# Two rows dated January 2024 and one dated February 2024.
spark.sql("""
INSERT INTO local.db.monthly_sales VALUES
(1, 1200.00, DATE '2024-01-15'),
(2, 1300.50, DATE '2024-01-22'),
(3, 1150.75, DATE '2024-02-05')
""")

# No ORDER BY here, so the row order of the displayed result is not guaranteed.
spark.sql("""SELECT * FROM local.db.monthly_sales""").show()


+---+-------+----------+
| id| amount| sale_date|
+---+-------+----------+
|  1| 1200.0|2024-01-15|
|  2| 1300.5|2024-01-22|
|  3|1150.75|2024-02-05|
+---+-------+----------+



In [23]:
# Multi-column partitioning: identity on `region` combined with the
# `months(sale_date)` transform.
#
# CREATE OR REPLACE keeps this cell re-runnable. A plain CREATE TABLE fails
# on the second run because the table already exists in the catalog, and
# re-running only the INSERT would append the same rows again. Replacing the
# table resets it to empty first, so the SELECT below shows exactly the
# three rows inserted here.
# NOTE: this discards whatever local.db.sales_by_region_and_month currently holds.
spark.sql("""
CREATE OR REPLACE TABLE local.db.sales_by_region_and_month (
    id INT,
    region STRING,
    amount DOUBLE,
    sale_date DATE
)
USING ICEBERG
PARTITIONED BY (region, months(sale_date))
TBLPROPERTIES ('format-version' = '2')
""")

# Two 'East' rows dated January 2024 and one 'West' row dated February 2024.
spark.sql("""
INSERT INTO local.db.sales_by_region_and_month VALUES
(1, 'East', 150.00, DATE '2024-01-01'),
(2, 'East', 180.00, DATE '2024-01-15'),
(3, 'West', 210.00, DATE '2024-02-01')
""")

# No ORDER BY here, so the row order of the displayed result is not guaranteed.
spark.sql("""SELECT * FROM local.db.sales_by_region_and_month""").show()


+---+------+------+----------+
| id|region|amount| sale_date|
+---+------+------+----------+
|  1|  East| 150.0|2024-01-01|
|  2|  East| 180.0|2024-01-15|
|  3|  West| 210.0|2024-02-01|
+---+------+------+----------+



In [24]:
# Bucketed table: rows are partitioned with the `bucket(4, user_id)`
# transform, i.e. into 4 buckets derived from `user_id`.
#
# CREATE OR REPLACE keeps this cell re-runnable. A plain CREATE TABLE fails
# on the second run because the table already exists in the catalog, and
# re-running only the INSERT would append the same rows again. Replacing the
# table resets it to empty first, so the SELECT below shows exactly the
# four rows inserted here.
# NOTE: this discards whatever local.db.bucketed_users currently holds.
spark.sql("""
CREATE OR REPLACE TABLE local.db.bucketed_users (
    user_id INT,
    name STRING
)
USING ICEBERG
PARTITIONED BY (bucket(4, user_id))
TBLPROPERTIES ('format-version' = '2')
""")

# Seed four users to spread across the buckets.
spark.sql("""
INSERT INTO local.db.bucketed_users VALUES
(1, 'Alice'),
(2, 'Bob'),
(3, 'Charlie'),
(4, 'David')
""")

# No ORDER BY here, so the row order of the displayed result is not guaranteed.
spark.sql("""SELECT * FROM local.db.bucketed_users""").show()


+-------+-------+
|user_id|   name|
+-------+-------+
|      1|  Alice|
|      2|    Bob|
|      4|  David|
|      3|Charlie|
+-------+-------+



In [25]:
# Inspect the bucketed table's metadata: the column list followed by the
# "# Partitioning" section. truncate=False prints each cell at full width
# instead of cutting long values short.
spark.sql(
    "DESCRIBE TABLE EXTENDED local.db.bucketed_users"
).show(truncate=False)

+----------------------------+----------------------------------------------------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                                                                             |comment|
+----------------------------+----------------------------------------------------------------------------------------------------------------------+-------+
|user_id                     |int                                                                                                                   |NULL   |
|name                        |string                                                                                                                |NULL   |
|                            |                                                                                                                      |       |
|# Partitioning              |                      