In [0]:
# Configuration: object names interpolated into the SQL statements in this notebook.
catalog: str = "cpg_industry"      # catalog that holds both the source and the target schema
gold_supply: str = "gold_supply"   # schema the summary tables are written to
silver_schema: str = "silver"      # schema the summary queries read from

In [0]:
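# Drop schema (destructive: CASCADE also removes every table inside the schema).
# Kept disabled; uncomment only when a full rebuild of the schema is intended.
# spark.sql(f"DROP SCHEMA IF EXISTS {catalog}.{gold_supply} CASCADE")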

In [0]:
# Create the target schema when it is missing; IF NOT EXISTS keeps this cell safe to re-run.
create_schema_sql = f"CREATE SCHEMA IF NOT EXISTS {catalog}.{gold_supply}"
spark.sql(create_schema_sql)

In [0]:
# Monthly B2C Purchase Total by Order Date
# Sums line-item total_price per calendar month of the parent order's order_date,
# rounded to 2 decimals. The inner join keeps only items whose order_id has a
# matching order row.
# CREATE OR REPLACE makes the cell re-runnable: a plain CREATE TABLE fails as soon
# as the table already exists, which breaks a second "Run All".
# NOTE(review): the ORDER BY only influences how rows are written; consumers that
# need sorted output should still apply their own ORDER BY when reading.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2c_monthly_purchase_summary AS
SELECT
    DATE_TRUNC('month', o.order_date) AS order_month,
    ROUND(SUM(coi.total_price),2) AS total_purchase_amount
FROM {catalog}.{silver_schema}.cpg_consumer_order_items coi
JOIN {catalog}.{silver_schema}.cpg_consumer_order o ON coi.order_id = o.order_id
GROUP BY DATE_TRUNC('month', o.order_date)
ORDER BY order_month;
""")

In [0]:
# Purchase Summary by State and Country
# Sums order total_amount per (state, country) of the ordering consumer, rounded to
# 2 decimals. The inner join drops consumers without orders and orders whose
# consumer_id has no matching consumer row.
# CREATE OR REPLACE makes the cell re-runnable: a plain CREATE TABLE fails as soon
# as the table already exists, which breaks a second "Run All".
# NOTE(review): the ORDER BY only influences how rows are written; consumers that
# need sorted output should still apply their own ORDER BY when reading.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2c_purchase_summary_by_location AS
SELECT 
    c.state,
    c.country,
    ROUND(SUM(o.total_amount), 2) AS total_purchase_value
FROM {catalog}.{silver_schema}.cpg_consumer c
JOIN {catalog}.{silver_schema}.cpg_consumer_order o
    ON c.consumer_id = o.consumer_id
GROUP BY c.state, c.country
ORDER BY total_purchase_value DESC;
""")

In [0]:
# Order Quantity by Product Category
# Sums ordered quantity per product category. The inner join keeps only order
# items whose product_id has a matching product row.
# CREATE OR REPLACE makes the cell re-runnable: a plain CREATE TABLE fails as soon
# as the table already exists, which breaks a second "Run All".
# NOTE(review): the ORDER BY only influences how rows are written; consumers that
# need sorted output should still apply their own ORDER BY when reading.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2c_category_quantity_ordered AS
SELECT 
    pr.category,
    SUM(coi.quantity) AS total_quantity_ordered
FROM {catalog}.{silver_schema}.cpg_consumer_order_items coi
JOIN {catalog}.{silver_schema}.cpg_product pr
    ON coi.product_id = pr.product_id
GROUP BY pr.category
ORDER BY total_quantity_ordered DESC;
""")

In [0]:
# Order Status Summary and Count
# Counts consumer orders per order_status value (one output row per status).
# CREATE OR REPLACE makes the cell re-runnable: a plain CREATE TABLE fails as soon
# as the table already exists, which breaks a second "Run All".
# The output column keeps its original name `count` so existing readers of the
# table are unaffected.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2c_order_status_summary AS
SELECT 
    order_status,
    COUNT(*) AS count
FROM {catalog}.{silver_schema}.cpg_consumer_order
GROUP BY order_status;
""")