In [0]:
# Configuration: identifiers interpolated into the SQL statements run by later cells.
silver_schema = "silver"       # schema the summary queries read their source tables from
gold_supply = "gold_supply"    # schema the summary tables are created in
catalog = "cpg_industry"       # catalog prefix used for both schemas

In [0]:
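# Drop schema (disabled): uncomment the line below to remove the gold schema and every table in it before a rebuild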
# spark.sql(f"DROP SCHEMA IF EXISTS {catalog}.{gold_supply} CASCADE")

In [0]:
# Ensure the target schema exists; IF NOT EXISTS makes this a no-op when it is already there.
spark.sql("CREATE SCHEMA IF NOT EXISTS {}.{}".format(catalog, gold_supply))

In [0]:
# Monthly Purchase Totals by Order Date
# Writes one row per month of order_date (DATE_TRUNC to 'month') with the sum of
# total_amount, rounded to 2 decimals, from the silver cpg_distributor_purchases table.
#
# CREATE OR REPLACE TABLE keeps this cell re-runnable: a plain CREATE TABLE ... AS
# raises "table already exists" on every run after the first, and the replaced
# table always reflects the current silver data.
#
# NOTE(review): the ORDER BY only influences how rows are written; consumers that
# need ordered output should apply their own ORDER BY when querying the table.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2b_monthly_purchase_summary AS
SELECT
    DATE_TRUNC('month', p.order_date) AS order_month,
    ROUND(SUM(p.total_amount),2) AS total_purchase_amount
FROM {catalog}.{silver_schema}.cpg_distributor_purchases p
GROUP BY DATE_TRUNC('month', p.order_date)
ORDER BY order_month;
""")

In [0]:
# Top Distributors by Purchase Value
# Writes one row per distributor_name with the sum of total_amount (rounded to
# 2 decimals) over that distributor's purchases. The inner JOIN means distributors
# with no matching purchase rows do not appear in the result.
#
# CREATE OR REPLACE TABLE keeps this cell re-runnable: a plain CREATE TABLE ... AS
# raises "table already exists" on every run after the first.
#
# NOTE(review): grouping is by distributor_name only, so two distributor_ids that
# share a name are summed together — confirm names are unique in cpg_distributor.
# NOTE(review): the ORDER BY only influences how rows are written; apply ORDER BY
# at query time when a ranked result is needed.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2b_top_distributor_purchase_value AS
SELECT 
    d.distributor_name,
    ROUND(SUM(p.total_amount),2) AS total_purchase_value
FROM {catalog}.{silver_schema}.cpg_distributor d
JOIN {catalog}.{silver_schema}.cpg_distributor_purchases p
    ON d.distributor_id = p.distributor_id
GROUP BY d.distributor_name
ORDER BY total_purchase_value DESC;
""")

In [0]:
# Product Categories by Total Quantity Ordered
# Writes one row per product category with the sum of quantity_ordered across all
# purchase items, joining items to cpg_product on product_id. The inner JOIN drops
# purchase items whose product_id has no match in cpg_product.
#
# CREATE OR REPLACE TABLE keeps this cell re-runnable: a plain CREATE TABLE ... AS
# raises "table already exists" on every run after the first.
#
# NOTE(review): the ORDER BY only influences how rows are written; apply ORDER BY
# at query time when a ranked result is needed.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2b_category_quantity_ordered AS
SELECT 
    pr.category,
    SUM(pi.quantity_ordered) AS total_quantity_ordered
FROM {catalog}.{silver_schema}.cpg_distributor_purchase_items pi
JOIN {catalog}.{silver_schema}.cpg_product pr
    ON pi.product_id = pr.product_id
GROUP BY pr.category
ORDER BY total_quantity_ordered DESC;
""")

In [0]:
# Order Status Distribution Summary
# Writes one row per order_status with the number of rows in the silver
# cpg_distributor_purchases table that carry that status.
#
# CREATE OR REPLACE TABLE keeps this cell re-runnable: a plain CREATE TABLE ... AS
# raises "table already exists" on every run after the first.
#
# NOTE(review): the output column is named `count`, the same as the SQL aggregate
# function; the name is kept unchanged so existing readers of the table still work.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2b_order_status_summary AS
SELECT 
    order_status,
    COUNT(*) AS count
FROM {catalog}.{silver_schema}.cpg_distributor_purchases
GROUP BY order_status;
""")