In [0]:
# Namespace settings shared by every cell below
catalog = "cpg_industry"       # catalog that qualifies every table name
silver_schema = "silver"       # schema the queries read from
gold_supply = "gold_supply"    # schema the result tables are written to

In [0]:
# Drop schema
# spark.sql(f"DROP SCHEMA IF EXISTS {catalog}.{gold_supply} CASCADE")

# Drop tables
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_b2c_kpi")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_b2b_kpi")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_top_inventory_products")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_low_stock_products")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_inventory_status_summary")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_location_inventory_value")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_location_inventory_quantity")

In [0]:
# Make sure the target schema exists before any table is written to it.
# IF NOT EXISTS makes this a no-op when the schema is already there.
spark.sql(
    f"CREATE SCHEMA IF NOT EXISTS {catalog}.{gold_supply}"
)

In [0]:
# B2C supply-chain KPI snapshot: writes a single row stamped with the run date.
#
# Inventory and sales are aggregated in separate CTEs and combined only at the
# end. Joining inventory rows directly to order lines in one flat base table
# repeats every inventory row once per order line of the same product (and
# every order line once per inventory row), which inflates the inputs of every
# SUM / COUNT / AVG. The population is unchanged: only products present in
# cpg_product that have both inventory rows and consumer order lines contribute.
#
# CREATE OR REPLACE keeps the cell re-runnable when the table already exists.
#
# NOTE(review): backorder_rate_pct counts order lines whose order_status is
# 'Returned' -- confirm that this status is the intended backorder signal.
# NOTE(review): assumes order_id is unique in cpg_consumer_order and product_id
# is unique in cpg_product -- verify against the silver layer.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2c_kpi AS
WITH order_lines AS (
    -- one row per consumer order line, limited to known and stocked products
    SELECT
        coi.product_id,
        coi.total_price,
        coi.quantity,
        o.order_status,
        o.order_date
    FROM {catalog}.{silver_schema}.cpg_consumer_order_items coi
    JOIN {catalog}.{silver_schema}.cpg_consumer_order o
        ON coi.order_id = o.order_id
    WHERE coi.product_id IN (SELECT product_id FROM {catalog}.{silver_schema}.cpg_product)
      AND coi.product_id IN (SELECT product_id FROM {catalog}.{silver_schema}.cpg_inventory)
),
inventory_totals AS (
    -- each inventory row counted exactly once, limited to products that were sold
    SELECT
        SUM(i.quantity_on_hand * p.retail_price) AS inventory_value,
        AVG(i.quantity_on_hand) AS avg_quantity_on_hand
    FROM {catalog}.{silver_schema}.cpg_inventory i
    JOIN {catalog}.{silver_schema}.cpg_product p
        ON i.product_id = p.product_id
    WHERE i.product_id IN (SELECT product_id FROM order_lines)
),
sales_totals AS (
    -- each order line counted exactly once
    SELECT
        SUM(total_price) AS total_sales_value,
        SUM(quantity) AS total_quantity_sold,
        SUM(CASE WHEN order_status = 'Returned' THEN 1 ELSE 0 END) AS backorder_count,
        COUNT(*) AS total_order_lines,
        AVG(DATEDIFF(CURRENT_DATE(), order_date)) AS avg_days_sales_inventory,
        DATEDIFF(MAX(order_date), MIN(order_date)) / 7.0 AS total_weeks_in_period
    FROM order_lines
)
SELECT
    CURRENT_DATE() AS kpi_date,
    ROUND(inventory_value / NULLIF(total_sales_value, 0), 2) AS stock_to_sales_ratio,
    ROUND(total_quantity_sold / NULLIF(avg_quantity_on_hand, 0), 2) AS inventory_turnover_rate,
    ROUND(avg_quantity_on_hand / NULLIF(total_quantity_sold / NULLIF(total_weeks_in_period, 0), 0), 2) AS weeks_on_hand,
    ROUND(backorder_count * 100.0 / NULLIF(total_order_lines, 0), 2) AS backorder_rate_pct,
    ROUND(avg_days_sales_inventory, 2) AS days_sales_in_inventory
FROM inventory_totals
CROSS JOIN sales_totals;
""")


In [0]:
# B2B supply-chain KPI snapshot: writes a single row stamped with the run date.
#
# Inventory and distributor purchases are aggregated in separate CTEs and
# combined only at the end. Joining inventory rows directly to purchase lines
# in one flat base table repeats every inventory row once per purchase line of
# the same product (and every purchase line once per inventory row), which
# inflates the inputs of every SUM / COUNT / AVG. The population is unchanged:
# only products present in cpg_product that have both inventory rows and
# distributor purchase lines contribute.
#
# CREATE OR REPLACE keeps the cell re-runnable when the table already exists.
#
# NOTE(review): backorder_rate_pct counts purchase lines whose order_status is
# 'Cancelled' -- confirm that this status is the intended backorder signal.
# NOTE(review): assumes purchase_id is unique in cpg_distributor_purchases and
# product_id is unique in cpg_product -- verify against the silver layer.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_b2b_kpi AS
WITH purchase_lines AS (
    -- one row per distributor purchase line, limited to known and stocked products
    SELECT
        dpi.product_id,
        dpi.total_price,
        dpi.quantity_ordered,
        dp.order_status,
        dp.order_date
    FROM {catalog}.{silver_schema}.cpg_distributor_purchase_items dpi
    JOIN {catalog}.{silver_schema}.cpg_distributor_purchases dp
        ON dpi.purchase_id = dp.purchase_id
    WHERE dpi.product_id IN (SELECT product_id FROM {catalog}.{silver_schema}.cpg_product)
      AND dpi.product_id IN (SELECT product_id FROM {catalog}.{silver_schema}.cpg_inventory)
),
inventory_totals AS (
    -- each inventory row counted exactly once, limited to products that were purchased
    SELECT
        SUM(i.quantity_on_hand * p.retail_price) AS inventory_value,
        AVG(i.quantity_on_hand) AS avg_quantity_on_hand
    FROM {catalog}.{silver_schema}.cpg_inventory i
    JOIN {catalog}.{silver_schema}.cpg_product p
        ON i.product_id = p.product_id
    WHERE i.product_id IN (SELECT product_id FROM purchase_lines)
),
sales_totals AS (
    -- each purchase line counted exactly once
    SELECT
        SUM(total_price) AS total_sales_value,
        SUM(quantity_ordered) AS total_quantity_sold,
        SUM(CASE WHEN order_status = 'Cancelled' THEN 1 ELSE 0 END) AS backorder_count,
        COUNT(*) AS total_order_lines,
        AVG(DATEDIFF(CURRENT_DATE(), order_date)) AS avg_days_sales_inventory,
        DATEDIFF(MAX(order_date), MIN(order_date)) AS total_days
    FROM purchase_lines
)
SELECT
    CURRENT_DATE() AS kpi_date,
    ROUND(inventory_value / NULLIF(total_sales_value, 0), 2) AS stock_to_sales_ratio,
    ROUND(total_quantity_sold / NULLIF(avg_quantity_on_hand, 0), 2) AS inventory_turnover_rate,
    ROUND(avg_quantity_on_hand / NULLIF(total_quantity_sold / NULLIF((total_days / 7.0), 0), 0), 2) AS weeks_on_hand,
    ROUND(backorder_count * 100.0 / NULLIF(total_order_lines, 0), 2) AS backorder_rate_pct,
    ROUND(avg_days_sales_inventory, 2) AS days_sales_in_inventory
FROM inventory_totals
CROSS JOIN sales_totals;
""")


In [0]:
# Top 10 inventory rows by quantity on hand, with the product name.
#
# CREATE OR REPLACE keeps the cell re-runnable when the table already exists.
# product_name is a secondary sort key so that the LIMIT cut-off is
# deterministic when several rows share the same quantity_on_hand.
# NOTE(review): if cpg_inventory holds several rows per product, the same
# product can appear more than once in this list -- confirm that is intended.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_top_inventory_products AS
SELECT 
    pr.product_name,
    inv.quantity_on_hand
FROM {catalog}.{silver_schema}.cpg_inventory inv
JOIN {catalog}.{silver_schema}.cpg_product pr
    ON inv.product_id = pr.product_id
ORDER BY inv.quantity_on_hand DESC, pr.product_name ASC
LIMIT 10;
""")

In [0]:
# Inventory rows at or below their reorder level (quantity_on_hand <= reorder_level),
# with the product name.
#
# CREATE OR REPLACE keeps the cell re-runnable when the table already exists.
# The ORDER BY only influences how rows are written; consumers that need a
# sorted result should apply their own ORDER BY when reading the table.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_low_stock_products AS
SELECT p.product_name, inv.quantity_on_hand, inv.reorder_level
FROM {catalog}.{silver_schema}.cpg_inventory inv
JOIN {catalog}.{silver_schema}.cpg_product p ON inv.product_id = p.product_id
WHERE inv.quantity_on_hand <= inv.reorder_level
ORDER BY inv.quantity_on_hand ASC;
""")

In [0]:
# Number of inventory rows per inventory_status value.
#
# CREATE OR REPLACE keeps the cell re-runnable when the table already exists.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_inventory_status_summary AS
SELECT 
    inventory_status,
    COUNT(*) AS count
FROM {catalog}.{silver_schema}.cpg_inventory
GROUP BY inventory_status;
""")

In [0]:
# Total stock value (quantity_on_hand * retail_price) per location, state and country.
#
# CREATE OR REPLACE keeps the cell re-runnable when the table already exists.
# The ORDER BY only influences how rows are written; consumers that need a
# sorted result should apply their own ORDER BY when reading the table.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_location_inventory_value AS
SELECT 
    inv.location_name,
    inv.state,
    inv.country,
    SUM(inv.quantity_on_hand * pr.retail_price) AS total_stock_value
FROM {catalog}.{silver_schema}.cpg_inventory inv
JOIN {catalog}.{silver_schema}.cpg_product pr
    ON inv.product_id = pr.product_id
GROUP BY inv.country, inv.state, inv.location_name
ORDER BY total_stock_value DESC;
""")


In [0]:
# Total quantity on hand per location, state and country.
#
# CREATE OR REPLACE keeps the cell re-runnable when the table already exists.
# The ORDER BY only influences how rows are written; consumers that need a
# sorted result should apply their own ORDER BY when reading the table.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_location_inventory_quantity AS
SELECT 
    location_name,
    state,
    country,
    SUM(quantity_on_hand) AS total_quantity
FROM {catalog}.{silver_schema}.cpg_inventory
GROUP BY country, state, location_name
ORDER BY total_quantity DESC;
""")