In [None]:
import json
# Load the shared settings file that supplies the catalog and schema names used below.
# The path is relative to the notebook's working directory (two levels up, then config/).
# The encoding is passed explicitly so the read does not depend on the platform's
# default locale encoding.
with open("../../config/metadata.json", "r", encoding="utf-8") as f:
    config = json.load(f)

In [None]:
# Configuration: names interpolated into every SQL statement in this notebook.
#   catalog       - catalog that holds both the source and target schemas
#   gold_supply   - target schema the tables below are created in
#   silver_schema - source schema the KPI query reads the cpg_* tables from
# A missing key raises KeyError here instead of surfacing later inside a SQL string.
catalog, gold_supply, silver_schema = (
    config["catalog"],
    config["gold_supply"],
    config["silver_schema"],
)

In [0]:
# Manual reset statements, left commented out so that a normal run of the notebook
# never drops anything. Uncomment the line(s) you need and run this cell on its own.

# Drop schema (CASCADE removes the schema together with every object inside it)
# spark.sql(f"DROP SCHEMA IF EXISTS {catalog}.{gold_supply} CASCADE")

# Drop tables (the four tables created by the cells below)
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_overall_kpi")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_overall_monthly_purchase_summary")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_overall_category_quantity_ordered")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_supply}.supply_overall_order_status_summary")

In [0]:
# Make sure the target schema exists before any table is written into it.
# IF NOT EXISTS keeps this cell safe to run repeatedly.
spark.sql(
    f"CREATE SCHEMA IF NOT EXISTS {catalog}.{gold_supply}"
)

In [0]:
# Build the combined (B2C + B2B) supply-chain KPI snapshot as a single-row table.
#
# Inventory and sales are aggregated separately, each at its own grain, and the two
# one-row results are cross-joined at the end. Joining inventory to order lines before
# aggregating repeats every inventory row once per matching order line, which inflates
# inventory_value and weights avg_quantity_on_hand toward frequently ordered products
# (and multiplies the sales sums whenever a product has more than one inventory row).
#
# Output columns:
#   kpi_date                - date the snapshot was computed
#   stock_to_sales_ratio    - inventory value (quantity_on_hand * retail_price) / total sales value
#   inventory_turnover_rate - total quantity sold / average quantity on hand
#   weeks_on_hand           - average quantity on hand / quantity sold per week, where the
#                             period is the span between the earliest and latest order date
#   backorder_rate_pct      - percentage of order lines whose order status is
#                             'Returned' or 'Cancelled'
#   days_sales_in_inventory - average number of days between order_date and today
#                             NOTE(review): this is the mean order age, not the textbook
#                             DSI formula - confirm it matches the intended definition.
#
# Every ratio guards its denominator with NULLIF, so an empty source yields NULL
# rather than a division error.
# CREATE OR REPLACE lets the cell be re-run without dropping the table by hand.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_overall_kpi AS
WITH inventory_agg AS (
    -- Inventory grain: every inventory row contributes exactly once.
    SELECT
        SUM(i.quantity_on_hand * p.retail_price) AS inventory_value,
        AVG(i.quantity_on_hand) AS avg_quantity_on_hand
    FROM {catalog}.{silver_schema}.cpg_inventory i
    JOIN {catalog}.{silver_schema}.cpg_product p
        ON i.product_id = p.product_id
),
b2c_lines AS (
    -- Consumer order lines with the status and date of their parent order.
    SELECT
        coi.total_price,
        coi.quantity AS quantity,
        o.order_status,
        o.order_date
    FROM {catalog}.{silver_schema}.cpg_consumer_order_items coi
    JOIN {catalog}.{silver_schema}.cpg_consumer_order o
        ON coi.order_id = o.order_id
),
b2b_lines AS (
    -- Distributor purchase lines with the status and date of their parent purchase.
    SELECT
        pi.total_price,
        pi.quantity_ordered AS quantity,
        dp.order_status,
        dp.order_date
    FROM {catalog}.{silver_schema}.cpg_distributor_purchase_items pi
    JOIN {catalog}.{silver_schema}.cpg_distributor_purchases dp
        ON pi.purchase_id = dp.purchase_id
),
combined_lines AS (
    SELECT total_price, quantity, order_status, order_date FROM b2c_lines
    UNION ALL
    SELECT total_price, quantity, order_status, order_date FROM b2b_lines
),
sales_agg AS (
    -- Order-line grain: every order line contributes exactly once.
    SELECT
        SUM(total_price) AS total_sales_value,
        SUM(quantity) AS total_quantity_sold,
        SUM(CASE WHEN order_status IN ('Returned', 'Cancelled') THEN 1 ELSE 0 END) AS backorder_count,
        COUNT(*) AS total_order_lines,
        AVG(DATEDIFF(CURRENT_DATE(), order_date)) AS avg_days_sales_inventory,
        DATEDIFF(MAX(order_date), MIN(order_date)) / 7.0 AS total_weeks_in_period
    FROM combined_lines
)
SELECT
    CURRENT_DATE() AS kpi_date,
    ROUND(inv.inventory_value / NULLIF(s.total_sales_value, 0), 2) AS stock_to_sales_ratio,
    ROUND(s.total_quantity_sold / NULLIF(inv.avg_quantity_on_hand, 0), 2) AS inventory_turnover_rate,
    ROUND(inv.avg_quantity_on_hand / NULLIF(s.total_quantity_sold / NULLIF(s.total_weeks_in_period, 0), 0), 2) AS weeks_on_hand,
    ROUND(s.backorder_count * 100.0 / NULLIF(s.total_order_lines, 0), 2) AS backorder_rate_pct,
    ROUND(s.avg_days_sales_inventory, 2) AS days_sales_in_inventory
FROM inventory_agg inv
CROSS JOIN sales_agg s;
""")

In [0]:
# Combined monthly purchase totals: stack the B2C and B2B monthly summaries and
# re-aggregate per order_month, rounding the summed amount to 2 decimals.
# Both input tables live in the gold schema and are not created in this notebook,
# so they must already exist when this cell runs.
# CREATE OR REPLACE lets the cell be re-run without dropping the table by hand.
# NOTE(review): the ORDER BY only affects how rows are written; readers that need a
# particular order should sort in their own query.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_overall_monthly_purchase_summary AS
SELECT 
    order_month,
    ROUND(SUM(total_purchase_amount), 2) AS total_purchase_amount
FROM (
    SELECT order_month, total_purchase_amount
    FROM {catalog}.{gold_supply}.supply_b2c_monthly_purchase_summary
    UNION ALL
    SELECT order_month, total_purchase_amount
    FROM {catalog}.{gold_supply}.supply_b2b_monthly_purchase_summary
) AS combined
GROUP BY order_month
ORDER BY order_month;
""")

In [0]:
# Combined quantity ordered per category: stack the B2C and B2B category summaries
# and sum total_quantity_ordered per category.
# Both input tables live in the gold schema and are not created in this notebook,
# so they must already exist when this cell runs.
# CREATE OR REPLACE lets the cell be re-run without dropping the table by hand.
# NOTE(review): the ORDER BY only affects how rows are written; readers that need a
# particular order should sort in their own query.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_overall_category_quantity_ordered AS
SELECT 
    category,
    SUM(total_quantity_ordered) AS total_quantity_ordered
FROM (
    SELECT category, total_quantity_ordered
    FROM {catalog}.{gold_supply}.supply_b2c_category_quantity_ordered
    UNION ALL
    SELECT category, total_quantity_ordered
    FROM {catalog}.{gold_supply}.supply_b2b_category_quantity_ordered
) AS combined
GROUP BY category
ORDER BY total_quantity_ordered DESC;
""")

In [0]:
# Combined order-status counts: stack the B2C and B2B status summaries and sum the
# per-status counts, giving one row per order_status.
# Both input tables live in the gold schema and are not created in this notebook,
# so they must already exist when this cell runs.
# CREATE OR REPLACE lets the cell be re-run without dropping the table by hand.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_supply}.supply_overall_order_status_summary AS
SELECT 
    order_status,
    SUM(count) AS count
FROM (
    SELECT order_status, count
    FROM {catalog}.{gold_supply}.supply_b2c_order_status_summary
    UNION ALL
    SELECT order_status, count
    FROM {catalog}.{gold_supply}.supply_b2b_order_status_summary
) AS combined
GROUP BY order_status;
""")