In [None]:
import json
# Load the shared metadata JSON into `config`.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's locale default encoding.
with open("../../config/metadata.json", "r", encoding="utf-8") as f:
    config = json.load(f)

In [0]:
# Configuration: pull the catalog and schema names out of `config`
catalog, gold_sales, silver_schema = (
    config[key] for key in ("catalog", "gold_sales", "silver_schema")
)

In [0]:
# Drop schema / table (destructive reset — kept commented out; uncomment only to rebuild from scratch)
# spark.sql(f"DROP SCHEMA IF EXISTS {catalog}.{gold_sales} CASCADE")
# spark.sql(f"DROP TABLE IF EXISTS {catalog}.{gold_sales}.b2b_overall_kpis")

In [0]:
# Create the target gold schema if it does not already exist
spark.sql("CREATE SCHEMA  IF NOT EXISTS {}.{}".format(catalog, gold_sales))

### B2B Overall KPIs

In [0]:
# B2B Overall KPIs
#
# (Re)creates {catalog}.{gold_sales}.b2b_overall_kpis from the silver tables
# cpg_distributor_purchase_items, cpg_distributor_purchases, cpg_distributor,
# cpg_product and cpg_inventory.
#
# Output grain: one row per row of the `base` CTE (a purchase item LEFT JOINed
# to its purchase, distributor, product and inventory rows), widened with:
#   * per-distributor KPIs            -> orders_sales, top_selling_product, clv
#   * per-distributor sales breakdown -> sales_category, sales_brand, sales_product
#   * low-stock flag                  -> low_stock (quantity_on_hand < reorder_level)
#   * per-location / per-month sales  -> store_revenue, monthly_sales
#   * global scalars on every row     -> total_metrics, order_frequency,
#                                        active_distributors (CROSS JOINed)
#
# Why-notes:
#   * top_selling uses ROW_NUMBER() with a product_id tie-break (not RANK()) so
#     that exactly one product per distributor has rnk = 1; with RANK(), tied
#     products would each get rnk = 1 and the join on distributor_id alone
#     would duplicate every row of that distributor.
#   * store_revenue is grouped by location_name, so it is joined back on
#     p.location_name (joining on p.department compares unrelated columns).
#
# NOTE(review): `base` joins cpg_inventory on product_id only. If a product has
#   more than one inventory row, each purchase item repeats once per inventory
#   row and every SUM(total_amount) over `base` counts it that many times —
#   confirm this is the intended attribution.
# NOTE(review): base.margin and base.profit_margin are computed but never
#   selected in the final projection.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{gold_sales}.b2b_overall_kpis AS
WITH base AS (
    SELECT
        dpi.purchase_item_id,
        dpi.product_id,
        dpi.quantity_ordered AS quantity,
        dpi.unit_cost AS selling_price,
        dpi.unit_cost - p.unit_price AS margin,
        dpi.unit_cost * dpi.quantity_ordered AS total_amount,
        ((dpi.unit_cost - p.unit_price) / dpi.unit_cost) * 100 AS profit_margin,
        dpi.purchase_id AS s_pid,
        p.product_name,
        p.department,
        p.category,
        p.brand,
        p.retail_price,
        p.unit_price AS cost_price,
        p.release_date,
        p.product_status,
        dp.purchase_id AS c_pid,
        dp.order_date AS ordered_date,
        dp.order_status,
        dp.total_amount AS total_amount_order,
        di.distributor_id,
        di.distributor_name,
        di.company_name,
        di.rating,
        di.city,
        di.state,
        di.country,
        i.location_name,
        i.inventory_id,
        i.quantity_on_hand,
        i.inventory_status,
        i.reorder_level
    FROM {catalog}.{silver_schema}.cpg_distributor_purchase_items dpi
    LEFT JOIN {catalog}.{silver_schema}.cpg_distributor_purchases dp ON dpi.purchase_id = dp.purchase_id
    LEFT JOIN {catalog}.{silver_schema}.cpg_distributor di ON dp.distributor_id = di.distributor_id
    LEFT JOIN {catalog}.{silver_schema}.cpg_product p ON dpi.product_id = p.product_id
    LEFT JOIN {catalog}.{silver_schema}.cpg_inventory i ON p.product_id = i.product_id
),
total_metrics AS (
    SELECT
        COALESCE(SUM(total_amount), 0) AS total_distributor_sales,
        COALESCE(AVG(total_amount), 0) AS average_order_value,
        COALESCE(COUNT(DISTINCT purchase_id), 0) AS total_order_count
    FROM {catalog}.{silver_schema}.cpg_distributor_purchases
),
order_frequency AS (
    SELECT COALESCE(AVG(order_count), 0) AS order_frequency
    FROM (
        SELECT distributor_id, COUNT(purchase_id) AS order_count
        FROM {catalog}.{silver_schema}.cpg_distributor_purchases
        GROUP BY distributor_id
    ) sub
),
active_distributors AS (
    SELECT COALESCE(COUNT(DISTINCT distributor_id), 0) AS active_distributor_count
    FROM {catalog}.{silver_schema}.cpg_distributor
),
orders_sales AS (
    SELECT
        distributor_id,
        COUNT(DISTINCT c_pid) AS total_orders,
        SUM(total_amount) AS total_sales_revenue,
        ROUND(SUM(total_amount) / NULLIF(COUNT(DISTINCT c_pid), 0)) AS avg_order_value
    FROM base
    GROUP BY distributor_id
),
sales_category AS (
    SELECT distributor_id, category, SUM(total_amount) AS sales_by_category
    FROM base
    GROUP BY distributor_id, category
),
sales_brand AS (
    SELECT distributor_id, brand, SUM(total_amount) AS sales_by_brand
    FROM base
    GROUP BY distributor_id, brand
),
sales_product AS (
    SELECT distributor_id, product_id, SUM(total_amount) AS sales_by_product
    FROM base
    GROUP BY distributor_id, product_id
),
top_selling AS (
    SELECT
        distributor_id,
        product_id,
        SUM(quantity) AS total_qty,
        ROW_NUMBER() OVER (PARTITION BY distributor_id ORDER BY SUM(quantity) DESC, product_id) AS rnk
    FROM base
    GROUP BY distributor_id, product_id
),
top_selling_product AS (
    SELECT
        t.distributor_id,
        t.product_id AS top_product_id,
        p.product_name AS top_product_name
    FROM top_selling t
    JOIN {catalog}.{silver_schema}.cpg_product p ON t.product_id = p.product_id
    WHERE rnk = 1
),
low_stock AS (
    SELECT DISTINCT product_id
    FROM {catalog}.{silver_schema}.cpg_inventory
    WHERE quantity_on_hand < reorder_level
),
store_revenue AS (
    SELECT location_name, SUM(total_amount) AS revenue
    FROM base
    GROUP BY location_name
),
monthly_sales AS (
    SELECT DATE_TRUNC('month', ordered_date) AS month, SUM(total_amount) AS monthly_sales
    FROM base
    GROUP BY DATE_TRUNC('month', ordered_date)
),
clv AS (
    SELECT distributor_id, SUM(total_amount) AS customer_lifetime_value
    FROM base
    GROUP BY distributor_id
)
SELECT
    p.purchase_item_id,
    p.product_id,
    p.quantity,
    p.selling_price,
    p.total_amount,
    p.s_pid,
    p.product_name,
    p.department,
    p.category,
    p.brand,
    p.retail_price,
    p.cost_price,
    p.release_date,
    p.product_status,
    p.c_pid,
    p.ordered_date,
    p.order_status,
    p.total_amount_order,
    p.distributor_id,
    p.distributor_name,
    p.company_name,
    p.rating,
    p.city,
    p.state,
    p.country,
    p.location_name,
    p.inventory_id,
    p.quantity_on_hand,
    p.inventory_status,
    p.reorder_level,
    o.total_orders,
    o.total_sales_revenue,
    o.avg_order_value,
    sc.sales_by_category,
    sb.sales_by_brand,
    sp.sales_by_product,
    ts.top_product_id,
    ts.top_product_name,
    CASE WHEN ls.product_id IS NOT NULL THEN 1 ELSE 0 END AS is_low_stock,
    sr.revenue AS store_revenue,
    ms.month,
    ms.monthly_sales,
    c.customer_lifetime_value,
    tm.total_distributor_sales,
    ROUND(tm.average_order_value) AS global_average_order_value,
    tm.total_order_count,
    ROUND(of.order_frequency) AS avg_order_frequency_per_distributor,
    ad.active_distributor_count
FROM base p
LEFT JOIN orders_sales o ON p.distributor_id = o.distributor_id
LEFT JOIN sales_category sc ON p.distributor_id = sc.distributor_id AND p.category = sc.category
LEFT JOIN sales_brand sb ON p.distributor_id = sb.distributor_id AND p.brand = sb.brand
LEFT JOIN sales_product sp ON p.distributor_id = sp.distributor_id AND p.product_id = sp.product_id
LEFT JOIN top_selling_product ts ON p.distributor_id = ts.distributor_id
LEFT JOIN low_stock ls ON p.product_id = ls.product_id
LEFT JOIN store_revenue sr ON p.location_name = sr.location_name
LEFT JOIN monthly_sales ms ON DATE_TRUNC('month', p.ordered_date) = ms.month
LEFT JOIN clv c ON p.distributor_id = c.distributor_id
CROSS JOIN total_metrics tm
CROSS JOIN order_frequency of
CROSS JOIN active_distributors ad;
""")