In [0]:
# Configuration: name parts used to build the three-part table names
# (catalog.schema.table) referenced by the SQL in the cells below.
silver_schema = "silver"        # schema the source tables are read from
gold_product = "gold_product"   # schema the tables built in this notebook are written to
catalog = "cpg_industry"        # catalog that contains both schemas

In [0]:
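# Drop schema (optional, destructive): uncomment the line below to remove the
# gold schema and, because of CASCADE, every table inside it before a rebuild.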
# spark.sql(f"DROP SCHEMA IF EXISTS {catalog}.{gold_product} CASCADE")

In [0]:
# Create schema
# Ensure the target schema exists; IF NOT EXISTS makes this a no-op when the
# schema is already present, so the cell can be re-run safely.
create_schema_sql = f"CREATE SCHEMA IF NOT EXISTS {catalog}.{gold_product}"
spark.sql(create_schema_sql)

In [0]:
# Generate B2C Product Table with Sales and Profit Data
#
# Builds {catalog}.{gold_product}.product_overall_b2c from the silver consumer
# order items, LEFT JOINed to their order, the ordering consumer and the product.
# Because every join is a LEFT JOIN, order items without a matching order,
# consumer or product are kept and the corresponding columns are NULL.
#
# Derived columns:
#   selling_price  - order item unit_price
#   cost_price     - product unit_price
#   margin         - selling_price minus cost_price (per unit, not per line)
#   total_amount   - selling_price * quantity
#   profit_margin  - margin as a percentage of selling_price
#
# CREATE OR REPLACE keeps the cell idempotent: a plain CREATE TABLE fails on the
# second run because the table already exists.
# NULLIF(..., 0) makes profit_margin NULL for zero-priced items instead of
# raising a divide-by-zero error when ANSI mode is enabled.
# NOTE(review): the output carries consumer-level attributes (name, gender, age);
# confirm that exposing them in the gold layer is intended.
b2c_sql = f"""
CREATE OR REPLACE TABLE {catalog}.{gold_product}.product_overall_b2c AS
WITH product_b2c AS (
    SELECT
        coi.order_item_id,
        coi.product_id,
        coi.quantity,
        coi.unit_price AS selling_price,
        coi.unit_price - p.unit_price AS margin,
        coi.unit_price * coi.quantity AS total_amount,
        ((coi.unit_price - p.unit_price) / NULLIF(coi.unit_price, 0)) * 100 AS profit_margin,
        coi.order_id as s_oid,
        p.product_name,
        p.department,
        p.category,
        p.brand,
        p.retail_price,
        p.unit_price AS cost_price,
        p.release_date,
        p.product_status,
        co.order_id as c_oid,
        co.order_date as ordered_date,
        co.order_status,
        co.total_amount as total_amount_order,
        co.payment_method,
        c.name,
        c.gender,
        c.age,
        c.registration_date,
        c.is_active,
        c.city,
        c.state,
        c.country
    FROM {catalog}.{silver_schema}.cpg_consumer_order_items coi
    LEFT JOIN {catalog}.{silver_schema}.cpg_consumer_order co ON coi.order_id = co.order_id
    LEFT JOIN {catalog}.{silver_schema}.cpg_consumer c ON co.consumer_id = c.consumer_id
    LEFT JOIN {catalog}.{silver_schema}.cpg_product p ON coi.product_id = p.product_id
)
SELECT * FROM product_b2c
"""
spark.sql(b2c_sql)

# Preview the freshly built table.
display(spark.sql(f"SELECT * FROM {catalog}.{gold_product}.product_overall_b2c"))

In [0]:
# Generate B2B Product Table with Pricing and Margin Data
#
# Builds {catalog}.{gold_product}.product_overall_b2b from the distributor
# purchase items, LEFT JOINed to their purchase, the purchasing distributor and
# the product. Because every join is a LEFT JOIN, purchase items without a
# matching purchase, distributor or product are kept with NULLs in those columns.
#
# Derived columns:
#   selling_price  - purchase item unit_cost, rounded to 2 decimals
#   cost_price     - product unit_price, rounded to 2 decimals
#   margin         - unit_cost minus product unit_price (per unit, unrounded)
#   total_amount   - unit_cost * quantity_ordered, rounded to 2 decimals
#   profit_margin  - margin as a percentage of unit_cost
#
# CREATE OR REPLACE keeps the cell idempotent: a plain CREATE TABLE fails on the
# second run because the table already exists.
# NULLIF(..., 0) makes profit_margin NULL for zero-cost items instead of raising
# a divide-by-zero error when ANSI mode is enabled.
# NOTE(review): purchase items and product are read from the hard-coded `bronze`
# schema, while the B2C table reads cpg_product from the silver schema. Confirm
# whether silver versions of these tables exist and should be used here.
b2b_sql = f"""
CREATE OR REPLACE TABLE {catalog}.{gold_product}.product_overall_b2b AS
SELECT pi.purchase_item_id,
       pi.purchase_id,
       pi.product_id,
       pi.quantity_ordered,
       round(pi.unit_cost,2) as selling_price,
       (pi.unit_cost - p.unit_price) as margin,
       round(pi.unit_cost * pi.quantity_ordered,2) as total_amount,
       ((pi.unit_cost - p.unit_price) / NULLIF(pi.unit_cost, 0)) * 100 as profit_margin,
       p.product_name,
       p.category,
       p.brand,
       p.retail_price,
       round(p.unit_price,2) as cost_price,
       p.release_date,
       p.product_status,
       d.distributor_id,
       d.company_name,
       d.city,
       d.state,
       d.country,
       d.distributor_name
FROM {catalog}.bronze.cpg_distributor_purchase_items pi
LEFT JOIN {catalog}.{silver_schema}.cpg_distributor_purchases dp ON pi.purchase_id = dp.purchase_id
LEFT JOIN {catalog}.{silver_schema}.cpg_distributor d ON dp.distributor_id = d.distributor_id
LEFT JOIN {catalog}.bronze.cpg_product p ON p.product_id = pi.product_id
"""
spark.sql(b2b_sql)

# Preview the freshly built table.
display(spark.sql(f"SELECT * FROM {catalog}.{gold_product}.product_overall_b2b"))