In [0]:
%run ./01_Project_Config

In [0]:
# Point the Spark session at the project's catalog and its `gold` schema so
# that SQL cells further down run against that context.
# `catalog` is not defined in this notebook; presumably it is provided by the
# 01_Project_Config notebook executed in the first cell -- confirm there.
for context_statement in (f"use catalog {catalog}", "use schema gold"):
    spark.sql(context_statement)

print(f"Context set to {catalog}.gold. You can now run the SQL cells below.")

### **Which brands are the top performers in terms of Net Revenue?**

In [0]:
%sql
-- Top five brands ranked by summed net sales.
-- Each fact_sales row is matched to its product row on product_key to obtain the brand.
SELECT
  prod.brand,
  SUM(sales.net_sales) AS total_net_revenue
FROM retail_lakehouse.gold.fact_sales AS sales
INNER JOIN retail_lakehouse.gold.dim_product_gold AS prod
  ON sales.product_key = prod.product_key
GROUP BY prod.brand
ORDER BY total_net_revenue DESC
LIMIT 5;

In [0]:
%sql
-- Materialized view holding summed net sales for every brand
-- (no ordering or row limit; consumers rank as needed).
CREATE OR REPLACE MATERIALIZED VIEW retail_lakehouse.gold.mv_top_brands AS
SELECT
  prod.brand,
  SUM(sales.net_sales) AS total_net_revenue
FROM retail_lakehouse.gold.fact_sales AS sales
INNER JOIN retail_lakehouse.gold.dim_product_gold AS prod
  ON sales.product_key = prod.product_key
GROUP BY prod.brand;

Which product categories have the highest percentage of returns?

In [0]:
%sql
-- Return rate per product category: returned quantity as a percentage of sold quantity,
-- highest rate first. Categories whose summed quantity is not above zero are dropped
-- by the HAVING clause.
SELECT 
    p.category, 
    SUM(f.quantity) AS total_sold,
    SUM(f.return_qty) AS total_returned,
    -- NULLIF guards the divisor (same guard as mv_category_returns). The ratio may be
    -- evaluated for a group before HAVING removes it, so an unguarded zero total could
    -- raise DIVIDE_BY_ZERO when ANSI mode is enabled.
    (SUM(f.return_qty) / NULLIF(SUM(f.quantity), 0)) * 100 AS return_percentage
FROM retail_lakehouse.gold.fact_sales f
JOIN retail_lakehouse.gold.dim_product_gold p ON f.product_key = p.product_key
GROUP BY p.category
HAVING total_sold > 0
ORDER BY return_percentage DESC;

In [0]:
%sql
-- Materialized view of sold quantity, returned quantity and return percentage per category.
-- A zero sold total yields a NULL percentage (via NULLIF) instead of a division error.
CREATE OR REPLACE MATERIALIZED VIEW retail_lakehouse.gold.mv_category_returns AS
SELECT
  prod.category,
  SUM(sales.quantity) AS total_sold,
  SUM(sales.return_qty) AS total_returned,
  (SUM(sales.return_qty) / NULLIF(SUM(sales.quantity), 0)) * 100 AS return_percentage
FROM retail_lakehouse.gold.fact_sales AS sales
INNER JOIN retail_lakehouse.gold.dim_product_gold AS prod
  ON sales.product_key = prod.product_key
GROUP BY prod.category;

Which stores are running low on stock and need immediate replenishment?

In [0]:
%sql
-- Store/product inventory rows whose stock on hand is at or below the reorder point,
-- lowest stock first.
-- NOTE(review): no date filter is applied. If fact_inventory_daily keeps one row per
-- store/product/day, historical days are returned too -- confirm whether only the
-- latest snapshot is intended.
SELECT
  store.store_name,
  prod.product_name,
  inv.stock_on_hand,
  inv.reorder_point
FROM retail_lakehouse.gold.fact_inventory_daily AS inv
INNER JOIN retail_lakehouse.gold.dim_store_gold AS store
  ON inv.store_key = store.store_key
INNER JOIN retail_lakehouse.gold.dim_product_gold AS prod
  ON inv.product_key = prod.product_key
WHERE inv.stock_on_hand <= inv.reorder_point
ORDER BY inv.stock_on_hand ASC;

In [0]:
%sql
-- Materialized view of store/product inventory rows at or below their reorder point.
-- NOTE(review): no date filter is applied. If fact_inventory_daily keeps one row per
-- store/product/day, the view also contains historical days -- confirm intent.
CREATE OR REPLACE MATERIALIZED VIEW retail_lakehouse.gold.mv_stockout_alerts AS
SELECT
  store.store_name,
  prod.product_name,
  inv.stock_on_hand,
  inv.reorder_point
FROM retail_lakehouse.gold.fact_inventory_daily AS inv
INNER JOIN retail_lakehouse.gold.dim_store_gold AS store
  ON inv.store_key = store.store_key
INNER JOIN retail_lakehouse.gold.dim_product_gold AS prod
  ON inv.product_key = prod.product_key
WHERE inv.stock_on_hand <= inv.reorder_point;

Are our promotions actually driving revenue, or are they just eating into our margins?

In [0]:
%sql
-- Promotion effectiveness per promo type: row count, total discount given, total net
-- revenue, and net revenue earned per unit of discount, best ratio first.
-- The inner join keeps only sales rows whose promo_key matches a promotion row.
SELECT 
    pr.promo_type,
    -- NOTE(review): counts non-NULL txn_id values, not distinct transactions; the
    -- regional-sales query uses COUNT(DISTINCT f.txn_id) -- confirm which is intended.
    COUNT(f.txn_id) AS transaction_count,
    SUM(f.discount_amount) AS total_discount_given,
    SUM(f.net_sales) AS total_net_revenue,
    -- NULLIF guards the divisor (same guard as mv_promo_effectiveness): a promo type
    -- with zero total discount yields NULL instead of a DIVIDE_BY_ZERO error under
    -- ANSI mode.
    (SUM(f.net_sales) / NULLIF(SUM(f.discount_amount), 0)) AS revenue_per_discount_dollar
FROM retail_lakehouse.gold.fact_sales f
JOIN retail_lakehouse.gold.dim_promotion_gold pr ON f.promo_key = pr.promo_key
GROUP BY pr.promo_type
ORDER BY revenue_per_discount_dollar DESC;

In [0]:
%sql
-- Materialized view of per-promo-type totals: txn_id count, discount given, net revenue,
-- and net revenue per unit of discount (NULL when the discount total is zero).
CREATE OR REPLACE MATERIALIZED VIEW retail_lakehouse.gold.mv_promo_effectiveness AS
SELECT
  promo.promo_type,
  COUNT(sales.txn_id) AS transaction_count,
  SUM(sales.discount_amount) AS total_discount_given,
  SUM(sales.net_sales) AS total_net_revenue,
  (SUM(sales.net_sales) / NULLIF(SUM(sales.discount_amount), 0)) AS revenue_per_discount_dollar
FROM retail_lakehouse.gold.fact_sales AS sales
INNER JOIN retail_lakehouse.gold.dim_promotion_gold AS promo
  ON sales.promo_key = promo.promo_key
GROUP BY promo.promo_type;

Which cities are our biggest revenue drivers, and what is the average transaction value (ATV) in those cities?

In [0]:
%sql
-- Revenue by store city: distinct transaction count, total net sales, and average
-- transaction value (total net sales / distinct transactions), largest revenue first.
SELECT 
    s.city,
    COUNT(DISTINCT f.txn_id) AS total_orders,
    SUM(f.net_sales) AS total_revenue,
    -- NULLIF guards the divisor (same guard as mv_regional_sales): a city whose rows
    -- all have a NULL txn_id has a distinct count of 0, which would otherwise raise
    -- DIVIDE_BY_ZERO under ANSI mode.
    (SUM(f.net_sales) / NULLIF(COUNT(DISTINCT f.txn_id), 0)) AS avg_transaction_value
FROM retail_lakehouse.gold.fact_sales f
JOIN retail_lakehouse.gold.dim_store_gold s ON f.store_key = s.store_key
GROUP BY s.city
ORDER BY total_revenue DESC;

In [0]:
%sql
-- Materialized view of per-city sales: distinct transaction count, total net sales,
-- and average transaction value (NULL when the distinct transaction count is zero).
-- OR REPLACE matches the other materialized views in this notebook and lets the cell
-- be re-run after the view already exists.
CREATE OR REPLACE MATERIALIZED VIEW retail_lakehouse.gold.mv_regional_sales
AS
SELECT 
    s.city,
    COUNT(DISTINCT f.txn_id) AS total_orders,
    SUM(f.net_sales) AS total_revenue,
    (SUM(f.net_sales) / NULLIF(COUNT(DISTINCT f.txn_id), 0)) AS avg_transaction_value
FROM retail_lakehouse.gold.fact_sales f
JOIN retail_lakehouse.gold.dim_store_gold s ON f.store_key = s.store_key
GROUP BY s.city;

In [0]:
%sql
-- Read back every column and row of the regional sales materialized view.
SELECT *
FROM retail_lakehouse.gold.mv_regional_sales;