Import Required Libraries and Spark SQL Functions

In [0]:
import os
from pyspark.sql.functions import *

Define Gold and Silver Layer Base Paths

In [0]:
# Root of the lakehouse volume; each layer lives in a sub-directory beneath it.
lakehouse_root = "/Volumes/adventure_works_lakehouse/adventure_works/lakehouse"

# Base paths of the silver and gold layers.
silver_base = f"{lakehouse_root}/silver"
gold_base = f"{lakehouse_root}/gold"

Load Gold Layer Sales, Returns, and Product Dimension Tables

In [0]:
# Read the three gold-layer Delta tables used below, in this order:
# sales facts, return facts, product dimension.
df_fact_sales, df_fact_returns, df_dim_product = (
    spark.read.format("delta").load(f"{gold_base}/{table_name}")
    for table_name in ("fact_sales", "fact_returns", "dim_product")
)

Aggregate Sales Metrics by Product

In [0]:
# Per-product totals of Revenue and Profit, each rounded to two decimals.
# `round` and `sum` are the Spark column functions brought in by the wildcard
# import at the top of the notebook, not the Python built-ins.
total_sales = round(sum("Revenue"), 2).alias("TotalSales")
total_profit = round(sum("Profit"), 2).alias("TotalProfit")

df_sales_by_product = df_fact_sales.groupBy("ProductKey").agg(total_sales, total_profit)

Aggregate Return Metrics by Product

In [0]:
# Total returned quantity per product (one row per ProductKey present in fact_returns).
returns_grouped = df_fact_returns.groupBy("ProductKey")
df_returns_by_product = returns_grouped.agg(sum("ReturnQuantity").alias("TotalReturns"))

Analyze Product Performance with Sales and Return Metrics

In [0]:
# Combine per-product sales and return totals with product attributes.
#   * left join   - products that have sales but no rows in the returns
#                   aggregate are kept;
#   * fillna      - for those products the left join yields a null TotalReturns,
#                   which would also make ReturnRate null; treat "no returns"
#                   as 0 so both columns are populated;
#   * inner join  - rows whose ProductKey is absent from dim_product are dropped.
#
# NOTE(review): ReturnRate divides TotalReturns (a sum of ReturnQuantity) by
# TotalSales (a sum of Revenue), so the numerator and denominator appear to be
# in different units and the result is close to 0 for every product. If
# fact_sales exposes a units-sold column (TODO confirm the schema), the rate
# should probably use that as the denominator instead.
df_product_perf = (
    df_sales_by_product
    .join(df_returns_by_product, "ProductKey", "left")
    .fillna(0, subset=["TotalReturns"])
    .join(df_dim_product, "ProductKey")
    .withColumn(
        "ReturnRate",
        round((col("TotalReturns") * 100.0) / col("TotalSales"), 2)
    )
    .select(
        "ProductName",
        "CategoryName",
        "TotalSales",
        "TotalReturns",
        "ReturnRate",
        "TotalProfit"
    )
)

Write Product Performance Data to Gold Layer

In [0]:
# Overwrite the Delta output for product performance under the gold base path.
product_perf_path = f"{gold_base}/product_perf"
(
    df_product_perf.write
    .format("delta")
    .mode("overwrite")
    .save(product_perf_path)
)

Save Product Performance as a Gold Table

In [0]:
# Overwrite the catalog table with the same DataFrame, stored as Delta.
product_perf_table = "adventure_works_lakehouse.adventure_works.product_perf"
df_product_perf.write.saveAsTable(product_perf_table, format="delta", mode="overwrite")


In [0]:
%sql
-- Show every row and column of the product performance table written by the previous cell.
SELECT * FROM adventure_works_lakehouse.adventure_works.product_perf

ProductName,CategoryName,TotalSales,TotalReturns,ReturnRate,TotalProfit
Touring-3000,Bikes,41571.6,1.0,0.0,15730.69
Mountain-200,Bikes,1241753.51,18.0,0.0,571632.65
Mountain-400-W,Bikes,112345.54,3.0,0.0,51057.89
Road-650,Bikes,50335.07,3.0,0.01,20588.54
Road-550-W,Bikes,281122.94,10.0,0.0,110935.51
Mountain-500,Bikes,25919.52,2.0,0.01,11779.69
Road-150,Bikes,604727.63,4.0,0.0,237778.91
Mountain-400-W,Bikes,96955.74,4.0,0.0,44063.66
Road-250,Bikes,344512.35,3.0,0.0,130363.47
Road-350-W,Bikes,396330.67,9.0,0.0,144105.84


Databricks visualization. Run in Databricks to view.