Import Required Libraries and Spark SQL Functions

In [0]:
import os
from pyspark.sql.functions import *

Define Gold and Silver Layer Base Paths

In [0]:
silver_base = "/Volumes/adventure_works_lakehouse/adventure_works/lakehouse/silver"
gold_base   = "/Volumes/adventure_works_lakehouse/adventure_works/lakehouse/gold"

Load Sales Fact Table from Gold Layer

In [0]:
df_fact_sales = spark.read.format("delta").load(f"{gold_base}/fact_sales")

Aggregate Customer Metrics for Analysis

In [0]:
df_customer_analysis = (
    df_fact_sales
    .groupBy("CustomerKey")
    .agg(
        round(sum("Revenue"), 4).alias("LifetimeValue"),
        countDistinct("OrderNumber").alias("TotalOrders"),
        round(avg("Revenue"), 4).alias("AvgOrderValue")
    )
)

Write Customer Analysis Data to Gold Layer

In [0]:
df_customer_analysis.write.format("delta")\
    .mode("overwrite")\
    .save(f"{gold_base}/customer_analysis")

Save Customer Analysis as a Gold Table

In [0]:
df_customer_analysis.write.format("delta")\
    .mode("overwrite")\
    .saveAsTable("adventure_works_lakehouse.adventure_works.customer_analysis")


In [0]:
%sql
SELECT * FROM adventure_works_lakehouse.adventure_works.customer_analysis

CustomerKey,LifetimeValue,TotalOrders,AvgOrderValue
19293,2448.1555,2,816.0518
18714,46.25,2,23.125
24425,2417.7142,1,1208.8571
14322,4862.8678,3,1620.9559
11451,8044.2024,3,893.8003
22731,134.94,1,44.98
16557,66.53,1,22.1767
26259,101.6042,1,33.8681
17149,1734.6342,1,867.3171
22773,267.93,1,66.9825
