In [0]:
# Description: Customer lifetime view combining transactions and reviews.
# Grain: One row per customer
# Built From: customers, transactions, reviews
# Metrics: total_orders, total_revenue, avg_order_value, first_order_date, last_order_date, review_count, avg_rating

In [0]:
# Load the silver-layer transactions table as a DataFrame.
transactions_silver_df = spark.table("git_analysis.silver.transactions")

In [0]:
# Load the silver-layer reviews table as a DataFrame.
reviews_silver_df = spark.table("git_analysis.silver.reviews")

In [0]:
# Load the silver-layer customers table as a DataFrame.
customers_silver_df = spark.table("git_analysis.silver.customers")

In [0]:
from pyspark.sql import functions as F
# Per-customer sales metrics aggregated from the transactions table.
#
# Orders are counted with countDistinct on order_id, which suggests an order
# may span several transaction rows (TODO confirm against the silver schema).
# avg_order_value is therefore total revenue divided by the number of distinct
# orders, not a mean over individual rows, which would give an average *line*
# value whenever an order has more than one row.
# NOTE(review): for decimal inputs the result type of sum/count may differ
# from F.avg; verify against the existing gold table schema.
line_revenue = F.col("quantity") * F.col("unit_price")
distinct_orders = F.countDistinct("order_id")

customer_sales_df = (
    transactions_silver_df
    .groupBy("customer_id")
    .agg(
        distinct_orders.alias("total_orders"),
        F.sum(line_revenue).alias("total_revenue"),
        # Guard the division: countDistinct ignores NULL order_ids, so a
        # customer whose rows all lack an order_id has zero orders. In that
        # case avg_order_value is NULL instead of a divide-by-zero.
        F.when(distinct_orders > 0, F.sum(line_revenue) / distinct_orders)
        .alias("avg_order_value"),
        F.min("order_timestamp").alias("first_order_date"),
        F.max("order_timestamp").alias("last_order_date"),
    )
)

# Per-customer review metrics: count of non-null review_id values and the
# mean of rating.
review_metrics = [
    F.count("review_id").alias("review_count"),
    F.avg("rating").alias("avg_rating"),
]
customer_reviews_df = reviews_silver_df.groupBy("customer_id").agg(*review_metrics)

# Assemble the customer-level view: start from the customers table and
# left-join both metric sets on customer_id, so customers with no
# transactions or no reviews are still present in the output.
gold_customer_360_df = (
    customers_silver_df
    .join(customer_sales_df, "customer_id", "left")
    .join(customer_reviews_df, "customer_id", "left")
    # Unmatched left-join rows come back as NULL. Report the count/sum
    # metrics as 0 for those customers; averages and order dates are left
    # NULL because they are undefined when there are no rows to aggregate.
    .fillna(0, subset=["total_orders", "total_revenue", "review_count"])
)


In [0]:
# Persist the customer 360 DataFrame as a Delta table; "overwrite" mode
# replaces any existing contents of the target table.
(
    gold_customer_360_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("git_analysis.gold.gold_customer_360")
)