In [0]:
# Description
# Core transactional fact table used for revenue analytics, dashboards, and BI reporting.
# Grain: one row per order line (order_id + product_id).
# Built from: transactions, orders, products, customers.
# Metric: revenue = quantity * unit_price.
# Dimensions: order_date, year / month, customer_region, product_category.

In [0]:
# Silver-layer transactions table; aliased as "t" when the fact table is assembled.
transactions_silver_df = spark.read.table(
    "git_analysis.silver.transactions"
)

In [0]:
# Silver-layer orders table; joined to transactions on order_id.
orders_silver_df = spark.read.table(
    "git_analysis.silver.orders"
)

In [0]:
# Silver-layer products table; supplies the product category for each line.
products_silver_df = spark.read.table(
    "git_analysis.silver.products"
)

In [0]:
# Silver-layer customers table; supplies the customer region for each line.
customers_silver_df = spark.read.table(
    "git_analysis.silver.customers"
)

In [0]:
from pyspark.sql import functions as F

# Stage 1: join transactions ("t") to orders ("o"), products ("p") and
# customers ("c") on their shared key columns. All three are inner joins, so a
# transaction row without a match in any of the other tables is dropped.
# NOTE(review): no column from "o" is selected below, so the orders join only
# restricts rows to transactions with a matching order_id - confirm intended.
sales_joined_df = (
    transactions_silver_df.alias("t")
    .join(orders_silver_df.alias("o"), on="order_id", how="inner")
    .join(products_silver_df.alias("p"), on="product_id", how="inner")
    .join(customers_silver_df.alias("c"), on="customer_id", how="inner")
)

# The transaction timestamp drives all three calendar columns.
order_ts_col = F.col("t.order_timestamp")

# Stage 2: project the fact-table columns. Keys and measures come from the
# transactions alias; region and category come from customers and products.
gold_sales_fact_df = sales_joined_df.select(
    F.col("t.order_id"),
    F.to_date(order_ts_col).alias("order_date"),
    F.year(order_ts_col).alias("year"),
    F.month(order_ts_col).alias("month"),
    F.col("t.customer_id"),
    F.col("c.region").alias("customer_region"),
    F.col("t.product_id"),
    F.col("p.category").alias("product_category"),
    F.col("t.quantity"),
    F.col("t.unit_price"),
    # Line revenue is derived here rather than read from a source column.
    (F.col("t.quantity") * F.col("t.unit_price")).alias("revenue"),
    F.col("t.status"),
    F.col("t.salesperson"),
)


In [0]:
# Persist the fact table as a Delta table in the gold schema. "overwrite" mode
# replaces any existing contents of the target table on every run.
(
    gold_sales_fact_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("git_analysis.gold.gold_sales_fact")
)
