In [0]:
from pyspark.sql import functions as F
from delta.tables import DeltaTable

# Open both Silver tables as streaming sources.
# The order_items side renames its "order_timestamp" column to
# "order_item_timestamp" so that the joined frame carries only one column
# called "order_timestamp". Each side then gets a 2-minute watermark on its
# own event-time column so late-arriving rows are tolerated.
orders_df = (
    spark.readStream.table("oms_analytics.silver.orders")
    .alias("orders")
    .withWatermark("order_timestamp", "2 minutes")
)
order_items_df = (
    spark.readStream.table("oms_analytics.silver.order_items")
    .alias("order_items")
    .withColumnRenamed("order_timestamp", "order_item_timestamp")
    .withWatermark("order_item_timestamp", "2 minutes")
)

# Inner stream-stream join on the shared order_id key.
# NOTE(review): the join has no time-range condition relating
# order_timestamp to order_item_timestamp. The Structured Streaming guide
# describes watermarks plus a time constraint as what enables join-state
# cleanup, so state here may grow without bound -- confirm and add a bound
# that matches how far apart the two timestamps can legitimately be.
joined_df = orders_df.join(order_items_df, on="order_id", how="inner")

# 5-minute window over the order event time, used as a grouping key.
sales_window = F.window("order_timestamp", "5 minutes")

# Sum quantity and line_total per (date_id, customer_id, product_id, window),
# then surface the window bounds as flat window_start / window_end columns
# (the struct column "window" is kept alongside them).
aggregated_df = (
    joined_df
    .groupBy("date_id", "customer_id", "product_id", sales_window)
    .agg(
        F.sum("quantity").alias("items_sold"),
        F.sum("line_total").alias("sales_amount"),
    )
    .withColumn("window_start", F.col("window.start"))
    .withColumn("window_end", F.col("window.end"))
)

# Values hashed into the surrogate key, in this exact order; they are joined
# with "_" and digested with SHA-256.
surrogate_key_parts = [
    F.date_format(F.col("date_id"), "yyyyMMdd"),
    F.col("window_start").cast("string"),
    F.col("window_end").cast("string"),
    F.col("customer_id").cast("string"),
    F.col("product_id").cast("string"),
]

# Column order of the rows handed to the Gold sink.
gold_columns = [
    "surrogate_key",
    "customer_id",
    "date_id",
    "window_start",
    "window_end",
    "product_id",
    "items_sold",
    "sales_amount",
    "process_id",
    "gold_load_ts",
]

# Attach the surrogate key and the audit columns (constant process id and
# load timestamp), then project down to the Gold column list.
final_df = (
    aggregated_df
    .withColumn("surrogate_key", F.sha2(F.concat_ws("_", *surrogate_key_parts), 256))
    .withColumn("process_id", F.lit("de_nb_102"))
    .withColumn("gold_load_ts", F.current_timestamp())
    .select(*gold_columns)
)

# Checkpoint directory for this stream, under the external storage location.
external_location_name = "abfss://orders@omslandingzone.dfs.core.windows.net"
checkpoint_location_daily_sales_fact = (
    f"{external_location_name}/checkpoints/gold_loader/daily_sales_fact"
)

# Configure a Delta sink in append mode and start the stream into the Gold
# fact table; toTable() launches the query and returns its handle.
sink_writer = (
    final_df.writeStream
    .outputMode("append")
    .format("delta")
    .option("checkpointLocation", checkpoint_location_daily_sales_fact)
)
query = sink_writer.toTable("oms_analytics.gold.daily_sales_fact")

# Block this cell until the streaming query stops or fails.
query.awaitTermination()
