# INITIALIZATION

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.window import Window

# Business Logic

In [0]:
# Spark SQL that builds the gold-layer sales fact.
#
# Steps:
#   1. `base` reads silver.crm_sales, casts the business keys to STRING, and
#      numbers the rows within each (order_number, product_key) pair so that
#      only one row per pair is kept (rn = 1), preferring the latest order_date.
#   2. The ORDER BY of the window carries tie-breakers over every other
#      projected column. Ordering by order_date alone leaves the surviving row
#      arbitrary whenever duplicates share the same order_date, which makes the
#      overwritten table differ from run to run. With the tie-breakers, rows
#      that still tie are identical in every selected column, so the result is
#      reproducible.
#   3. The kept rows are LEFT JOINed to the customer and product dimensions, so
#      sales lines without a matching dimension row are retained with NULL
#      customer_sk / product_sk.
#   4. Derived columns: sales_line_sk (xxhash64 of the two business keys),
#      integer yyyyMMdd date keys, and extended_amount = quantity * price.
fact_sales_sql = """
WITH base AS (
  SELECT
    CAST(order_number AS STRING) AS order_number,
    CAST(product_key AS STRING)  AS product_key,
    CAST(customer_id AS STRING)  AS customer_id,
    order_date,
    ship_date,
    due_date,
    sales_amount,
    quantity,
    price,
    ROW_NUMBER() OVER (
      PARTITION BY CAST(order_number AS STRING), CAST(product_key AS STRING)
      ORDER BY
        order_date DESC,
        ship_date DESC,
        due_date DESC,
        sales_amount DESC,
        quantity DESC,
        price DESC,
        CAST(customer_id AS STRING)
    ) AS rn
  FROM silver.crm_sales
)
SELECT
  xxhash64(b.order_number, b.product_key) AS sales_line_sk,

  b.order_number,
  b.customer_id,
  b.product_key,

  dc.customer_key AS customer_sk,          -- or dc.customer_sk if you switch to hash keys
  dp.product_sk,

  b.order_date,
  b.ship_date,
  b.due_date,

  CAST(date_format(b.order_date,'yyyyMMdd') AS INT) AS order_date_key,
  CAST(date_format(b.ship_date,'yyyyMMdd')  AS INT) AS ship_date_key,
  CAST(date_format(b.due_date,'yyyyMMdd')   AS INT) AS due_date_key,

  b.quantity,
  b.price AS unit_price,
  b.sales_amount,
  (b.quantity * b.price) AS extended_amount

FROM base b
LEFT JOIN workspace.gold.dim_customers dc
  ON b.customer_id = dc.customer_id
LEFT JOIN workspace.gold.dim_products dp
  ON b.product_key = dp.product_key
WHERE b.rn = 1
"""


# Write the Gold Layer

In [0]:
# Run the fact query, then replace the gold Delta table with its result.
fact_sales_df = spark.sql(fact_sales_sql)

# Configure the writer first (Delta format, full overwrite), then persist it
# as a managed table.
gold_writer = fact_sales_df.write.format("delta").mode("overwrite")
gold_writer.saveAsTable("workspace.gold.fact_sales")

In [0]:
%sql
-- Sanity-check preview of the table written by the previous cell.
-- Capped so the cell shows a sample instead of pulling the whole fact table.
SELECT * FROM workspace.gold.fact_sales
LIMIT 100