In [0]:
from pyspark.sql.functions import (
    col, lit, when, monotonically_increasing_id, row_number,
    year, month, dayofmonth, dayofweek, quarter, weekofyear,
    date_format, hour, minute, second, current_date, to_date
)
from pyspark.sql.window import Window


# Three-part names of the source (silver) table and the target (gold) schema.
silver_table = "oliv_mitai_uc.silver.sales_silver"
gold_db = "oliv_mitai_uc.gold"

# `spark` is not defined in this notebook; presumably the SparkSession injected
# by the notebook runtime — confirm when running outside that environment.
df_silver = spark.table(silver_table)

# =========================================================
# DIM_DATE
# =========================================================
# One row per calendar day on which at least one bill exists in the silver table.
# bill_datetime is truncated to a DATE *before* de-duplication: de-duplicating the
# raw timestamps would yield one row per distinct bill timestamp, i.e. many rows
# sharing the same date_key and a TIMESTAMP full_date instead of the DATE declared
# in the table definition below.
dim_date = (
    df_silver.select(to_date(col("bill_datetime")).alias("full_date"))
    .dropna()
    .dropDuplicates()
    # Integer key in yyyyMMdd form, e.g. 2024-03-15 -> 20240315.
    .withColumn("date_key", date_format(col("full_date"), "yyyyMMdd").cast("int"))
    # Spark's dayofweek numbers days 1 (Sunday) through 7 (Saturday).
    .withColumn("day_of_week", dayofweek(col("full_date")))
    .withColumn("day_name", date_format(col("full_date"), "EEEE"))
    .withColumn("day_of_month", dayofmonth(col("full_date")))
    .withColumn("week_of_year", weekofyear(col("full_date")))
    .withColumn("month", month(col("full_date")))
    .withColumn("month_name", date_format(col("full_date"), "MMMM"))
    .withColumn("quarter", quarter(col("full_date")))
    .withColumn("year", year(col("full_date")))
    # full_date is never null here (dropna above), so isin() already yields a
    # plain True/False for Sunday (1) and Saturday (7).
    .withColumn("is_weekend", col("day_of_week").isin(1, 7))
    # Placeholder: no holiday calendar is applied in this notebook.
    .withColumn("is_holiday", lit(False))
    # Project in the same column order as the table definition below.
    .select(
        "date_key", "full_date", "day_of_week", "day_name", "day_of_month",
        "week_of_year", "month", "month_name", "quarter", "year",
        "is_weekend", "is_holiday",
    )
)

# Make sure the target schema exists before any gold table is created.
spark.sql(f"CREATE DATABASE IF NOT EXISTS {gold_db}")

spark.sql(f"""
CREATE TABLE IF NOT EXISTS {gold_db}.dim_date (
    date_key INT,
    full_date DATE,
    day_of_week INT,
    day_name STRING,
    day_of_month INT,
    week_of_year INT,
    month INT,
    month_name STRING,
    quarter INT,
    year INT,
    is_weekend BOOLEAN,
    is_holiday BOOLEAN
)
USING DELTA
""")

# Full refresh. overwriteSchema lets the DataFrame schema replace whatever schema
# an earlier run left on the table (e.g. a TIMESTAMP full_date).
dim_date.write.option("overwriteSchema", "true").mode("overwrite").format("delta").saveAsTable(f"{gold_db}.dim_date")

# =========================================================
# DIM_TIME
# =========================================================
# One row per distinct time of day (hour, minute, second) seen in the silver table.
# De-duplication happens on the extracted time parts, not on the full timestamp:
# otherwise the same clock time on two different dates would produce two rows with
# the same time_key, and the raw timestamp would be written as an extra column
# that the table definition below does not declare.
dim_time = (
    df_silver.select(col("bill_datetime").alias("time_value"))
    .dropna()
    .select(
        hour(col("time_value")).alias("hour"),
        minute(col("time_value")).alias("minute"),
        second(col("time_value")).alias("second"),
    )
    .dropDuplicates()
    # Integer key in HHmmss form, e.g. 14:05:09 -> 140509.
    .withColumn("time_key", (col("hour") * 10000 + col("minute") * 100 + col("second")).cast("int"))
    # between() is inclusive on both ends: 06-11 Morning, 12-16 Afternoon,
    # 17-20 Evening, everything else (21-05) Night.
    .withColumn(
        "time_bucket",
        when(col("hour").between(6, 11), "Morning")
        .when(col("hour").between(12, 16), "Afternoon")
        .when(col("hour").between(17, 20), "Evening")
        .otherwise("Night")
    )
    # Project in the same column order as the table definition below.
    .select("time_key", "hour", "minute", "second", "time_bucket")
)

spark.sql(f"""
CREATE TABLE IF NOT EXISTS {gold_db}.dim_time (
    time_key INT,
    hour INT,
    minute INT,
    second INT,
    time_bucket STRING
)
USING DELTA
""")

# Full refresh. overwriteSchema lets the DataFrame schema replace whatever schema
# an earlier run left on the table (e.g. one that still carries time_value).
dim_time.write.option("overwriteSchema", "true").mode("overwrite").format("delta").saveAsTable(f"{gold_db}.dim_time")

# =========================================================
# DIM_STORE
# =========================================================
# Store dimension: distinct store attribute combinations from the silver table,
# numbered 1..N in store_code order to form store_key.
store_attributes = [
    "store_code", "store_name", "address_line1", "city", "state", "postal_code",
    "gstin", "phone", "fssai_no",
]
# No partitionBy: the whole dimension is ranked in a single window.
store_key_window = Window.orderBy("store_code")

# NOTE(review): created_date comes from current_date() (a DATE) while the table
# definition below declares TIMESTAMP; because the write uses overwriteSchema the
# DataFrame's type is what ends up on the table — confirm which one is intended.
dim_store = (
    df_silver.select(*store_attributes, current_date().alias("created_date"))
    .dropDuplicates()
    .withColumn("store_key", row_number().over(store_key_window))
)

dim_store_ddl = f"""
CREATE TABLE IF NOT EXISTS {gold_db}.dim_store (
    store_key INT,
    store_code STRING,
    store_name STRING,
    address_line1 STRING,
    city STRING,
    state STRING,
    postal_code STRING,
    gstin STRING,
    phone STRING,
    fssai_no STRING,
    created_date TIMESTAMP
)
USING DELTA
"""
spark.sql(dim_store_ddl)

# Full refresh of the Delta table, replacing its schema with the DataFrame's.
(
    dim_store.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{gold_db}.dim_store")
)

# =========================================================
# DIM_CASHIER
# =========================================================
# Cashier dimension: distinct (cashier_code, cashier_name) pairs, numbered 1..N in
# cashier_code order. active_flag is a constant True and start_date a typed NULL;
# neither is derived from the silver data.
cashier_rows = df_silver.select("cashier_code", "cashier_name").dropDuplicates()
cashier_key_window = Window.orderBy("cashier_code")

dim_cashier = cashier_rows.select(
    "cashier_code",
    "cashier_name",
    row_number().over(cashier_key_window).alias("cashier_key"),
    lit(True).alias("active_flag"),
    lit(None).cast("date").alias("start_date"),
)

dim_cashier_ddl = f"""
CREATE TABLE IF NOT EXISTS {gold_db}.dim_cashier (
    cashier_key INT,
    cashier_code STRING,
    cashier_name STRING,
    active_flag BOOLEAN,
    start_date DATE
)
USING DELTA
"""
spark.sql(dim_cashier_ddl)

# Full refresh of the Delta table, replacing its schema with the DataFrame's.
(
    dim_cashier.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{gold_db}.dim_cashier")
)

# =========================================================
# DIM_PRODUCT
# =========================================================
# Product dimension: distinct (sku, hsn_code, product_name, category, uom, rate)
# combinations, numbered 1..N in sku order. Because rate is part of the distinct
# set, a sku that appears with several rates produces several rows.
product_rows = (
    df_silver.select("sku", "hsn_code", "product_name", "category", "uom", "rate")
    .dropDuplicates()
)
product_key_window = Window.orderBy("sku")

# NOTE(review): the source `rate` column is written next to current_price even
# though the table definition below does not list it — confirm it should be kept.
dim_product = product_rows.select(
    "*",
    row_number().over(product_key_window).alias("product_key"),
    col("rate").cast("decimal(12,2)").alias("current_price"),
    lit(None).cast("date").alias("effective_from"),  # typed NULL placeholder
)

dim_product_ddl = f"""
CREATE TABLE IF NOT EXISTS {gold_db}.dim_product (
    product_key INT,
    sku STRING,
    hsn_code STRING,
    product_name STRING,
    category STRING,
    uom STRING,
    current_price DECIMAL(12,2),
    effective_from DATE
)
USING DELTA
"""
spark.sql(dim_product_ddl)

# Full refresh of the Delta table, replacing its schema with the DataFrame's.
(
    dim_product.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{gold_db}.dim_product")
)

# =========================================================
# DIM_PAYMENT_METHOD
# =========================================================
# Payment-method dimension: one row per distinct payment_method value, numbered
# 1..N in payment_method order. method_code and method_name are both straight
# copies of payment_method, which is itself kept in the output.
payment_rows = df_silver.select("payment_method").dropDuplicates()
payment_key_window = Window.orderBy("payment_method")

dim_payment = payment_rows.select(
    "payment_method",
    row_number().over(payment_key_window).alias("payment_method_key"),
    col("payment_method").alias("method_code"),
    col("payment_method").alias("method_name"),
)

dim_payment_ddl = f"""
CREATE TABLE IF NOT EXISTS {gold_db}.dim_payment_method (
    payment_method_key INT,
    method_code STRING,
    method_name STRING
)
USING DELTA
"""
spark.sql(dim_payment_ddl)

# Full refresh of the Delta table, replacing its schema with the DataFrame's.
(
    dim_payment.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{gold_db}.dim_payment_method")
)

# =========================================================
# DIM_RECEIPT
# =========================================================
# Receipt dimension: distinct bill-header combinations from the silver table,
# numbered 1..N in bill_no order to form receipt_key.
receipt_columns = [
    "bill_no", "bill_datetime", "store_code", "cashier_code",
    "counter_name", "payment_method", "total_amount",
]
receipt_key_window = Window.orderBy("bill_no")

# NOTE(review): the table definition below declares store_key, customer_id,
# cashier_key, counter_key and payment_method_key, but this DataFrame carries
# store_code, cashier_code, counter_name and payment_method instead. Since the
# write uses overwriteSchema, the DataFrame's columns are what end up on the
# table — confirm whether lookups against the other dimensions are still to come.
dim_receipt = (
    df_silver.select(*receipt_columns)
    .dropDuplicates()
    .withColumn("receipt_key", row_number().over(receipt_key_window))
)

dim_receipt_ddl = f"""
CREATE TABLE IF NOT EXISTS {gold_db}.dim_receipt (
    receipt_key INT,
    store_key INT,
    bill_no STRING,
    bill_datetime TIMESTAMP,
    customer_id INT,
    cashier_key INT,
    counter_key INT,
    payment_method_key INT,
    total_amount DECIMAL(12,2)
)
USING DELTA
"""
spark.sql(dim_receipt_ddl)

# Full refresh of the Delta table, replacing its schema with the DataFrame's.
(
    dim_receipt.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{gold_db}.dim_receipt")
)

# =========================================================
# FACT_SALES_LINE
# =========================================================

# Sales-line fact: every silver row is kept (no de-duplication), restricted to the
# columns listed here and extended with the derived columns below.
fact_source_columns = [
    "bill_no", "bill_datetime", "store_code", "cashier_code", "counter_name",
    "sku", "product_name", "category", "uom",
    "tax_percent", "tax_amount", "quantity", "rate", "amount",
    "payment_method", "total_amount",
]
bill_ts = col("bill_datetime")

fact_sales = df_silver.select(*fact_source_columns).select(
    "*",
    to_date(bill_ts).alias("bill_date"),
    date_format(bill_ts, "HH:mm:ss").alias("bill_time"),
    col("rate").alias("unit_rate"),
    col("amount").alias("line_amount_excl_tax"),
    # NULL whenever either operand is NULL.
    (col("amount") + col("tax_amount")).alias("line_amount_incl_tax"),
    # NOTE(review): load_datetime is a copy of bill_datetime, not the time of
    # this load — confirm that is intended.
    bill_ts.alias("load_datetime"),
    # Unique 64-bit ids, but neither consecutive nor stable across runs.
    monotonically_increasing_id().alias("fact_line_key"),
    col("bill_no").alias("source_bill_no"),
)

# Full refresh of the Delta table, replacing its schema with the DataFrame's.
(
    fact_sales.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{gold_db}.fact_sales_line")
)


In [0]:
%sql
-- Preview of the gold fact table written by the previous cell.
SELECT * FROM oliv_mitai_uc.gold.fact_sales_line