## GOLD LAYER
- Gold reads only from Silver
- No cleansing or calibration in Gold
- Deterministic aggregations
- Auto-optimize enabled declaratively

In [0]:
import dlt
from pyspark.sql import functions as F

In [0]:
''' 
Daily Sales Summary (Fact Table)
- Explicit date column
- Stable aggregation logic
- Analytics-friendly schema
- Auto-optimize enabled
'''

@dlt.table(
    name="gold_daily_sales_summary",
    comment="Daily sales KPIs for analytics and reporting",
    table_properties={
        "quality": "gold",
        "pipelines.autoOptimize.managed": "true",
        "pipelines.autoOptimize.zOrderCols": "sales_date"
    }
)
def gold_daily_sales_summary():
    """Aggregate Silver sales into one KPI row per calendar day.

    Reads ``silver_sales_transactions``, derives ``sales_date`` from
    ``transaction_timestamp_utc`` with ``to_date`` and groups on it.

    Returns:
        A DataFrame with the columns:

        * ``sales_date`` - date part of ``transaction_timestamp_utc``.
        * ``total_transactions`` - distinct ``transaction_id`` count.
        * ``total_quantity`` - sum of ``quantity``.
        * ``total_revenue`` - sum of ``total_amount``.
        * ``avg_order_value`` - ``total_revenue / total_transactions``,
          rounded to 2 decimals; NULL when the day has no non-null
          ``transaction_id``.
    """
    sales = dlt.read("silver_sales_transactions")

    # Shared aggregate expressions so every KPI uses the same definitions.
    txn_count = F.countDistinct("transaction_id")
    revenue = F.sum("total_amount")

    # avg_order_value is defined per *transaction*, not per input row: a plain
    # avg(total_amount) would report the mean row value whenever a transaction
    # spans several rows, which disagrees with total_transactions above.
    # NOTE(review): assumes the Silver table may hold several rows per
    # transaction_id (line-item grain) - confirm against the Silver schema.
    # The when() guard yields NULL instead of dividing by zero.
    avg_order_value = F.round(
        F.when(txn_count > 0, revenue / txn_count),
        2,
    )

    return (
        sales
        .withColumn("sales_date", F.to_date("transaction_timestamp_utc"))
        .groupBy("sales_date")
        .agg(
            txn_count.alias("total_transactions"),
            F.sum("quantity").alias("total_quantity"),
            revenue.alias("total_revenue"),
            avg_order_value.alias("avg_order_value"),
        )
    )


In [0]:
# Monthly Revenue by Region
@dlt.table(
    name="gold_monthly_revenue_by_region",
    comment="Monthly revenue aggregated by region and country",
    table_properties={
        "quality": "gold",
        "pipelines.autoOptimize.managed": "true",
        "pipelines.autoOptimize.zOrderCols": "year_month,region"
    }
)
def gold_monthly_revenue_by_region():
    """Roll Silver sales up to one row per (year_month, region, country).

    Sales rows are left-joined to ``silver_store_region`` on ``store_id``,
    so sales rows without a matching store are kept rather than dropped.
    ``year_month`` is ``transaction_timestamp_utc`` formatted as ``yyyy-MM``.

    Returns:
        A DataFrame with ``year_month``, ``region``, ``country``,
        ``monthly_revenue`` (sum of ``total_amount``) and
        ``transaction_count`` (distinct ``transaction_id`` count).
    """
    sales_fact = dlt.read("silver_sales_transactions")
    store_dim = dlt.read("silver_store_region")

    # NOTE(review): presumably region/country come from the store table and
    # store_id is unique there; duplicate store rows would multiply sales
    # rows in the join and inflate monthly_revenue - confirm upstream.
    enriched = sales_fact.join(store_dim, on="store_id", how="left")

    with_month = enriched.withColumn(
        "year_month",
        F.date_format("transaction_timestamp_utc", "yyyy-MM"),
    )

    metrics = [
        F.sum("total_amount").alias("monthly_revenue"),
        F.countDistinct("transaction_id").alias("transaction_count"),
    ]

    return with_month.groupBy("year_month", "region", "country").agg(*metrics)


In [0]:
# Product Performance Metrics

@dlt.table(
    name="gold_product_performance",
    comment="Product-level sales and revenue performance",
    table_properties={
        "quality": "gold",
        "pipelines.autoOptimize.managed": "true",
        "pipelines.autoOptimize.zOrderCols": "category,product_id"
    }
)
def gold_product_performance():
    """Summarise Silver sales per product.

    Sales rows are left-joined to ``silver_product_master`` on
    ``product_id`` and grouped by ``product_id``, ``product_name``,
    ``category`` and ``brand``.

    Returns:
        A DataFrame with the four grouping columns plus
        ``total_units_sold`` (sum of ``quantity``), ``total_revenue``
        (sum of ``total_amount``) and ``avg_selling_price`` (mean of
        ``unit_price``, rounded to 2 decimals).
    """
    sales_fact = dlt.read("silver_sales_transactions")
    product_dim = dlt.read("silver_product_master")

    enriched = sales_fact.join(product_dim, on="product_id", how="left")

    group_keys = ["product_id", "product_name", "category", "brand"]

    # NOTE(review): avg_selling_price is the plain mean of unit_price over the
    # joined rows, not weighted by quantity - confirm that is the intended KPI.
    metrics = [
        F.sum("quantity").alias("total_units_sold"),
        F.sum("total_amount").alias("total_revenue"),
        F.round(F.avg("unit_price"), 2).alias("avg_selling_price"),
    ]

    return enriched.groupBy(*group_keys).agg(*metrics)
