In [None]:
import dlt
from pyspark.sql.functions import col, when, current_timestamp, date_format, to_date, sum as _sum, max as _max, window

In [None]:
# Streaming pass-through view over the bronze budget-change feed
@dlt.view(
    name="bronze_budget_changes_clean",
    comment="Intermediate view of bronze budget changes for SCD processing.",
)
def bronze_budget_changes_clean():
    """Expose the bronze budget-change table as a streaming view.

    No filtering or transformation is applied here; the view only gives the
    pipeline a named streaming source that other datasets can reference.

    Returns:
        A streaming DataFrame reading
        ``ad_monitor.bronze.bronze_budget_changes``.
    """
    budget_changes_stream = spark.readStream.table(
        "ad_monitor.bronze.bronze_budget_changes"
    )
    return budget_changes_stream

In [None]:
# Latest budget per advertiser, kept as an SCD Type 1 (overwrite-in-place) table.
# The target streaming table has to be declared before apply_changes writes to it.
dlt.create_streaming_table(
    name="silver_advertiser_budgets_current",
    comment="Current daily budget for each advertiser (latest version only).",
)

# Upsert the change feed into the target:
#   - keys:        rows are matched on advertiser_id
#   - sequence_by: `moment` orders the changes for a given key
#   - SCD type 1:  only the latest version of each key is retained
dlt.apply_changes(
    source="bronze_budget_changes_clean",
    target="silver_advertiser_budgets_current",
    keys=["advertiser_id"],
    sequence_by="moment",
    stored_as_scd_type=1,
)

In [None]:
# Historical budget changes: append-only log of every change row.
# NOTE(review): this is NOT an SCD Type 2 table -- no validity ranges are
# derived here; each bronze row is simply carried over with an ingestion stamp.
@dlt.table(
    name="silver_advertiser_budgets_history",
    comment="Complete history of all budget changes for audit and analysis."
)
def silver_advertiser_budgets_history():
    """Stream every bronze budget-change row into the history table.

    All source columns (including ``moment``) pass through unchanged; the only
    addition is ``created_at``, set to the processing-time timestamp at which
    the row flowed through this table.

    Returns:
        A streaming DataFrame over ``ad_monitor.bronze.bronze_budget_changes``
        with an extra ``created_at`` column.
    """
    return (
        spark.readStream.table("ad_monitor.bronze.bronze_budget_changes")
        # The former `.withColumn("moment", col("moment"))` replaced the column
        # with itself and has been dropped as a no-op.
        .withColumn("created_at", current_timestamp())
    )


In [None]:
# Paid events enriched with spend columns and a typed event timestamp
@dlt.table(
    name="silver_paid_events_enhanced",
    comment="Cleaned paid events with gross and net spend calculations."
)
def silver_paid_events_enhanced():
    """
    Stream bronze paid events, keeping only rows with a positive amount.

    Added columns, in order:
      - gross_spend:  copy of ``amount``
      - net_spend:    copy of ``amount``
      - moment_ts:    ``moment`` cast to a timestamp
      - processed_at: processing-time timestamp

    NOTE(review): gross_spend and net_spend are currently identical; if net
    spend is meant to differ (e.g. for invoicing), that rule is not
    implemented here -- confirm the intended formula.
    """
    paid_events = spark.readStream.table("ad_monitor.bronze.bronze_paid_events")

    # Rows with zero, negative or NULL amounts are dropped.
    billable_events = paid_events.where(col("amount") > 0)

    with_spend = (
        billable_events
        .withColumn("gross_spend", col("amount"))
        .withColumn("net_spend", col("amount"))
    )

    # Typed event time for downstream time-based processing.
    with_event_time = with_spend.withColumn("moment_ts", col("moment").cast("timestamp"))

    return with_event_time.withColumn("processed_at", current_timestamp())

In [None]:
# Real-time spend aggregation by hour for faster monitoring
@dlt.table(
    name="silver_hourly_spend_summary",
    comment="Hourly spend aggregation for real-time monitoring."
)
def silver_hourly_spend_summary():
    """Aggregate enhanced paid events into one row per advertiser per hour.

    Events are bucketed into 1-hour tumbling windows on ``moment_ts`` (the
    timestamp-typed event time produced by ``silver_paid_events_enhanced``).
    Previously the grouping key was the raw ``moment`` value, which yielded a
    row per distinct event timestamp rather than per hour.

    Output columns:
      - advertiser_id
      - moment:             start of the hour bucket (timestamp)
      - hourly_gross_spend: sum of ``gross_spend`` in the bucket
      - hourly_net_spend:   sum of ``net_spend`` in the bucket
      - last_event_time:    max of the source ``moment`` in the bucket
      - calculated_at:      processing-time timestamp

    NOTE(review): ``moment`` keeps its column name but now carries the hour
    bucket start; an already-materialized table may need a full refresh --
    confirm.
    NOTE(review): no watermark is set, so aggregation state is never
    expired. Consider ``.withWatermark("moment_ts", <delay>)`` once an
    acceptable lateness threshold is agreed.
    """
    return (
        dlt.read_stream("silver_paid_events_enhanced")
        .groupBy(
            col("advertiser_id"),
            # Tumbling 1-hour bucket; produces a struct column named "window"
            # with `start` and `end` fields.
            window(col("moment_ts"), "1 hour")
        )
        .agg(
            _sum("gross_spend").alias("hourly_gross_spend"),
            _sum("net_spend").alias("hourly_net_spend"),
            _max("moment").alias("last_event_time")
        )
        .select(
            col("advertiser_id"),
            # Keep the original column name so existing readers still resolve it.
            col("window.start").alias("moment"),
            col("hourly_gross_spend"),
            col("hourly_net_spend"),
            col("last_event_time"),
            current_timestamp().alias("calculated_at")
        )
    )