In [None]:
import dlt
from pyspark.sql.functions import expr, col, window, current_timestamp, sum as _sum
from pyspark.sql.window import Window

# --- Configuration ---
# Catalog and schema from which the volume paths below are derived.
CATALOG_NAME = "ad_monitor"
SCHEMA_NAME = "landing"

# NOTE: catalog creation is disabled here. The statements below are kept for
# reference only and are never executed by this file.
#
# print(f"Creating catalog: {CATALOG_NAME}")
# spark.sql(f"""
#     CREATE CATALOG IF NOT EXISTS {CATALOG_NAME}
#     COMMENT 'Catalog for ad monitoring data'
# """)
# print(f"✅ Catalog '{CATALOG_NAME}' created or already exists")

# Both source streams live under the same /Volumes/<catalog>/<schema> root.
_landing_volume_root = "/".join(("/Volumes", CATALOG_NAME, SCHEMA_NAME))
paid_events_volume_path = f"{_landing_volume_root}/paid_events_stream"
budget_changes_volume_path = f"{_landing_volume_root}/budget_changes_stream"

In [None]:

@dlt.table(
    name="bronze_paid_events",
    comment="Raw, unprocessed paid click events from the streaming source.",
)
def bronze_paid_events():
    """
    Define the bronze table holding paid events exactly as they arrive.

    Opens a streaming read, in Delta format, on ``paid_events_volume_path``
    and appends one extra column, ``ingestion_timestamp``, populated by the
    SQL expression ``current_timestamp()``. No other column is touched here.

    Returns:
        The streaming DataFrame that backs the ``bronze_paid_events`` table.
    """
    # NOTE(review): `spark` is not defined in this file; it is presumably
    # injected by the notebook/pipeline runtime -- confirm.
    delta_reader = spark.readStream.format("delta")
    raw_events = delta_reader.load(paid_events_volume_path)

    # Ingestion metadata column added on top of the source columns.
    ingested_at = expr("current_timestamp()")
    return raw_events.withColumn("ingestion_timestamp", ingested_at)

@dlt.table(
    name="bronze_budget_changes",
    comment="Raw, unprocessed budget change events from the streaming source.",
)
def bronze_budget_changes():
    """
    Define the bronze table holding budget changes exactly as they arrive.

    Opens a streaming read, in Delta format, on ``budget_changes_volume_path``
    and appends one extra column, ``ingestion_timestamp``, populated by the
    SQL expression ``current_timestamp()``. No other column is touched here.

    Returns:
        The streaming DataFrame that backs the ``bronze_budget_changes`` table.
    """
    # NOTE(review): `spark` is not defined in this file; it is presumably
    # injected by the notebook/pipeline runtime -- confirm.
    delta_reader = spark.readStream.format("delta")
    raw_changes = delta_reader.load(budget_changes_volume_path)

    # Ingestion metadata column added on top of the source columns.
    ingested_at = expr("current_timestamp()")
    return raw_changes.withColumn("ingestion_timestamp", ingested_at)