In [0]:
# Notebook parameter selecting which pipeline layer to run; defaults to "bronze".
dbutils.widgets.dropdown(
    name="layer",
    defaultValue="bronze",
    choices=["bronze", "silver", "gold"],
    label="Pipeline Layer",
)


In [0]:
# Read the current value of the "layer" widget and echo it for the run log.
layer = dbutils.widgets.get("layer")
print(f"Running layer: {layer}")


Running layer: bronze


In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window


In [0]:
def run_bronze():
    """Create the bronze events table from a small hard-coded sample.

    Builds a DataFrame from four in-line event rows, parses ``event_time``
    into a timestamp, stamps each row with the current timestamp as
    ``ingestion_ts``, and overwrites the Delta table
    ``ecommerce.bronze_events``.
    """
    column_names = [
        "user_id",
        "product_id",
        "event_type",
        "event_time",
        "price",
        "user_session",
    ]

    sample_events = [
        (1, 101, "view", "2024-01-01 10:00:00", 500.0, "sess1"),
        (1, 101, "purchase", "2024-01-01 10:05:00", 500.0, "sess1"),
        (2, 102, "view", "2024-01-01 11:00:00", 1200.0, "sess2"),
        (3, 103, "view", "2024-01-02 09:00:00", 300.0, "sess3"),
    ]

    raw_events = spark.createDataFrame(sample_events, column_names)

    # event_time arrives as a string; convert it before stamping ingestion time.
    parsed_events = raw_events.withColumn("event_time", F.to_timestamp("event_time"))
    stamped_events = parsed_events.withColumn("ingestion_ts", F.current_timestamp())

    # Full refresh: every run replaces the table contents.
    delta_writer = stamped_events.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable("ecommerce.bronze_events")

    print("✅ Bronze layer completed")


In [0]:
def run_silver():
    """Build the silver events table from the bronze events table.

    Reads ``ecommerce.bronze_events``, keeps rows whose price is greater
    than zero, drops rows that duplicate an existing
    ``(user_session, event_time)`` pair, adds ``event_date`` and
    ``price_tier`` columns, and overwrites the Delta table
    ``ecommerce.events_delta`` (data and schema).
    """
    bronze_df = spark.table("ecommerce.bronze_events")

    silver_df = (
        bronze_df.select(
            "user_id",
            "product_id",
            "event_type",
            "event_time",
            "price",
            "user_session",
            "ingestion_ts",
        )
        # Rows with a NULL price do not satisfy the predicate and are dropped too.
        .filter(F.col("price") > 0)
        # NOTE(review): when several rows share (user_session, event_time),
        # which one is kept is not controlled here — confirm that is acceptable.
        .dropDuplicates(["user_session", "event_time"])
        .withColumn("event_date", F.to_date("event_time"))
        .withColumn(
            "price_tier",
            # price < 500 -> budget, 500 <= price < 1000 -> mid, otherwise premium.
            F.when(F.col("price") < 500, "budget")
            .when(F.col("price") < 1000, "mid")
            .otherwise("premium"),
        )
    )

    # Replace data AND schema in one overwrite instead of DROP TABLE + write.
    # Dropping first leaves a window in which the table does not exist for
    # readers and discards the table's Delta history; overwriteSchema avoids
    # the schema conflict without either drawback.
    # NOTE(review): assumes any pre-existing ecommerce.events_delta is a Delta
    # table — confirm before relying on this in an older workspace.
    (
        silver_df.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("ecommerce.events_delta")
    )

    print("✅ Silver layer completed without schema conflict")


In [0]:
def run_gold():
    """Aggregate silver events into per-product metrics.

    Reads ``ecommerce.events_delta``, groups by ``product_id`` and computes:

    * ``views``           – number of rows with ``event_type == "view"``
    * ``purchases``       – number of rows with ``event_type == "purchase"``
    * ``revenue``         – sum of ``price`` over purchase rows, 0.0 when the
                            product has no purchases
    * ``conversion_rate`` – ``purchases / views``; NULL when ``views`` is 0

    The result overwrites the Delta table ``ecommerce.gold_product_metrics``.
    """
    silver_df = spark.table("ecommerce.events_delta")

    is_view = F.col("event_type") == "view"
    is_purchase = F.col("event_type") == "purchase"

    gold_df = (
        silver_df.groupBy("product_id")
        .agg(
            # when() without otherwise() yields NULL for non-matching rows,
            # and count() skips NULLs, so these are conditional counts.
            F.count(F.when(is_view, True)).alias("views"),
            F.count(F.when(is_purchase, True)).alias("purchases"),
            # sum() over only NULLs returns NULL; report 0.0 instead so that
            # revenue is consistent with purchases == 0 for the same product.
            F.coalesce(
                F.sum(F.when(is_purchase, F.col("price"))),
                F.lit(0.0),
            ).alias("revenue"),
        )
        .withColumn(
            "conversion_rate",
            # Left NULL when there are no views: the ratio is undefined there.
            F.when(F.col("views") > 0, F.col("purchases") / F.col("views")),
        )
    )

    (
        gold_df.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable("ecommerce.gold_product_metrics")
    )

    print("✅ Gold layer completed")
