### Add parameter widgets to notebooks

In [0]:
# Register the notebook parameters: a free-text path to the source CSV and a
# dropdown selecting one of the three pipeline layers (default "bronze").
dbutils.widgets.text(
    name="source_path",
    defaultValue="/Volumes/workspace/ecommerce/ecommerce_data/2019-Nov.csv",
)
dbutils.widgets.dropdown(
    name="layer",
    defaultValue="bronze",
    choices=["bronze", "silver", "gold"],
)

In [0]:
# Read the current widget values back into plain Python variables.
source_path, active_layer = (
    dbutils.widgets.get(widget_name) for widget_name in ("source_path", "layer")
)

# Echo the resolved parameters so the values used by this run are visible.
print(f"source_path: {source_path}")
print(f"layer: {active_layer}")



In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import StringType

# Get widget value.
# `source_path` is read as a module-level name by bronze_data() below, so it
# must be assigned before that function is called.
source_path = dbutils.widgets.get("source_path")

def get_final_category(category_code):
    """Return the last dot-separated segment of *category_code*.

    Args:
        category_code: A dotted category string such as
            ``"electronics.smartphone"``, or ``None``.

    Returns:
        The text after the final ``"."`` (the whole string when it contains
        no dot), or ``None`` when *category_code* is ``None``.
    """
    if category_code is not None:
        # rpartition yields (head, sep, tail); tail is the segment after the
        # last dot, or the full string when no dot is present.
        _, _, leaf = category_code.rpartition(".")
        return leaf
    return None

# Expose get_final_category to Spark as a column function returning strings.
get_final_category_udf = F.udf(f=get_final_category, returnType=StringType())


def bronze_data():
    """Load the raw CSV into the ``ecommerce_bronze`` Delta table.

    Reads the file at the module-level ``source_path`` with a header row and
    schema inference, adds an ``ingestion_time`` column holding the current
    timestamp, and overwrites the ``ecommerce_bronze`` table with the result.

    Returns:
        The status string ``"Bronze data loaded"``.
    """
    csv_reader = spark.read.options(header=True, inferSchema=True)
    stamped = csv_reader.csv(source_path).withColumn(
        "ingestion_time", F.current_timestamp()
    )

    stamped.write.saveAsTable("ecommerce_bronze", format="delta", mode="overwrite")

    return "Bronze data loaded"


def silver_data():
    """Build the cleaned ``ecommerce_silver`` table from ``ecommerce_bronze``.

    Processing steps, in order:

    1. Keep rows whose ``price`` is strictly between 0 and 10000.
    2. Drop duplicate rows sharing the same ``user_session`` and
       ``event_time``.
    3. Add ``event_date`` (the date part of ``event_time``).
    4. Add ``price_tier``: ``budget`` (< 100), ``affordable`` (< 200),
       ``midrange`` (< 500), ``luxury`` (< 1000), else ``ultra_luxury``.
    5. Drop rows with a null ``category_code``.
    6. Add ``product_category``: the text after the last ``"."`` in
       ``category_code``.

    The result overwrites the ``ecommerce_silver`` Delta table.

    Returns:
        The status string ``"Silver data loaded"``.
    """
    bronze = spark.read.table("ecommerce_bronze")

    silver = (
        bronze
        .filter((F.col("price") > 0) & (F.col("price") < 10000))
        .dropDuplicates(['user_session', 'event_time'])
        .withColumn("event_date", F.to_date(F.col("event_time")))
        .withColumn(
            "price_tier",
            F.when(F.col("price") < 100, "budget")
             .when(F.col("price") < 200, "affordable")
             .when(F.col("price") < 500, "midrange")
             .when(F.col("price") < 1000, "luxury")
             .otherwise("ultra_luxury")
        )
        .filter(F.col("category_code").isNotNull())
        .withColumn(
            "product_category",
            # Built-in column function instead of a Python UDF: a negative
            # count returns everything to the right of the last delimiter
            # (or the whole string when there is no delimiter), and it
            # avoids shipping every row through a Python worker.
            F.substring_index(F.col("category_code"), ".", -1)
        )
    )

    silver.write.format("delta") \
        .mode("overwrite") \
        .saveAsTable("ecommerce_silver")

    return "Silver data loaded"


def gold_data():
    """Aggregate smartphone events into the ``ecommerce_gold`` table.

    Reads ``ecommerce_silver``, keeps rows whose ``product_category`` is
    ``"smartphone"``, and for each (``brand``, ``price_tier``) pair computes:

    * ``views``: distinct ``user_id`` values with a ``view`` event;
    * ``purchases``: distinct ``user_id`` values with a ``purchase`` event;
    * ``revenue``: sum of ``price`` over ``purchase`` events;
    * ``conversion_rate``: ``purchases / views * 100``, or NULL when the
      group has no views.

    The result is sorted by ``conversion_rate`` descending and overwrites the
    ``ecommerce_gold`` Delta table.

    Returns:
        The status string ``"Gold data loaded"``.
    """
    silver = spark.read.table("ecommerce_silver")

    # A group can contain purchases but no view events, giving views == 0.
    # Turning a zero divisor into NULL makes the rate NULL for such groups
    # instead of raising a divide-by-zero error when ANSI mode is enabled;
    # with ANSI mode off the result is the same NULL as before.
    nonzero_views = F.when(F.col("views") != 0, F.col("views"))

    gold = (
        silver
        .filter(F.col("product_category") == "smartphone")
        .groupBy("brand", "price_tier")
        .agg(
            F.countDistinct(F.when(F.col("event_type") == "view", F.col("user_id"))).alias("views"),
            F.countDistinct(F.when(F.col("event_type") == "purchase", F.col("user_id"))).alias("purchases"),
            F.sum(F.when(F.col("event_type") == "purchase", F.col("price"))).alias("revenue")
        )
        .withColumn("conversion_rate", (F.col("purchases") / nonzero_views) * 100)
        .orderBy(F.col("conversion_rate").desc())
    )

    gold.write.format("delta") \
        .mode("overwrite") \
        .saveAsTable("ecommerce_gold")

    return "Gold data loaded"
