In [0]:
from pyspark.sql.functions import col, regexp_replace, expr, to_timestamp, lit, sum as sum_
from pyspark.sql.window import Window

# Load the bronze retail data from its Delta location.
df = spark.read.format("delta").load("/mnt/bronze/retail_bronze")

# Clean Order_Demand: remove every "(" and ")" character, then cast the
# remaining text to int.
# NOTE(review): a value written as "(100)" becomes 100 here, not -100 --
# confirm that dropping the parentheses (rather than negating) is intended.
df = df.withColumn(
    "Order_Demand",
    regexp_replace(col("Order_Demand"), r"[()]", "").cast("int"),
)

# Drop rows that are missing any of the columns the later steps rely on.
required_columns = ["Order_Demand", "Open", "Product_Category", "Product_ID"]
df = df.dropna(subset=required_columns)

# stock_remaining: Open minus the ordered quantity.
df = df.withColumn("stock_remaining", col("Open") - col("Order_Demand"))
# backorder_flag: true when the order exceeds Open (negative remainder).
df = df.withColumn("backorder_flag", expr("stock_remaining < 0"))
# event_time: the Date column parsed with the yyyy-MM-dd pattern.
df = df.withColumn("event_time", to_timestamp("Date", "yyyy-MM-dd"))

# Rolling 7-day demand per product.
# The frame is defined on calendar days, not on row positions: a row-based
# frame (rowsBetween(-6, 0)) sums the previous seven *rows*, which only equals
# seven days when a product has exactly one row for every day, and its result
# depends on an arbitrary ordering among rows that share the same date.
# Ordering by a whole-day number and using rangeBetween(-6, 0) sums every row
# of the product dated within the current day and the six days before it.
event_day_number = expr("datediff(event_time, '1970-01-01')")
rolling = (
    Window.partitionBy("Product_ID")
    .orderBy(event_day_number)
    .rangeBetween(-6, 0)
)
df = df.withColumn("rolling_7d_demand", sum_("Order_Demand").over(rolling))

# Guarantee a string-typed priority_level column: cast the existing column
# when the input has one, otherwise add it filled with nulls.
if "priority_level" in df.columns:
    priority_column = col("priority_level").cast("string")
else:
    priority_column = lit(None).cast("string")

df = df.withColumn("priority_level", priority_column)

# Save: overwrite the silver Delta table. The mergeSchema option is passed so
# the write may evolve the existing table schema (see Delta Lake docs).
silver_path = "/mnt/silver/retail_silver"
df.write.format("delta").mode("overwrite").option("mergeSchema", "true").save(silver_path)

# Report the number of rows actually written. Calling df.count() here would
# re-execute the whole lazy pipeline (bronze read, cleaning, window) a second
# time and could disagree with the saved table if the source changed meanwhile.
written_rows = spark.read.format("delta").load(silver_path).count()
print("Silver table ready. Row count:", written_rows)

Silver table ready. Row count: 169211
