In [0]:
from pyspark.sql.functions import col, to_timestamp

# Storage locations for the bronze (input) and silver (output) layers.
bronze_path = "dbfs:/mnt/bronze/events"
silver_path = "dbfs:/mnt/silver/events"

# Open the bronze location as a Delta streaming source.
bronze_df = spark.readStream.load(bronze_path, format="delta")

# Silver layer: bronze rows that are validated, typed and de-duplicated.
# NOTE(review): dropDuplicates on a streaming DataFrame without a watermark
# keeps every key it has seen in state, so state grows for the lifetime of the
# query - confirm this is acceptable for the expected data volume.
silver_df = (
    bronze_df
    # discard rows lacking an invoice number or a customer id
    .where(col("InvoiceNo").isNotNull())
    .where(col("CustomerID").isNotNull())
    # numeric columns: price as double, quantity as int
    .withColumn("UnitPrice", col("UnitPrice").cast("double"))
    .withColumn("Quantity", col("Quantity").cast("int"))
    # parse the InvoiceDate column (pattern dd-MM-yyyy HH:mm) into a timestamp
    .withColumn(
        "InvoiceTimestamp",
        to_timestamp(col("InvoiceDate"), "dd-MM-yyyy HH:mm"),
    )
    # one row per (invoice, stock item, customer) combination
    .dropDuplicates(["InvoiceNo", "StockCode", "CustomerID"])
)

# Start the streaming query that appends silver rows to the silver location in
# Delta format; the checkpoint is kept in a "_checkpoint" folder under that
# same path.
silver_query = silver_df.writeStream.start(
    silver_path,
    format="delta",
    outputMode="append",
    checkpointLocation=f"{silver_path}/_checkpoint",
)
