# In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType

# === Spark Session ===
# getOrCreate() hands back the session that is already active when there is
# one (as in a notebook), and only builds a new "BronzeStream" session otherwise.
spark = (
    SparkSession.builder
        .appName("BronzeStream")
        .getOrCreate()
)

# === Define Schema ===
# Column name -> Spark type, listed in the order the columns are declared.
# The streaming reader below is given this schema explicitly.
_BRONZE_COLUMNS = (
    ("InvoiceNo", StringType()),
    ("StockCode", StringType()),
    ("Description", StringType()),
    ("Quantity", IntegerType()),
    ("InvoiceDate", StringType()),
    ("UnitPrice", DoubleType()),
    ("CustomerID", StringType()),
    ("Country", StringType()),
)

# Every column is declared nullable (third StructField argument is True).
bronze_schema = StructType(
    [StructField(name, dtype, True) for name, dtype in _BRONZE_COLUMNS]
)

# === Paths ===
# Destination of the bronze Delta output; the stream checkpoint is kept in a
# "_checkpoint" sub-directory of this location.
bronze_path = "dbfs:/mnt/bronze/events"

# Source folder watched by the streaming reader (new CSVs can be dropped here).
input_path = "dbfs:/FileStore/tables"

# === Read Streaming CSV ===
# Configure the reader step by step: CSV source, first line of each file is a
# header row, and the explicit bronze schema is applied to the data.
_csv_reader = spark.readStream.format("csv")
_csv_reader = _csv_reader.option("header", True)
_csv_reader = _csv_reader.schema(bronze_schema)

# Streaming DataFrame over the files found under input_path.
bronze_stream = _csv_reader.load(input_path)

# === Write to Bronze Delta ===
# Checkpoint directory lives under the bronze output path.
_checkpoint_dir = f"{bronze_path}/_checkpoint"

# Append-mode Delta sink configured with that checkpoint location.
_delta_writer = (
    bronze_stream.writeStream
        .outputMode("append")
        .format("delta")
        .option("checkpointLocation", _checkpoint_dir)
)

# start() launches the streaming query and returns its handle.
bronze_query = _delta_writer.start(bronze_path)