In [0]:
# COMMAND ----------

from pyspark.sql.functions import to_timestamp
from delta import DeltaTable

# Storage locations used by the streaming pipeline.
# Delta Bronze table location the streaming write targets:
bronze_output_path = "dbfs:/delta/bronze/sensor_logs_stream"
# Checkpoint directory handed to the streaming writer:
checkpoint_path = "dbfs:/delta/bronze/_checkpoints/sensor_logs"
# Landing folder for incoming CSV files (drop new files here):
stream_input_path = "dbfs:/FileStore/iot_stream"

# Define schema for streaming CSV
from pyspark.sql.types import StructType, IntegerType, DoubleType, StringType, TimestampType

# Column layout expected in every incoming sensor CSV file.
# "timestamp" is declared as a plain string at read time; it is converted to
# a timestamp in a later step, after the stream has been loaded.
schema = (
    StructType()
    .add("sensor_id", IntegerType())
    .add("timestamp", StringType())
    .add("temperature", DoubleType())
    .add("humidity", DoubleType())
    .add("location", StringType())
)

# COMMAND ----------

# Streaming source: CSV files in the input folder are parsed with the
# explicit schema, treating the first line of each file as a header row.
csv_stream = (
    spark.readStream
    .schema(schema)
    .option("header", True)
    .csv(stream_input_path)
)

# Replace the string-typed "timestamp" column with its parsed timestamp value.
df_stream = csv_stream.withColumn("timestamp", to_timestamp("timestamp"))

# COMMAND ----------

# Configure the Bronze sink: Delta format, append-only output, with the
# checkpoint location set to checkpoint_path.
bronze_writer = (
    df_stream.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", checkpoint_path)
)

# Launch the streaming query; the returned handle is kept in `stream`.
stream = bronze_writer.start(bronze_output_path)

print("🚀 Streaming job started. Waiting for data in:", stream_input_path)


🚀 Streaming job started. Waiting for data in: dbfs:/FileStore/iot_stream


In [0]:
# Batch (non-streaming) read of the Bronze Delta table; shows the rows
# present in the table at the moment this cell runs.
bronze_snapshot = spark.read.format("delta").load(bronze_output_path)
display(bronze_snapshot)


sensor_id,timestamp,temperature,humidity,location
1004,2025-05-01T00:00:00.000+0000,78.54,39.08,Room_A
1001,2025-05-01T00:00:05.000+0000,79.98,43.99,Room_A
1004,2025-05-01T00:00:10.000+0000,77.63,42.69,Room_A
1004,2025-05-01T00:00:15.000+0000,76.48,34.0,Room_B
1005,2025-05-01T00:00:20.000+0000,66.05,59.15,Room_B
1004,2025-05-01T00:00:25.000+0000,74.46,59.76,Room_C
1001,2025-05-01T00:00:30.000+0000,75.04,53.08,Room_A
1001,2025-05-01T00:00:35.000+0000,79.21,52.25,Room_A
1002,2025-05-01T00:00:40.000+0000,68.36,59.61,Room_D
1005,2025-05-01T00:00:45.000+0000,72.5,56.51,Room_B


In [0]:
# Batch read of the Bronze Delta table, showing the rows present when this
# cell runs. Uses the shared bronze_output_path variable rather than repeating
# the literal path, so this cell always reads the same location the stream
# writes to.
display(spark.read.format("delta").load(bronze_output_path))


sensor_id,timestamp,temperature,humidity,location
1004,2025-05-01T00:00:00.000+0000,78.54,39.08,Room_A
1001,2025-05-01T00:00:05.000+0000,79.98,43.99,Room_A
1004,2025-05-01T00:00:10.000+0000,77.63,42.69,Room_A
1004,2025-05-01T00:00:15.000+0000,76.48,34.0,Room_B
1005,2025-05-01T00:00:20.000+0000,66.05,59.15,Room_B
1004,2025-05-01T00:00:25.000+0000,74.46,59.76,Room_C
1001,2025-05-01T00:00:30.000+0000,75.04,53.08,Room_A
1001,2025-05-01T00:00:35.000+0000,79.21,52.25,Room_A
1002,2025-05-01T00:00:40.000+0000,68.36,59.61,Room_D
1005,2025-05-01T00:00:45.000+0000,72.5,56.51,Room_B
