In [0]:
from pyspark.sql.functions import *  #core pyspark sql functions for data transformation
from pyspark.sql.streaming import *  #structured streaming

In [0]:
# --- Input ---------------------------------------------------------------
# Landing-zone directory the streaming read loads from.
origin = "dbfs:/FileStore/landing_zone/Mobile/Online/Mobile"

# --- Output --------------------------------------------------------------
# Fully qualified name of the table the stream is written to.
target_table = "spark_catalog.bronze.mobile_online"
# Storage location handed to the stream writer as its "path" option.
target_path = "dbfs:/FileStore/bronze/Mobile/Online/Mobile"
# Directory handed to the stream writer as its "checkpointLocation" option.
checkpoint = "dbfs:/FileStore/bronze/Mobile/Online/Mobile_ckpt"
# Directory handed to the reader as 'cloudFiles.schemaLocation'
# (this is a path string, not a schema object).
schema = "dbfs:/FileStore/bronze/Mobile/Online/Mobile_schema"
# Literal label written into the 'source' column of every ingested row.
source = "Mobile Online"

In [0]:
# Read new micro-batches from the landing zone with Databricks Auto Loader
# and tag every row with ingestion metadata columns.
streamingDF = (
    spark.readStream.format('cloudFiles')  # Databricks Auto Loader source
    .option('cloudFiles.format', 'parquet')  # incoming files are Parquet
    .option('cloudFiles.inferColumnTypes', 'true')  # let Auto Loader infer column data types
    .option('cloudFiles.schemaLocation', schema)  # directory where the tracked schema is stored
    .option('cloudFiles.schemaEvolutionMode', 'addNewColumns')  # handle changes in the data structure
    .load(origin)
    # Path of the file each row was read from. Taken from the file-metadata
    # column (the same `_metadata` struct used for the modification time
    # below) instead of input_file_name().
    .withColumn('tracking_source', col('_metadata.file_path'))
    # Static label identifying the feed.
    .withColumn('source', lit(source))
    # Modification time of the source file, used here as the landing-zone
    # ingestion time.
    .withColumn('ingestion_date_time', col('_metadata.file_modification_time'))
    # Extra column reserved for future flags, if necessary.
    .withColumn('status', lit(True))
)
    

In [0]:
#streamingDF.createOrReplaceTempView("streamingTable")

In [0]:
# Write the data stream into the bronze layer as a Delta table.

query = (
    streamingDF.writeStream
    .queryName(target_table)  # query name; reuses the target table identifier
    .format("delta")  # Delta Lake format for ACID, versioning
    .outputMode("append")  # one of: append, complete, update
    .option("checkpointLocation", checkpoint)
    .option("path", target_path)
    .trigger(availableNow=True)  # batch-like processing: process what is available, then finish
    # Alternative triggers kept for reference:
    #   .trigger(continuous='1 second')       # lower latency but higher resource usage
    #   .trigger(processingTime='2 seconds')  # TODO: understand before enabling
    .toTable(target_table)
)

# Block until the availableNow run has finished so that cells executed after
# this one (e.g. on "Run All") only query the table once the write is done.
query.awaitTermination()

In [0]:
%sql
-- Preview the rows currently stored in the bronze table.
SELECT *
FROM spark_catalog.bronze.mobile_online

Name,Address,IP,Connection_Time,Device,Speed_Connection,Connection_Status,_rescued_data,tracking_source,source,ingestion_date_time,status
Katherine Reeves,USNS Moore FPO AA 07194,10.37.238.243,2025-01-12T14:22:25.988818Z,Mobile,35,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Laura Wallace,"64020 Turner Lodge New Ryanmouth, KY 85769",192.168.86.139,2025-01-12T14:22:25.989109Z,Mobile,400,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Lisa Beasley,"2955 William Mountain South Erin, DE 82871",192.168.36.73,2025-01-12T14:22:25.989346Z,Mobile,1,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Brittany Williams,"09508 Brian Plaza Suite 662 Chamberston, MD 10284",10.173.247.244,2025-01-12T14:22:25.989613Z,Mobile,50,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Ashley Harmon,"015 Hinton Drive East Trevorburgh, OR 29399",10.26.6.117,2025-01-12T14:22:25.989878Z,Mobile,200,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Deborah Morgan,"0024 Hall Station Apt. 297 North Erinshire, AK 35148",192.168.154.22,2025-01-12T14:22:25.990105Z,Mobile,25,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Amber Garner,"739 Martin River West Ryan, GA 11503",172.29.67.46,2025-01-12T14:22:25.990312Z,Mobile,15,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Renee Adams,"02292 Wade Court Apt. 544 Clarkport, CT 87335",10.50.188.215,2025-01-12T14:22:25.990564Z,Mobile,15,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Colleen Gordon,"74209 Michael Junction Apt. 926 South Jasonfort, SD 01887",192.168.207.6,2025-01-12T14:22:25.99077Z,Mobile,50,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
Travis James,"6295 William Extension Rossburgh, WY 98089",172.27.53.31,2025-01-12T14:22:25.990988Z,Mobile,5,Online,,dbfs:/FileStore/landing_zone/Mobile/Online/Mobile/Mobile_2025-01-12%2014:22:26.973111.parquet,Mobile Online,2025-01-12T14:22:28Z,True
