In [0]:
from pyspark.sql.functions import *  #core pyspark sql functions for data transformation
from pyspark.sql.streaming import *  #structured streaming

In [0]:
#from pyspark.sql import SparkSession

#creating a SparkSession's instance
#spark = SparkSession.builder \
    #.appName("NameofStreaming") \
    #.getOrCreate()

In [0]:
#Inspect the landing zone: list the raw files currently sitting in the input folder
dbutils.fs.ls(
    "dbfs:/FileStore/landing_zone/Computer/Offline/Computer/"
)

[FileInfo(path='dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23 19:37:35.933693.parquet', name='Computer_2024-12-23 19:37:35.933693.parquet', size=7552, modificationTime=1734982657000),
 FileInfo(path='dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23 19:37:47.358983.parquet', name='Computer_2024-12-23 19:37:47.358983.parquet', size=7477, modificationTime=1734982668000),
 FileInfo(path='dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23 19:37:58.704002.parquet', name='Computer_2024-12-23 19:37:58.704002.parquet', size=7441, modificationTime=1734982679000),
 FileInfo(path='dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23 19:38:09.982125.parquet', name='Computer_2024-12-23 19:38:09.982125.parquet', size=7442, modificationTime=1734982691000),
 FileInfo(path='dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23 19:38:21.211050.parquet', name='Computer_2024-12-23 19:38

In [0]:
#--- input ---
#landing-zone folder passed to .load() when the stream is read
origin = "dbfs:/FileStore/landing_zone/Computer/Offline/Computer"
#label stamped on every ingested row through the `source` column
source = "Computer Offline"

#--- output (bronze layer) ---
#table the stream is written to, and the storage path given to the writer
target_table = "spark_catalog.bronze.computer_offline"
target_path = "dbfs:/FileStore/bronze/Computer/Offline/Computer"
#streaming checkpoint location used by the writer
checkpoint = "dbfs:/FileStore/bronze/Computer/Offline/Computer_ckpt"
#path used as cloudFiles.schemaLocation by the reader (a location, not a schema object)
schema = "dbfs:/FileStore/bronze/Computer/Offline/Computer_schema"

In [0]:
#Reading new micro-batches from the landing zone with Databricks Auto Loader
#and enriching every row with lineage/metadata columns.
streamingDF = (
    spark.readStream
    .format('cloudFiles')  #Databricks Auto Loader source
    .option('cloudFiles.format', 'parquet')  #the landing files are parquet
    .option('cloudFiles.inferColumnTypes', 'true')  #ask Auto Loader to infer column data types
    .option('cloudFiles.schemaLocation', schema)  #where Auto Loader stores the tracked schema
    .option('cloudFiles.schemaEvolutionMode', 'addNewColumns')  #used to handle changes in data structure
    .load(origin)
    #metadata column for tracking: path of the file each row was read from.
    #Taken from the _metadata column (same source as ingestion_date_time below)
    #instead of input_file_name(), which Databricks has deprecated.
    .withColumn('tracking_source', col('_metadata.file_path'))
    #more metadata: constant label identifying this feed
    .withColumn('source', lit(source))
    #file's landing zone ingestion time (modification time of the source file)
    .withColumn('ingestion_date_time', col('_metadata.file_modification_time'))
    #extra column for future flags, if necessary
    .withColumn('status', lit(True))
)
    

In [0]:
#streamingDF.createOrReplaceTempView("streamingTable")

In [0]:
%sql
-- select * from streamingTable

In [0]:
#Writing data stream into the bronze layer

query = (streamingDF
         .writeStream
         .queryName(target_table) #query name, kept in sync with the target table
         .format("delta") #delta lake format for ACID, versioning
         .outputMode("append") #( append, complete, update)
         .option("checkpointLocation", checkpoint)
         .option("path", target_path)
         #the reader uses schemaEvolutionMode=addNewColumns, so the Delta sink
         #must be allowed to add those columns as well
         .option("mergeSchema", "true")
         .trigger(availableNow=True) #batch-like processing: processes what is available, then finishes
         #.trigger(continuous='1 second') #checking for new data every 1 sec, lower latency but higher resource usage
         #.trigger(processingTime='2 seconds')
         .toTable(target_table) #standard DataStreamWriter API; starts the query
         )

#Block until the availableNow run has finished, so the cells that query the
#bronze table afterwards see the ingested data when the notebook is run top to bottom.
query.awaitTermination()

In [0]:
%sql
-- Preview the bronze table using the same fully qualified name the stream writes to,
-- so the result does not depend on the session's current catalog.
select * from spark_catalog.bronze.computer_offline

Name,Address,IP,Connection_Time,Device,Speed_Connection,Connection_Status,_rescued_data,tracking_source,source,ingestion_date_time,status
Tina Adams,"000 Owen Cliffs New Samanthashire, AR 49677",172.29.183.180,2024-12-23T19:38:20.421086Z,Computer,500,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
Laura Moore,"51708 Juarez Bridge Suite 450 New Jennifer, ID 43475",192.168.18.90,2024-12-23T19:38:20.421326Z,Computer,1000,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
Robert Phillips,"19446 Amy Underpass Angelaburgh, OK 31321",10.232.172.205,2024-12-23T19:38:20.422071Z,Computer,1,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
David Robinson,"14141 Hannah Landing Mccartystad, WY 63893",192.168.28.208,2024-12-23T19:38:20.422294Z,Computer,10,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
Earl Newman,"29359 Lisa Via Apt. 110 North Veronicachester, VT 68360",10.76.50.162,2024-12-23T19:38:20.422529Z,Computer,50,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
Dr. Stephanie Moran,"74721 Carroll Locks Apt. 195 North Caleb, MH 50200",10.192.101.133,2024-12-23T19:38:20.423237Z,Computer,15,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
Janet Spears,"4558 Wong Isle Apt. 283 New Joshua, AR 14963",172.25.20.175,2024-12-23T19:38:20.423477Z,Computer,1,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
Cheryl Barnes,"9060 Jodi Brooks Ellischester, AS 48409",192.168.149.93,2024-12-23T19:38:20.423684Z,Computer,1,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
Howard Sanchez,"525 Fletcher Manors Hughesmouth, MS 51927",172.31.69.146,2024-12-23T19:38:20.423967Z,Computer,10,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
Terry Smith,"24478 Simon Grove Suite 289 North Andrew, KY 27732",192.168.116.168,2024-12-23T19:38:20.424202Z,Computer,100,Offline,,dbfs:/FileStore/landing_zone/Computer/Offline/Computer/Computer_2024-12-23%2019:38:21.211050.parquet,Computer Offline,2024-12-23T19:38:22Z,True
