In [0]:
from pyspark.sql.functions import *  #core pyspark sql funcions for data transformation
from pyspark.sql.streaming import *  #structured streaming

In [0]:
#from pyspark.sql import SparkSession

#Creating a SparkSession instance (kept commented out: the cells below use an existing `spark` object instead)
#spark = SparkSession.builder \
    #.appName("NameofStreaming") \
    #.getOrCreate()

In [0]:
#input
origin = 'dbfs:/FileStore/landing_zone/Computer/Online/Computer'

#output
target_table = "spark_catalog.bronze.computer_online"
target_path = 'dbfs:/FileStore/bronze/Computer/Online/Computer'
checkpoint = 'dbfs:/FileStore/bronze/Computer/Online/Computer_ckpt'
schema = 'dbfs:/FileStore/bronze/Computer/Online/Computer_schema'
source = 'Computer Online'

In [0]:
#Reading new microbatchs for streaming
streamingDF = (spark.readStream.format('cloudFiles') #databricks Auto Loader
    .option('cloudFiles.Format', 'parquet') #specifies we're reading parquet files
    .option('cloudFiles.inferColumnTypes', 'true')   #tells spark to automatically detect data types
    .option('cloudFiles.schemaLocation', schema) #where to store schema
    .option('cloudFiles.schemaEvolutionMode', 'addNewColumns')  #used to handle changes in data structure
    .load(origin)
        #metadata column for tracking
        .withColumn('tracking_source', input_file_name())
        #more metadata
        .withColumn('source', lit(source))
        #file's landing zone ingestion time
        .withColumn('ingestion_date_time', col('_metadata.file_modification_time'))
        #extra column for future flags, if necessary
        .withColumn('status', lit(True)))

In [0]:
#streamingDF.createOrReplaceTempView("streamingTable")

In [0]:
#Writing data stream into the bronze layer

query = (streamingDF
         .writeStream
         .queryName ("spark_catalog.bronze.computer_online") #query name
         .format("delta") #delta lake format for ACID, versioning
         .outputMode("append") #( append, complete, update)
         .option("checkpointLocation", checkpoint)
         .option("path", target_path)
         .trigger(availableNow=True) #batch-lie processing, process and finishes
         # .trigger(continuous='1 second') #checking for new data every 1 sec, lower latency but higher resource usage
         #.trigger(processingTime='2 seconds')
         .table(target_table)
         )

#query.awaitTermination()

In [0]:
%sql

select * from bronze.computer_online

Name,Address,IP,Connection_Time,Device,Speed_Connection,Connection_Status,_rescued_data,tracking_source,source,ingestion_date_time,status
David Gonzales,USNS Peck FPO AP 13289,192.168.226.35,2024-12-23T19:46:32.221033Z,Computer,100,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Adam Cuevas,"71699 Gross Cliffs New Anthonyborough, OH 56727",10.133.149.155,2024-12-23T19:46:32.221318Z,Computer,1000,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Patrick Brandt,"4447 Hall Square Suite 423 Timothyberg, PA 70181",10.235.65.104,2024-12-23T19:46:32.221585Z,Computer,1000,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Rebecca Mata,"5248 Anthony Fort Davidhaven, RI 14130",192.168.166.45,2024-12-23T19:46:32.221806Z,Computer,35,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Anthony Macias,"2102 Julie Inlet Mossborough, PW 26789",172.23.180.208,2024-12-23T19:46:32.222016Z,Computer,100,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Allison Delgado,"47946 Hamilton Well Apt. 918 Lake Bradley, OH 74031",172.16.36.216,2024-12-23T19:46:32.222235Z,Computer,5,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Brian Rich,"240 Mcdonald Parkways Apt. 881 Port Jason, MT 30542",192.168.199.165,2024-12-23T19:46:32.222466Z,Computer,1000,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Russell Bradley,"7298 Clark Mills Laneville, HI 77026",192.168.135.215,2024-12-23T19:46:32.222704Z,Computer,5,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Shane Richardson,"69260 Kathy Hill Apt. 271 North William, AS 74029",192.168.83.32,2024-12-23T19:46:32.222925Z,Computer,15,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
Jodi Reed,"65920 Virginia Keys New Jillianberg, TN 55741",172.30.67.15,2024-12-23T19:46:32.223124Z,Computer,1000,Online,,dbfs:/FileStore/landing_zone/Computer/Online/Computer/Computer_2024-12-23%2019:46:33.011198.parquet,Computer Online,2024-12-23T19:46:34Z,True
