### Read File using DataStreamReader API

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, LongType

# Explicit schema for the incoming customer JSON records.
# Every column is nullable; date/timestamp-like columns are declared as
# plain strings here, and only customer_id is numeric.
customer_schema = StructType(
    [
        StructField(column_name, column_type, True)
        for column_name, column_type in (
            ("created_timestamp", StringType()),
            ("customer_id", LongType()),
            ("customer_name", StringType()),
            ("date_of_birth", StringType()),
            ("email", StringType()),
            ("member_since", StringType()),
            ("telephone", StringType()),
        )
    ]
)

In [0]:
# Open a streaming reader over the JSON files in the landing volume,
# applying the explicit customer schema instead of inferring one.
customers_df = (
    spark.readStream
    .format("json")
    .schema(customer_schema)
    .load("/Volumes/gizmobox/landing/operational_data/customer_stream/")
)

In [0]:
import pyspark.sql.functions as F

# Enrich each record with the path of the file it was read from
# (taken from the _metadata column) and the time it was ingested.
customers_transformed_df = (
    customers_df
    .withColumn("file_path", F.col("_metadata.file_path"))
    .withColumn("ingestion_date", F.current_timestamp())
)

In [0]:
# Write the transformed stream to the bronze Delta table.
# The checkpoint location is passed as a writer option; the returned
# query handle is kept so the stream can be stopped later.
streaming_query = (
    customers_transformed_df
    .writeStream
    .format("delta")
    .option(
        "checkpointLocation",
        "/Volumes/gizmobox/landing/operational_data/customer_stream/_checkpoint_stream",
    )
    .toTable("gizmobox.bronze.customer_stream")
)

In [0]:
# Stop the streaming query referenced by streaming_query.
streaming_query.stop()

In [0]:
%sql
-- Show all rows in the bronze customer stream table.
SELECT *
FROM gizmobox.bronze.customer_stream;

### Spark Structured Streaming Processing Modes
Spark Structured Streaming has 4 processing modes (also called trigger modes), one of which is deprecated:

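- Fixed Interval Micro-Batch (processingTime) - Default Behaviour
  .trigger(processingTime="10 seconds")
  If no trigger is specified, micro-batch mode is used and each batch starts as soon as the previous one finishes.


- Continuous Processing (continuous)
  .trigger(continuous="1 second")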

- Available Now (availableNow)
  .trigger(availableNow=True)


- Once (Deprecated - Use availableNow)
  .trigger(once=True)

### outputMode

- append (Default)
- complete
- update


### checkpoint

