# Applying Spark DataFrame API on Streaming
You can use DataFrame transformations like `filter`, `select`, or `groupBy` on unbounded streaming data.

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, window

# Obtain the SparkSession for this application (creates one if none exists)
spark = (
    SparkSession.builder
    .appName("Spark Structured Streaming Application")
    .getOrCreate()
)

# Column names and types of the incoming records, written as a DDL string
schema = "id INT, name STRING, age INT, event_time TIMESTAMP"

# Configure a streaming reader with that schema, then point it at the
# CSV data under "input/"
csv_stream_reader = spark.readStream.schema(schema)
streaming_df = csv_stream_reader.csv("input/")

# # Drop late events with watermark
# result_df = streaming_df \
#     .withWatermark("event_time", "5 minutes") \
#     .groupBy(window(col("event_time"), "10 minutes"), col("name")) \
#     .count()

# # Output to console
# query = result_df.writeStream.format("console").outputMode("append").start()

# query.awaitTermination()

In [None]:
# Keep only rows whose age is greater than 18, then project name and age
over_18_df = streaming_df.filter(col("age") > 18)
transformed_df = over_18_df.select("name", "age")

# Write the transformed stream to the console in "append" output mode
query = (
    transformed_df.writeStream
    .format("console")
    .outputMode("append")
    .start()
)

# Wait here for the streaming query to terminate
query.awaitTermination()

# Applying Spark SQL API on Streaming
You can register a streaming DataFrame as a temporary view and run SQL queries on it.

In [None]:
# Make the streaming DataFrame queryable from SQL under the name "people"
streaming_df.createOrReplaceTempView("people")

# Count the rows for each name with a SQL query over that view
count_by_name_sql = "SELECT name, COUNT(*) AS count FROM people GROUP BY name"
result_df = spark.sql(count_by_name_sql)

# Write the aggregated result to the console in "update" output mode
query = (
    result_df.writeStream
    .format("console")
    .outputMode("update")
    .start()
)

# Wait here for the streaming query to terminate
query.awaitTermination()