# Window operation on event time
1. This notebook is mostly based on the
<a href="https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#window-operations-on-event-time">Window Operations on Event Time</a> section of the Spark Structured Streaming Programming Guide.

2. Before running the cells below, run the generator from `data_generator.ipynb` in the current folder.

# Load SparkSession and Data

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StringType
from pyspark.sql.functions import udf
from pyspark.sql.functions import col

from pyspark.sql.functions import to_timestamp
from pyspark.sql import types

# Create (or reuse) the SparkSession used by every cell in this notebook.
spark = (
    SparkSession.builder
    .appName("Window Operations on Event Time")
    .getOrCreate()
)

# Batch-read a single sample file; its inferred schema is handed to the
# streaming reader below.
static_df = spark.read.json("./data/sample.txt")
schema = static_df.schema

# Streaming JSON source over the data directory, using the schema above.
# maxFilesPerTrigger=1 limits each trigger to one new file.
df = (
    spark.readStream
    .format("json")
    .schema(schema)
    .option("maxFilesPerTrigger", 1)
    .load("./data/")
)

# Show the streaming DataFrame's column types, then a preview of the sample.
print(df)
static_df.show(5, truncate=False)

DataFrame[created_at: string, sentiment_level: string]
+-------------------+---------------+
|created_at         |sentiment_level|
+-------------------+---------------+
|2021-10-29T18:10:09|2              |
+-------------------+---------------+



# Window operation

In [2]:
from pyspark.sql.functions import window

# Sliding event-time window: 10-second windows that start every 2 seconds.
# The window struct is aliased back to "created_at".
# NOTE(review): created_at is a string column per the printed schema above;
# presumably Spark casts it to a timestamp here - confirm.
sliding_window = window(df["created_at"], "10 seconds", "2 seconds").alias("created_at")

# Count rows per (window, sentiment_level) pair.
windowsedCounts = df.groupBy(sliding_window, df["sentiment_level"]).count()

In [3]:
# Start the streaming query. Results go to an in-memory table named "df"
# (queried later via spark.sql), in "complete" output mode.
launch = (
    windowsedCounts.writeStream
    .outputMode("complete")
    .queryName("df")
    .format("memory")
    .start()
)

In [9]:
import time

# Poll the in-memory result table three times, pausing five seconds after each
# poll, and print the five rows with the latest windows.
# NOTE(review): no stop() call for the streaming query is visible in this part
# of the notebook - confirm it is stopped elsewhere (e.g. launch.stop()).
for poll in range(3):
    snapshot = spark.sql("select * from df")
    newest_first = snapshot.sort("created_at", ascending=False)
    newest_first.show(5, truncate=False)
    time.sleep(5)

+------------------------------------------+---------------+-----+
|created_at                                |sentiment_level|count|
+------------------------------------------+---------------+-----+
|{2021-10-29 20:27:14, 2021-10-29 20:27:24}|2              |2    |
|{2021-10-29 20:27:14, 2021-10-29 20:27:24}|1              |2    |
|{2021-10-29 20:27:14, 2021-10-29 20:27:24}|0              |1    |
|{2021-10-29 20:27:12, 2021-10-29 20:27:22}|0              |3    |
|{2021-10-29 20:27:12, 2021-10-29 20:27:22}|2              |2    |
+------------------------------------------+---------------+-----+
only showing top 5 rows

+------------------------------------------+---------------+-----+
|created_at                                |sentiment_level|count|
+------------------------------------------+---------------+-----+
|{2021-10-29 20:27:22, 2021-10-29 20:27:32}|1              |2    |
|{2021-10-29 20:27:22, 2021-10-29 20:27:32}|2              |1    |
|{2021-10-29 20:27:22, 2021-10-29 20:

KeyboardInterrupt: 