# Fundamentos de `Structured Streaming`

In [None]:
import org.apache.spark._

In [None]:
import org.apache.spark.streaming._

In [None]:
import org.apache.spark.sql.streaming._

In [None]:
import org.apache.spark.sql.SparkSession

In [None]:
// Obtain the SparkSession for this notebook: reuse the existing one, or create it if absent.
val spark = SparkSession.builder().getOrCreate()

In [None]:
import spark.implicits._

In [None]:
// Streaming DataFrame fed by a TCP socket source on localhost:19001.
// The chain is wrapped in parentheses so it can be split across lines safely in a REPL/notebook.
val datos_no_format = (spark.readStream
  .format("socket")
  .option("host", "localhost")
  .option("port", 19001)
  .load())

# Salida de datos

In [None]:
// Start a streaming query named "test" that writes the raw socket rows to the console sink
// using the "append" output mode.
val query = (datos_no_format.writeStream
  .format("console")
  .outputMode("append")
  .queryName("test")
  .start())

# Ciclo de vida del procesamiento

In [None]:
// List the streaming queries that are currently active in this SparkSession.
spark.streams.active

In [None]:
// Identifier of the query; per the StreamingQuery API it is kept across restarts
// that resume from the same checkpoint data.
query.id

In [None]:
// Identifier of this particular run; per the StreamingQuery API a new one is
// generated on every start/restart of the query.
query.runId

In [None]:
// Name given to the query through queryName when it was started ("test" in this notebook).
query.name

In [None]:
// Most recent progress report of the query.
// NOTE(review): presumably null until the first trigger has completed — confirm before dereferencing.
query.lastProgress

In [None]:
// Array with the most recent progress reports that Spark has retained for the query.
query.recentProgress

In [None]:
// Stop the "test" query; explicit parentheses mark the call as side-effecting.
query.stop()

# Tratamiento de los datos

In [None]:
import org.apache.spark.sql.types._

In [None]:
import org.apache.spark.sql.Row

In [None]:
import org.apache.spark.sql.functions._

In [None]:
// (Re)define the streaming DataFrame that reads from the TCP socket on localhost:19001.
// Parentheses keep the multi-line chain a single expression in a REPL/notebook.
val datos_no_format = (spark.readStream
  .format("socket")
  .option("host", "localhost")
  .option("port", 19001)
  .load())

In [None]:
// Print the schema of the raw socket stream.
datos_no_format.printSchema()

In [None]:
// Add an array column "tmp" holding the pieces of "value" split on ';'.
val datos_splitted = datos_no_format.withColumn("tmp", split(col("value"), ";"))

In [None]:
// Print the schema after adding the split "tmp" column.
datos_splitted.printSchema()

In [None]:
import java.sql.Timestamp

In [None]:
// Project the split fields into named columns: ts, nodo, sensor, valor.
// tmp(0) is divided by 1000 and cast straight to a timestamp (Spark reads a numeric value as
// seconds since the epoch). This keeps the fractional seconds and avoids the round trip through
// from_unixtime's formatted string, which drops sub-second precision and has to be re-parsed in
// the session time zone (ambiguous around DST changes).
// Assumes tmp(0) carries epoch milliseconds — TODO confirm against the data feed.
// nodo, sensor and valor stay as the strings produced by split.
val datos = datos_splitted.select(
  ($"tmp".getItem(0).cast(LongType) / 1000).cast(TimestampType).as("ts"),
  $"tmp".getItem(1).as("nodo"),
  $"tmp".getItem(2).as("sensor"),
  $"tmp".getItem(3).as("valor"))

In [None]:
// Print the schema of the parsed stream (ts, nodo, sensor, valor).
datos.printSchema()

In [None]:
// Start query "test 1": write every parsed row to the console in append mode.
val query_1 = (datos.writeStream
  .format("console")
  .outputMode("append")
  .queryName("test 1")
  .start())

In [None]:
// Start query "test 2": keep only rows whose sensor is "Temp" and whose valor exceeds 26.25,
// and write them to the console in append mode.
// Columns are referenced with $"..." (as elsewhere in this notebook) instead of symbol
// literals, which are deprecated since Scala 2.13.
val query_2 = (datos
  .filter($"sensor" === "Temp" && $"valor" > 26.25)
  .writeStream
  .format("console")
  .outputMode("append")
  .queryName("test 2")
  .start())

In [None]:
// Start query "test 3": same filter as a "Temp"/valor > 26.25 selection, but written to the
// console with the "update" output mode.
// Columns are referenced with $"..." (as elsewhere in this notebook) instead of symbol
// literals, which are deprecated since Scala 2.13.
val query_3 = (datos
  .filter($"sensor" === "Temp" && $"valor" > 26.25)
  .writeStream
  .format("console")
  .outputMode("update")
  .queryName("test 3")
  .start())

In [None]:
// Stop query "test 1"; explicit parentheses mark the call as side-effecting.
query_1.stop()

In [None]:
// Stop query "test 2"; explicit parentheses mark the call as side-effecting.
query_2.stop()

In [None]:
// Stop query "test 3"; explicit parentheses mark the call as side-effecting.
query_3.stop()

In [None]:
// Start query "test 4": write every parsed row to the console in append mode,
// with a processing-time trigger of "30 seconds".
val query_4 = (datos.writeStream
  .format("console")
  .outputMode("append")
  .queryName("test 4")
  .trigger(Trigger.ProcessingTime("30 seconds"))
  .start())

In [None]:
// Start query "test 5": keep only rows whose sensor is "Temp" and whose valor exceeds 26.25,
// and write them to the console in append mode with a processing-time trigger of "30 seconds".
// Columns are referenced with $"..." (as elsewhere in this notebook) instead of symbol
// literals, which are deprecated since Scala 2.13.
val query_5 = (datos
  .filter($"sensor" === "Temp" && $"valor" > 26.25)
  .writeStream
  .format("console")
  .outputMode("append")
  .queryName("test 5")
  .trigger(Trigger.ProcessingTime("30 seconds"))
  .start())

In [None]:
// Stop query "test 4"; explicit parentheses mark the call as side-effecting.
query_4.stop()

In [None]:
// Stop query "test 5"; explicit parentheses mark the call as side-effecting.
query_5.stop()

# Ventanas temporales de procesamiento

In [None]:
// Count rows per sensor and per time window on "ts": windows are 45 seconds long
// and slide every 15 seconds.
val aux_query_5 = (datos
  .groupBy($"sensor", window($"ts", "45 seconds", "15 seconds"))
  .count())

In [None]:
// Start the windowed-count query (named "test 5"): write the full result table to the console
// ("complete" mode) without truncating column values, with a processing-time trigger of
// "30 seconds".
val query_5 = (aux_query_5.writeStream
  .format("console")
  .outputMode("complete")
  .queryName("test 5")
  .option("truncate", false)
  .trigger(Trigger.ProcessingTime("30 seconds"))
  .start())

In [None]:
// Stop the windowed-count query; explicit parentheses mark the call as side-effecting.
query_5.stop()

In [None]:
// Start query "test 6": write every parsed row to the console in append mode,
// with a processing-time trigger of "30 seconds".
val query_6 = (datos.writeStream
  .format("console")
  .outputMode("append")
  .queryName("test 6")
  .trigger(Trigger.ProcessingTime("30 seconds"))
  .start())

In [None]:
// Stop query "test 6"; explicit parentheses mark the call as side-effecting.
query_6.stop()