In [None]:
# Register the IPython magics provided by spylon_kernel in this notebook session.
# NOTE(review): presumably this is what lets the following cells run Scala code — confirm.
from spylon_kernel import register_ipython_magics
register_ipython_magics()

In [None]:
import java.sql.Timestamp

import org.apache.spark.sql.Dataset
import org.apache.spark.sql.Row
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{get_json_object, round, sum, timestamp_seconds}
import org.apache.spark.sql.streaming._

// Obtain (or reuse) the SparkSession for this notebook and bring its implicit
// encoders and the $"col" column syntax into scope.
val spark = (SparkSession
  .builder
  .appName("airline")
  .getOrCreate())
import spark.implicits._

In [None]:
// One basket event: `quantity` units of `item` for `user`, stamped with `eventtime`.
case class Basket(user:String, item:String, quantity:Int, eventtime:Timestamp)
// Per-(user, item) state: the current `quantity` plus `accu`, a text field carried along with it.
case class BasketState(user:String, item:String, quantity:Int, accu:String)

// Streaming DataFrame fed by the text socket at localhost:4000.
val datastream = {
  val socketReader = spark.readStream
    .format("socket")
    .option("host", "localhost")
    .option("port", 4000)
  socketReader.load()
}



In [None]:

// Extract the JSON fields of every incoming `value` into a typed Basket record.
// NOTE(review): a record whose "quantity" is missing or non-numeric yields a null for a
// non-nullable Int field — confirm how malformed input should be handled.
val data_table: Dataset[Basket] = {
  val payload = $"value"
  // eventtime is divided by 1000 and rounded before being handed to timestamp_seconds.
  val eventSeconds = round(get_json_object(payload, "$.eventtime") / 1000)
  datastream
    .select(
      get_json_object(payload, "$.user").alias("user"),
      get_json_object(payload, "$.item").alias("item"),
      get_json_object(payload, "$.quantity").cast("int").alias("quantity"),
      timestamp_seconds(eventSeconds).alias("eventtime")
    )
    .as[Basket]
}

In [None]:
// Group the basket events by (user, item).
// The result is deliberately left as a KeyValueGroupedDataset (no `.agg(...)`): the stateful
// step calls `mapGroupsWithState` on it, which needs the raw Basket rows of each group and
// is not available on the plain Dataset an aggregation would return.
val grouped_data_ds = data_table.groupByKey(row => (row.user, row.item))

In [None]:


 /**
  * State-update function for `mapGroupsWithState`: folds the supplied transactions of one
  * (user, item) group into that group's stored [[BasketState]].
  *
  * For each transaction the quantity is added to the running quantity, clamped at 0 so the
  * basket never goes negative, and the transaction quantity is appended to `accu`
  * (separated by a single space).
  *
  * @param key          the (user, item) pair identifying the group
  * @param transactions the basket events to apply, in iteration order
  * @param state        the group's state handle; it is overwritten with the new state
  * @return the state after all supplied transactions have been applied
  */
 def updateBasketQuantity(key: (String, String), transactions: Iterator[Basket], state: GroupState[BasketState]): BasketState = {
   val (user, item) = key

   // Start from the stored state, or from an empty basket the first time this key is seen.
   val initial = state.getOption.getOrElse(BasketState(user, item, 0, ""))

   // foldLeft replaces the hand-written recursion, so long iterators cannot overflow the stack.
   val updated = transactions.foldLeft(initial) { (current, transaction) =>
     val clampedQuantity = math.max(0, current.quantity + transaction.quantity)
     BasketState(user, item, clampedQuantity, s"${current.accu} ${transaction.quantity}")
   }

   state.update(updated)
   updated
 }

In [None]:
// Run the stateful basket update once per (user, item) group; both the stored state and
// the emitted rows are BasketState.
val newQuantities = grouped_data_ds.mapGroupsWithState[BasketState, BasketState](updateBasketQuantity _)

In [None]:
// Print the updated basket states to the console, one micro-batch every 10 seconds.
val streamingQuery = (newQuantities
  .writeStream
  .format("console")
  .option("truncate", "false")
  // Queries built on mapGroupsWithState only support the Update output mode; "complete"
  // is rejected when the query starts because there is no streaming aggregation.
  .outputMode("update")
  .trigger(Trigger.ProcessingTime("10 second"))
  .start())

// Blocks this cell until the query is stopped or fails.
streamingQuery.awaitTermination()

In [None]:
// Stop the streaming query started in the previous cell.
streamingQuery.stop()