In [None]:
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions._
// Directory prefix under which the streamed parquet input is looked up.
val base_path = "/home/jovyan/data/kafka/"

// Obtain the Spark session for this notebook: getOrCreate() hands back an
// already-running session when there is one, otherwise it builds one named "pm".
val spark = SparkSession.builder().appName("pm").getOrCreate()

// Explicit schema for the error/warning ("ew") records; a streaming file source
// needs the schema up front. Every field is nullable (the StructField default).
val ew_schema = StructType(
  Seq[(String, DataType)](
    "deviceID"  -> StringType,
    "unix_ts"   -> StringType,
    "error"     -> StringType,
    "timestamp" -> TimestampType,
    "tsID"      -> IntegerType,
    "warning"   -> StringType
  ).map { case (fieldName, fieldType) => StructField(fieldName, fieldType) }
)

// Streaming DataFrame over the parquet files found at <base_path>ew.parquet,
// read with the explicit ew_schema.
val ew_df = spark.readStream
  .schema(ew_schema)
  .format("parquet")
  .load(base_path + "ew.parquet")

ew_df.printSchema()

// Expose the stream to spark.sql queries under the view name "ew".
ew_df.createOrReplaceTempView("ew")

// Disabled debugging sink: when uncommented it prints each newly arrived batch of
// raw records to the console and blocks until the query terminates.
//ew_df
//  .writeStream
//  .outputMode("append")
//  .format("console")
//  .start()
//  .awaitTermination()

In [None]:
// Converts a time-slot id into a Unix timestamp in seconds.
// Slot 1 maps to 2010/01/01 (1262304000) and each following slot is 900 seconds later.
val get_ts: Long => Long = tsID => {
    val firstSlotEpoch = 1262304000L // 2010/01/01
    val slotLength = 900L
    firstSlotEpoch + slotLength * (tsID - 1)
}

// Column-level (UDF) version of get_ts, usable inside DataFrame expressions.
val get_ts_UDF = udf(get_ts)

// Query over the "ew" view: one output row per (deviceID, tsID), with a count of
// the rows matching each error code e1..e4 and each warning code w1..w4.
val ew_pivot_sql =
  """SELECT deviceID, tsID,
      SUM(CASE WHEN error='e1' THEN 1 ELSE 0 END) AS error_1_count, 
      SUM(CASE WHEN error='e2' THEN 1 ELSE 0 END) AS error_2_count, 
      SUM(CASE WHEN error='e3' THEN 1 ELSE 0 END) AS error_3_count, 
      SUM(CASE WHEN error='e4' THEN 1 ELSE 0 END) AS error_4_count,
      SUM(CASE WHEN warning='w1' THEN 1 ELSE 0 END) AS warning_1_count, 
      SUM(CASE WHEN warning='w2' THEN 1 ELSE 0 END) AS warning_2_count, 
      SUM(CASE WHEN warning='w3' THEN 1 ELSE 0 END) AS warning_3_count, 
      SUM(CASE WHEN warning='w4' THEN 1 ELSE 0 END) AS warning_4_count
  FROM ew 
  GROUP BY deviceID,tsID
  """

val ew_pivoted = spark.sql(ew_pivot_sql)

// Adds calendar columns to every (deviceID, tsID) aggregate row:
//   timestamp - start of the tsID slot as a Spark timestamp
//   dom       - day of month of that timestamp
//   moy       - month of year of that timestamp
//   year      - calendar year of that timestamp
//
// get_ts_UDF already yields epoch *seconds* (1262304000 is 2010/01/01, step 900),
// which is the unit from_unixtime expects, so the value is passed through unscaled.
// Dividing it by 1000, as if it were milliseconds, would place every row in
// January 1970 and make dom/moy/year meaningless.
val ew_enriched = ew_pivoted
    .withColumn("timestamp", from_unixtime(get_ts_UDF(col("tsID"))).cast("timestamp"))
    .withColumn("dom", dayofmonth(col("timestamp")))
    .withColumn("moy", month(col("timestamp")))
    .withColumn("year", year(col("timestamp")))

ew_enriched.printSchema()


In [None]:
// Static (batch) device table loaded from CSV: the first line is treated as the
// header and column types are inferred by Spark.
val deviceData = spark.read
    .options(Map("header" -> "true", "inferSchema" -> "true"))
    .csv("/home/jovyan/data/device.csv")

// Inner join of the streaming aggregates with the device table on deviceID.
val cat_join = ew_enriched.join(deviceData, Seq("deviceID"))

// Start the streaming query: in "complete" mode the whole joined result table is
// written to the console sink on every trigger.
val cat_join_query = cat_join.writeStream
  .format("console")
  .outputMode("complete")
  .start()

// Block this cell until the query stops or fails.
cat_join_query.awaitTermination()