In [0]:
%run ./_resources/01-setup $reset_all_data=false

In [0]:
# Recursively delete the checkpoints folder under `volume_folder`, so the bronze
# stream in this notebook (checkpointLocation = .../checkpoints/bronze) starts
# without any previously stored checkpoint state.
dbutils.fs.rm(volume_folder + "/checkpoints", recurse=True)

## Creating the table events_raw:

In [0]:
%sql
-- Bronze landing table: one row per message, with key and value stored as strings.
-- IF NOT EXISTS keeps this cell safe to re-run against an existing table.
CREATE TABLE IF NOT EXISTS events_raw (
  key   STRING,
  value STRING
)
TBLPROPERTIES (
  -- Delta auto-optimize settings: optimized writes and automatic compaction.
  delta.autoOptimize.optimizeWrite = true,
  delta.autoOptimize.autoCompact   = true
);

## Writing messages from Kafka to the Bronze table:

In [0]:
# Kafka source settings. The bootstrap server and the JAAS config are
# placeholders that must be replaced with real connection values.
kafka_options = {
    "kafka.bootstrap.servers": "<your-server>",
    "kafka.security.protocol": "SASL_SSL",
    "kafka.sasl.mechanism": "PLAIN",
    "kafka.sasl.jaas.config": '<your-connection>',
    "subscribe": "streaming-learning",
    # Consume messages from the end of the topic.
    "startingOffsets": "latest",
    # Control ingestion rate (backpressure) by capping offsets read per trigger.
    "maxOffsetsPerTrigger": "10000",
}

# Read the stream from Kafka and cast the key and value columns to strings.
# NOTE(review): `spark`, `F` and `volume_folder` are not defined in this cell;
# presumably they come from the setup notebook run at the top - confirm.
kafka_events = (
    spark.readStream
    .format("kafka")
    .options(**kafka_options)
    .load()
    .withColumn("key", F.col("key").cast("string"))
    .withColumn("value", F.col("value").cast("string"))
)

# Append the events to the bronze table (events_raw) every 20 seconds,
# tracking progress in the bronze checkpoint folder.
# NOTE(review): mergeSchema is presumably enabled so that source columns missing
# from events_raw can be added to the table on write - confirm this is intended.
stream = (
    kafka_events.writeStream
    .format("delta")
    .trigger(processingTime="20 seconds")
    .option("checkpointLocation", volume_folder + "/checkpoints/bronze")
    .option("mergeSchema", "true")
    .outputMode("append")
    .table("events_raw")
)

# Project helper; presumably blocks until events_raw is available to query - confirm.
Utils.wait_for_table("events_raw")

In [0]:
%sql
-- Show every column of the rows written to the bronze table so far.
SELECT *
  FROM events_raw;

In [0]:
%sql
-- Show only the message payload column of the bronze table.
SELECT value
  FROM events_raw;

In [0]:
%sql
-- Count events per platform. `value:platform` extracts the `platform` field
-- from the string payload (assumes `value` holds JSON - confirm).
SELECT
  value:platform AS platform,
  COUNT(*)
FROM events_raw
GROUP BY platform;

In [0]:
# Stop the streams started by this notebook via the project helper.
# NOTE(review): sleep_time=120 is presumably a delay in seconds before the
# streams are stopped, leaving time to explore the queries above - confirm.
Utils.stop_all_streams(sleep_time=120)