In [2]:
from pyspark.sql import SparkSession
import redis
import time

def write_to_redis(row):
    """Increment the Redis counter for the action carried by one record.

    Adds 1 to the score of ``row["last_action"]`` in the sorted set
    ``long-last-action-stats`` on the Redis server at localhost:6379.

    Args:
        row: Record supporting ``row["last_action"]`` lookup. A value of
            ``None`` (e.g. a Kafka record with a null payload) is skipped.

    Returns:
        None.
    """
    action = row["last_action"]
    if action is None:
        # redis-py refuses None as a sorted-set member; skipping here keeps
        # one null record from failing the whole write.
        return

    stats_key = "long-last-action-stats"

    redis_conn = redis.Redis(host="localhost",
                             port=6379, decode_responses=True)
    try:
        #increment for last action by 1
        redis_conn.zincrby(stats_key, 1, action)
    finally:
        # Release the connection even when the increment raises.
        redis_conn.close()

#create spark session
# Local two-thread session named "LongLastActionsJob". The connector jars
# are passed to spark.jars as a single comma-separated string.
website_spark = (
    SparkSession.builder
    .appName("LongLastActionsJob")
    .config("spark.sql.shuffle.partitions", 2)
    .config("spark.default.parallelism", 2)
    .config("spark.sql.streaming.forceDeleteTempCheckpointLocation", True)
    .config("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2")
    .config(
        "spark.jars",
        ",".join((
            "jars/mysql-connector-j-8.4.0.jar",
            "jars/commons-pool2-2.12.0.jar",
            "jars/kafka-clients-3.6.0.jar",
            "jars/spark-sql-kafka-0-10_2.12-3.5.1.jar",
            "jars/spark-token-provider-kafka-0-10_2.12-3.5.1.jar",
            "jars/spark-streaming-kafka-0-10_2.12-3.5.1.jar",
        )),
    )
    .config("spark.driver.extraClassPath", "jars/*")
    .master("local[2]")
    .getOrCreate()
)

print("Reading from Kafka...")
# Streaming source: the Kafka topic below, read from the earliest offsets.
raw_last_action_df = (
    website_spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "localhost:9092")
    .option("subscribe", "spark.exercise.lastaction.long")
    .option("startingOffsets", "earliest")
    .load()
)

#Fetch last_action from value
last_action_df = raw_last_action_df.selectExpr(
    "CAST(value as STRING) as last_action"
)

#Update long last_action counts in real time to Redis
# Every streamed row is handed to write_to_redis by the foreach sink.
(
    last_action_df.select("last_action")
    .writeStream
    .foreach(write_to_redis)
    .start()
)

#Query redis to observe counts for long last actions
redis_query_conn = redis.Redis(host="localhost",
                               port=6379, decode_responses=True)
try:
    # Polls every 5 seconds until interrupted (e.g. KeyboardInterrupt).
    while True:
        #Print long last action stats from Redis, highest count first
        print("\nLong Last Action Stats from Redis\n-------------------------")
        # "+inf" as the upper bound: a fixed cap such as 99999 would
        # silently drop any action whose count grows past it.
        scores = redis_query_conn.zrevrangebyscore(
            "long-last-action-stats", "+inf", 0, withscores=True)

        for action, count in scores:
            print(action, " = ", count)

        time.sleep(5)
finally:
    # Release the polling connection when the loop is interrupted; the
    # interrupt itself still propagates.
    redis_query_conn.close()


Reading from Kafka...

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

Long Last Action Stats from Redis
-------------------------

L

KeyboardInterrupt: 