# Reference
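`https://spark.apache.org/docs/latest/streaming-programming-guide.html`

This notebook uses the Structured Streaming API (`spark.readStream` / `writeStream`), which is documented at `https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html`.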

In [1]:
# Imports
# findspark locates the local Spark installation so that `pyspark` can be imported below
import findspark
findspark.init()
# Last expression of the cell: displays the Spark home directory that was located
findspark.find()


'/home/prod/spark-3.3.1-bin-hadoop3'

In [2]:
from pyspark import SparkConf
# Spark configuration for this notebook: the application name shown in the UI and
# the standalone master to connect to. The setters return the same SparkConf, so
# they can be chained into a single assignment (which also produces no cell output).
conf = (
    SparkConf()
    .setAppName('Structured Streaming Model')
    .setMaster('spark://spark-master:7077')
)

In [3]:
# Setup spark environment
from pyspark import SparkContext
from pyspark.sql.types import *
from pprint import pprint, pformat
# Reuse the already-running SparkContext if there is one; otherwise start one from `conf`
sc = SparkContext.getOrCreate(conf)
# `uiWebUrl` is the public accessor for the web UI address of this application;
# it avoids reaching into the private py4j handle (`sc._jsc`).
print('Spark web UI link: ', sc.uiWebUrl)


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


22/11/07 15:14:31 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Spark web UI link:  http://spark-master:4040


## Streaming

In [4]:
from pyspark.sql import SparkSession
# Obtain the session through the builder (the documented entry point) instead of
# calling the constructor directly. getOrCreate() returns the existing session if
# there is one, otherwise it creates a session on the SparkContext that is already
# running, so re-running this cell is harmless.
spark = SparkSession.builder.getOrCreate()

In [5]:
from pyspark.sql.functions import explode
from pyspark.sql.functions import split

# Streaming DataFrame fed by a TCP text socket on spark-master:9999.
# The text read from the socket is available in the `value` column.
# Note: a streaming DataFrame cannot be inspected with .show(); it only produces
# rows once a query is started through writeStream.
dStream = (
    spark.readStream
    .format("socket")
    .option("host", "spark-master")
    .option("port", 9999)
    .load()
)

# Break every incoming value on newlines and emit one row per piece,
# exposed as a single column named `text`.
value_pieces = split(dStream["value"], "\n")
lines = dStream.select(explode(value_pieces).alias("text"))

22/11/07 15:14:53 WARN TextSocketSourceProvider: The socket source should not be used for production applications! It does not support recovery.


### Model setup

In [6]:
# Pull the pickled model through Spark: binaryFiles produces (file path, raw bytes)
# pairs, and collect() returns them to the driver as a list.
model_rdd_pkl = sc.binaryFiles(
    "./models/SentimentIntensityAnalyzer.pkl"
)
model_rdd_data = model_rdd_pkl.collect()

                                                                                

In [7]:
# Rebuild the Python model object on the driver, then broadcast it to the Spark nodes
import pickle

# model_rdd_data holds (path, bytes) pairs; only the first entry is used.
# NOTE: unpickling can execute arbitrary code - only load model files from a trusted source.
_model_path, _model_bytes = model_rdd_data[0]
_model = pickle.loads(_model_bytes)  # local copy on the driver
model = sc.broadcast(_model)  # broadcast handle; read it with model.value
print(model.value)

<nltk.sentiment.vader.SentimentIntensityAnalyzer object at 0x7f4a0160a020>


In [8]:
from pyspark.sql.functions import udf

# Create udf and call predict method on broadcasted model
def predict(text):
    """Score one piece of text with the broadcast sentiment model.

    Looks up the analyzer through the broadcast variable ``model`` and returns
    the ``'compound'`` entry of its ``polarity_scores`` result.

    Args:
        text: The string to score, or ``None`` for a null row.

    Returns:
        The compound score as a ``float``, or ``None`` when ``text`` is ``None``
        (Spark stores a ``None`` returned from a UDF as SQL NULL).
    """
    # A null input would otherwise raise inside the analyzer and fail the whole
    # streaming query; propagate the null instead.
    if text is None:
        return None
    prediction = model.value.polarity_scores(text)['compound']
    return float(prediction)

# Wrap `predict` as a Spark SQL UDF whose result column is a double
predict_udf = udf(f=predict, returnType=DoubleType())

## Predict on the stream query

In [9]:
from pyspark.sql.functions import col
# Keep the original text and add the model's score for it as a second column
text_column = col('text')
score_column = predict_udf(text_column).alias('score')
df = lines.select(text_column, score_column)


In [10]:
# Start the streaming query: every micro-batch of (text, score) rows is appended
# to the console sink as it arrives.
query = (
    df.writeStream
    .outputMode("append")
    .format("console")
    .start()
)

# Block this cell until the query ends. Interrupting the kernel is the normal way
# to end this demo, so a KeyboardInterrupt is deliberately not treated as an error;
# in every case the query is stopped so it does not keep running in the background.
try:
    query.awaitTermination()
except KeyboardInterrupt:
    pass
finally:
    query.stop()

22/11/07 15:15:13 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-0909c89c-5a39-4032-8d44-74fb83d93be0. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
22/11/07 15:15:13 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.


                                                                                

-------------------------------------------
Batch: 0
-------------------------------------------
+----+-----+
|text|score|
+----+-----+
+----+-----+



[Stage 2:>                                                          (0 + 2) / 2]

-------------------------------------------
Batch: 1
-------------------------------------------


                                                                                

+--------------------+-------+
|                text|  score|
+--------------------+-------+
|Animal Crossing; ...| 0.6605|
|With a game this ...| 0.9769|
|Above all else, A...| 0.9628|
|Nintendo's comfor...| 0.4404|
|Animal Crossing: ...| 0.9929|
|Animal Crossing: ...| 0.6908|
|Animal Crossing: ...| 0.8316|
|Animal Crossing: ...| 0.9584|
|Animal Crossing: ...| 0.3818|
|I always knew Ani...| 0.8376|
|Animal Crossing: ...| 0.5606|
|The amazing, rela...| 0.9756|
|Animal Crossing: ...| 0.9707|
|New Horizons has ...| 0.9442|
|Animal Crossing: ...| 0.9552|
|Nintendo Switch g...| 0.4416|
|Animal Crossing i...| 0.7184|
|Animal Crossing f...| 0.7096|
|There continues t...|-0.3241|
|Under the illusio...| 0.9001|
+--------------------+-------+
only showing top 20 rows



                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+--------------------+-------+
|                text|  score|
+--------------------+-------+
|A perfect entry-p...| 0.8779|
|This latest Anima...| 0.9571|
|Sometimes, playin...| 0.8925|
|New Horizons is t...|  0.765|
|Animal Crossing: ...| 0.9753|
|Being able to pla...| 0.8555|
|“New Horizons mak...| 0.6369|
|New Horizons has ...| 0.9895|
|New Horizons is t...| 0.8807|
|New Horizons draw...| 0.9127|
|Initially we were...|-0.8521|
|The latest Animal...| 0.9074|
|It's an exception...|-0.0258|
|New Horizons alwa...| 0.6249|
|With an addictive...| 0.9529|
|Animal Crossing: ...| 0.9422|
|Animal Crossing: ...| 0.8402|
|Animal Crossing: ...| 0.7783|
|If you’ve never p...| 0.3536|
|If Animal Crossin...| 0.8834|
+--------------------+-------+
only showing top 20 rows



                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+--------------------+------+
|                text| score|
+--------------------+------+
|With additions li...|0.5346|
|Animal Crossing: ...|0.8126|
|As much as I love...|0.9128|
|Animal Crossing N...| -0.25|
|New Horizons is a...|0.9176|
+--------------------+------+



                                                                                

-------------------------------------------
Batch: 4
-------------------------------------------
+--------------------+-------+
|                text|  score|
+--------------------+-------+
|By consolidating ...|-0.1531|
|Animal Crossing N...| 0.7163|
|New Horizons is s...| 0.9062|
|Animal Crossing h...| 0.9932|
|Animal Crossing: ...| 0.3182|
|I can't wait to s...|    0.0|
+--------------------+-------+



                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+--------------------+-----+
|                text|score|
+--------------------+-----+
|Animal Crossing: ...|  0.0|
+--------------------+-----+



ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/home/prod/spark-3.3.1-bin-hadoop3/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/home/prod/spark-3.3.1-bin-hadoop3/python/lib/py4j-0.10.9.5-src.zip/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/home/prod/anaconda3/envs/sparkimental/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


KeyboardInterrupt: 

In [None]:
# Done: stop the streaming query that was started with writeStream ... start()
query.stop()