In [1]:
import threading

# Helper thread that keeps the Spark StreamingContext from blocking the Jupyter kernel
        
class StreamingThread(threading.Thread):
    """Run a streaming context on a background thread.

    ``run`` starts the wrapped context and then blocks in
    ``awaitTermination`` on this thread, so the caller that invoked
    ``Thread.start()`` is not blocked. ``stop`` is meant to be called from
    another thread (e.g. a later notebook cell) to shut the context down.

    Attributes:
        ssc: The streaming context object passed to the constructor. It must
            provide ``start()``, ``awaitTermination()`` and
            ``stop(stopSparkContext=..., stopGraceFully=...)``.
    """

    def __init__(self, ssc):
        """Store the streaming context; nothing is started yet."""
        threading.Thread.__init__(self)
        self.ssc = ssc

    def run(self):
        """Thread body: start the context, then wait until it terminates."""
        streaming_context = self.ssc
        streaming_context.start()
        streaming_context.awaitTermination()

    def stop(self):
        """Print a notice and ask the context to stop.

        The call passes ``stopSparkContext=False`` and ``stopGraceFully=True``
        (keyword spelling as used by the PySpark API).
        """
        print('----- Stopping... this may take a few seconds -----')
        self.ssc.stop(stopSparkContext=False, stopGraceFully=True)

In [2]:
# Display the SparkContext as a sanity check.
# NOTE(review): `sc` is not created in any visible cell — presumably it is
# injected by the PySpark kernel/launcher; confirm before Restart & Run All.
sc

In [3]:
# Display the SparkSession as a sanity check.
# NOTE(review): `spark` is not created in any visible cell — presumably it is
# injected by the PySpark kernel/launcher; confirm before Restart & Run All.
spark

In [4]:
import random
from pyspark.streaming import StreamingContext
from pyspark.sql import Row
from pyspark.sql.functions import udf, struct, array, col, lit, lower
from pyspark.sql.types import StringType
from pyspark.ml.feature import RegexTokenizer, StopWordsRemover, CountVectorizer
from pyspark.ml.classification import LogisticRegression
from pyspark.ml import Pipeline, PipelineModel
from pyspark.ml.feature import OneHotEncoder, StringIndexer, VectorAssembler
from pyspark.ml.feature import HashingTF, IDF
from pyspark.ml.tuning import CrossValidator, CrossValidatorModel

In [5]:
# Location of the persisted CrossValidatorModel.
# NOTE(review): absolute, machine-specific path — point it at your own copy of
# the model before running on another machine.
MODEL_PATH = '/Users/christianbutcher/Desktop/spark/model'

# Load the model once, up front. The flag starts as False so that it is always
# defined (process() reads it), and is flipped to True only after the load
# succeeded. Previously the flag stayed False even though the model had just
# been loaded, which made process() load the same model a second time on the
# first non-empty batch.
globals()['models_loaded'] = False
globals()['my_model'] = CrossValidatorModel.load(MODEL_PATH)
globals()['models_loaded'] = True

                                                                                

In [6]:
def process(time, rdd):
    """Score one streaming micro-batch and print the result.

    Registered via ``foreachRDD``; invoked with the batch time and the RDD
    holding that batch's records.

    Args:
        time: Batch timestamp; only used (via ``str``) in the printed banner.
        rdd: RDD for this batch. It is parsed with ``spark.read.json``, so each
            element is presumably one JSON document per review — the frame
            must end up with ``app_id``, ``label``, ``review_id`` and
            ``review_text`` columns for the final ``select`` to succeed.

    Returns:
        None. Empty batches return immediately without printing anything.
    """
    # Nothing arrived during this interval: skip silently.
    if rdd.isEmpty():
        return

    print("========= %s =========" % str(time))

    # Parse the batch into a DataFrame and show the raw rows.
    df = spark.read.json(rdd)
    df.show()

    # Lazily load the model into the notebook namespace on first use.
    namespace = globals()
    if not namespace['models_loaded']:
        namespace['my_model'] = CrossValidatorModel.load('/Users/christianbutcher/Desktop/spark/model')
        namespace['models_loaded'] = True

    # Lower-case the review text before handing the frame to the model.
    lowered = df.withColumn("review_text", lower(df["review_text"]))

    # Apply the fitted model and show the inputs next to the prediction.
    scored = namespace['my_model'].transform(lowered)
    scored.select('app_id','label','review_id','review_text','prediction').show()

In [7]:
# Streaming context on top of the existing SparkContext; the second argument
# is the batch duration (10), i.e. how often a new micro-batch is produced.
ssc = StreamingContext(sparkContext=sc, batchDuration=10)

In [8]:
# Text stream read from the remote socket server.
lines = ssc.socketTextStream(hostname="seppe.net", port=7778)

# Hand every micro-batch RDD of that stream to process().
lines.foreachRDD(process)

In [9]:
# Run the streaming context on a helper thread so this cell returns
# immediately; batch output keeps appearing below while the stream runs.
ssc_t = StreamingThread(ssc=ssc)
ssc_t.start()

[Stage 34:>                                                         (0 + 1) / 1]

23/05/18 11:54:09 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:09 WARN BlockManager: Block input-0-1684407249200 replicated to only 0 peer(s) instead of 1 peers


                                                                                

23/05/18 11:54:10 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:10 WARN BlockManager: Block input-0-1684407250400 replicated to only 0 peer(s) instead of 1 peers


                                                                                

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
|1651490|    1|138475163|[b][u]Disclaimer:...|
+-------+-----+---------+--------------------+

23/05/18 11:54:12 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:12 WARN BlockManager: Block input-0-1684407252200 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:54:13 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:13 WARN BlockManager: Block input-0-1684407253200 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:54:15 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:15 WARN BlockManager: Block input-0-1684407255400 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:54:18 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:18 WARN BlockManager: Block input-

[Stage 34:>                                                         (0 + 1) / 1]

23/05/18 11:54:26 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:26 WARN BlockManager: Block input-0-1684407266400 replicated to only 0 peer(s) instead of 1 peers


[Stage 34:>                 (0 + 1) / 1][Stage 72:>                 (0 + 1) / 1]                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
|1651490|    1|138475163|[b][u]disclaimer:...|       1.0|
+-------+-----+---------+--------------------+----------+



[Stage 34:>                                                         (0 + 1) / 1]                                                                                

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
|1494420|    1|138476632|Game shows too mu...|
|1494420|    1|138475232|bones picked clea...|
|1494420|    1|138475012|Great! I uh, thou...|
|1186660|    1|138475887|Mindless fun for ...|
|1557990|    0|138476515|Always getting ki...|
+-------+-----+---------+--------------------+



                                                                                

23/05/18 11:54:31 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:31 WARN BlockManager: Block input-0-1684407271200 replicated to only 0 peer(s) instead of 1 peers


                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
|1494420|    1|138476632|game shows too mu...|       0.0|
|1494420|    1|138475232|bones picked clea...|       1.0|
|1494420|    1|138475012|great! i uh, thou...|       1.0|
|1186660|    1|138475887|mindless fun for ...|       1.0|
|1557990|    0|138476515|always getting ki...|       1.0|
+-------+-----+---------+--------------------+----------+

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
|1940340|    1|138477111|                    |
|1940340|    1|138476683|Absolute blast, I...|
|1940340|    0|138476225|having played thi...|
+-------+-----+---------+--------------------+



                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
|1940340|    1|138477111|                    |       1.0|
|1940340|    1|138476683|absolute blast, i...|       1.0|
|1940340|    0|138476225|having played thi...|       0.0|
+-------+-----+---------+--------------------+----------+



                                                                                

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
|1940340|    1|138475978|Different from DD...|
+-------+-----+---------+--------------------+



                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
|1940340|    1|138475978|different from dd...|       1.0|
+-------+-----+---------+--------------------+----------+



[Stage 34:>                                                         (0 + 1) / 1]

23/05/18 11:54:54 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:54 WARN BlockManager: Block input-0-1684407294400 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:54:59 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:54:59 WARN BlockManager: Block input-0-1684407299400 replicated to only 0 peer(s) instead of 1 peers


                                                                                

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
|1159690|    0|138476983|Will keep it simp...|
| 758690|    0|138475525|Not willing to ke...|
+-------+-----+---------+--------------------+

23/05/18 11:55:01 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:01 WARN BlockManager: Block input-0-1684407301400 replicated to only 0 peer(s) instead of 1 peers


                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
|1159690|    0|138476983|will keep it simp...|       0.0|
| 758690|    0|138475525|not willing to ke...|       0.0|
+-------+-----+---------+--------------------+----------+



[Stage 34:>                                                         (0 + 1) / 1]

23/05/18 11:55:05 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:05 WARN BlockManager: Block input-0-1684407305600 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:55:09 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:09 WARN BlockManager: Block input-0-1684407309600 replicated to only 0 peer(s) instead of 1 peers


                                                                                

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
| 669330|    0|138476857|Get ready to face...|
| 669330|    1|138476579|                  <3|
|2369390|    0|138477499|Do not play this ...|
+-------+-----+---------+--------------------+

23/05/18 11:55:11 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:11 WARN BlockManager: Block input-0-1684407311400 replicated to only 0 peer(s) instead of 1 peers


                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
| 669330|    0|138476857|get ready to face...|       1.0|
| 669330|    1|138476579|                  <3|       1.0|
|2369390|    0|138477499|do not play this ...|       0.0|
+-------+-----+---------+--------------------+----------+



[Stage 34:>                                                         (0 + 1) / 1]

23/05/18 11:55:13 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:13 WARN BlockManager: Block input-0-1684407313400 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:55:17 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:17 WARN BlockManager: Block input-0-1684407317400 replicated to only 0 peer(s) instead of 1 peers


                                                                                

23/05/18 11:55:20 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:20 WARN BlockManager: Block input-0-1684407320400 replicated to only 0 peer(s) instead of 1 peers
+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
|2369390|    1|138477496|  Good game go buy\n|
|2369390|    0|138476899|Btw you need to e...|
|2369390|    1|138476891|osm man simply cr...|
+-------+-----+---------+--------------------+



[Stage 34:>                                                         (0 + 1) / 1]                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
|2369390|    1|138477496|  good game go buy\n|       1.0|
|2369390|    0|138476899|btw you need to e...|       0.0|
|2369390|    1|138476891|osm man simply cr...|       1.0|
+-------+-----+---------+--------------------+----------+



[Stage 34:>                                                         (0 + 1) / 1]

23/05/18 11:55:23 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:23 WARN BlockManager: Block input-0-1684407323400 replicated to only 0 peer(s) instead of 1 peers


                                                                                

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
|2369390|    0|138476261|One of my least f...|
|2369390|    1|138476259|                Dope|
+-------+-----+---------+--------------------+



                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
|2369390|    0|138476261|one of my least f...|       1.0|
|2369390|    1|138476259|                dope|       1.0|
+-------+-----+---------+--------------------+----------+



[Stage 34:>                                                         (0 + 1) / 1]

23/05/18 11:55:52 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:52 WARN BlockManager: Block input-0-1684407352600 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:55:53 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:53 WARN BlockManager: Block input-0-1684407353600 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:55:58 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:58 WARN BlockManager: Block input-0-1684407358600 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:55:59 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:55:59 WARN BlockManager: Block input-0-1684407359600 replicated to only 0 peer(s) instead of 1 peers


                                                                                



[Stage 34:>                                                         (0 + 1) / 1]                                                                                

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
| 855740|    1|138477398|VERY good game fo...|
| 605740|    0|138475620|Game was under 1 ...|
|2409810|    1|138477362|Immerse yourself ...|
|2404760|    1|138475694|Pretty good puzzl...|
+-------+-----+---------+--------------------+



[Stage 34:>                                                         (0 + 1) / 1]                                                                                

+-------+-----+---------+--------------------+----------+
| app_id|label|review_id|         review_text|prediction|
+-------+-----+---------+--------------------+----------+
| 855740|    1|138477398|very good game fo...|       1.0|
| 605740|    0|138475620|game was under 1 ...|       0.0|
|2409810|    1|138477362|immerse yourself ...|       1.0|
|2404760|    1|138475694|pretty good puzzl...|       1.0|
+-------+-----+---------+--------------------+----------+



[Stage 34:>                                                         (0 + 1) / 1]

In [10]:
# Stop the streaming context via the helper thread's stop() method, which
# prints a notice and calls ssc.stop(stopSparkContext=False, stopGraceFully=True).
ssc_t.stop()

----- Stopping... this may take a few seconds -----
23/05/18 11:56:04 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/18 11:56:04 WARN BlockManager: Block input-0-1684407364600 replicated to only 0 peer(s) instead of 1 peers
23/05/18 11:56:05 ERROR ReceiverTracker: Deregistered receiver for stream 0: Stopped by driver
23/05/18 11:56:04 WARN SocketReceiver: Error receiving data
java.net.SocketException: Socket closed
	at java.base/java.net.SocketInputStream.socketRead0(Native Method)
	at java.base/java.net.SocketInputStream.socketRead(SocketInputStream.java:115)
	at java.base/java.net.SocketInputStream.read(SocketInputStream.java:168)
	at java.base/java.net.SocketInputStream.read(SocketInputStream.java:140)
	at java.base/sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:284)
	at java.base/sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:326)
	at java.base/sun.nio.cs.StreamDecoder.read(StreamDecoder.java:178)
	at java.base/java.io.InputStreamRead

Exception in thread "receiver-supervisor-future-0" java.lang.InterruptedException: sleep interrupted
	at java.base/java.lang.Thread.sleep(Native Method)
	at org.apache.spark.streaming.receiver.ReceiverSupervisor.$anonfun$restartReceiver$1(ReceiverSupervisor.scala:196)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
	at scala.util.Success.$anonfun$map$1(Try.scala:255)
	at scala.util.Success.map(Try.scala:213)
	at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
	at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
	at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
	at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.ja

+------+-----+---------+--------------------+
|app_id|label|review_id|         review_text|
+------+-----+---------+--------------------+
|824600|    1|138475635|so awesome that i...|
+------+-----+---------+--------------------+

+------+-----+---------+--------------------+----------+
|app_id|label|review_id|         review_text|prediction|
+------+-----+---------+--------------------+----------+
|824600|    1|138475635|so awesome that i...|       1.0|
+------+-----+---------+--------------------+----------+

