In [1]:
from threading import Thread

class StreamingThread(Thread):
    """Run a streaming context on a background thread.

    Starting the context from a separate thread keeps the calling (notebook)
    thread free while ``awaitTermination()`` blocks.

    Parameters
    ----------
    ssc
        The streaming context to drive. It must provide ``start()``,
        ``awaitTermination()`` and
        ``stop(stopSparkContext=..., stopGraceFully=...)``.
    """

    def __init__(self, ssc):
        super().__init__()
        self.ssc = ssc

    def run(self):
        """Start the wrapped context and block until it terminates."""
        # Use the context handed to the constructor; referring to a global
        # named ``ssc`` would ignore the argument and fail with NameError
        # whenever no such global exists.
        self.ssc.start()
        self.ssc.awaitTermination()

    def stop(self):
        """Stop the wrapped streaming context without stopping the SparkContext."""
        print('----- Stopping... this may take a few seconds -----')
        # NOTE: ``stopGraceFully`` (capital F) is the keyword spelling used by
        # PySpark's StreamingContext.stop; do not "correct" it.
        self.ssc.stop(stopSparkContext=False, stopGraceFully=True)

In [2]:
# Echo `sc` so the notebook renders its repr.
# NOTE(review): presumably the SparkContext injected by the kernel — confirm.
sc

In [3]:
# Echo `spark` so the notebook renders its repr.
# NOTE(review): presumably the SparkSession injected by the kernel — confirm.
spark

In [4]:
from pyspark.streaming import StreamingContext

In [5]:
# Module-level state for lazy model loading:
#   models_loaded -- flips to True once the model has been loaded
#   my_model      -- holds the loaded model (None until then)
models_loaded = False
my_model = None

def process(time, rdd):
    """Handle one micro-batch: show it, score it, and show the predictions.

    Parameters
    ----------
    time
        Batch timestamp; only used (via ``str``) in the printed banner.
    rdd
        The batch RDD. Empty batches are skipped. Non-empty batches are read
        as JSON into a DataFrame through the notebook-level ``spark`` object.

    The model is loaded from ``'cvmodel'`` on the first non-empty batch and
    cached in the module globals ``my_model`` / ``models_loaded``.
    """
    if rdd.isEmpty():
        return

    banner = "========= %s =========" % str(time)
    print(banner)

    # Parse the batch as JSON and display the raw rows.
    batch_df = spark.read.json(rdd)
    batch_df.show()

    # Module namespace holding the lazily loaded model and its flag.
    state = globals()

    if not state['models_loaded']:
        # First non-empty batch: load the persisted model once and cache it.
        from pyspark.ml.tuning import CrossValidatorModel
        state['my_model'] = CrossValidatorModel.load('cvmodel')
        state['models_loaded'] = True

    # Score the batch with the cached model and display the chosen columns.
    scored_df = state['my_model'].transform(batch_df)
    scored_df.select('label', 'target', 'prediction').show()

In [6]:
# Build the streaming context on top of `sc` with a batch duration of 10.
ssc = StreamingContext(sc, batchDuration=10)

In [7]:
# Open a text stream from the remote socket and hand every batch RDD to `process`.
lines = ssc.socketTextStream(hostname="seppe.net", port=7778)
lines.foreachRDD(process)

In [8]:
# Wrap the streaming context in a StreamingThread and start it, so the
# stream runs on its own thread instead of blocking this cell.
ssc_t = StreamingThread(ssc)
ssc_t.start()

+------+-------------------+--------------------+
| label|           tweet_id|          tweet_text|
+------+-------------------+--------------------+
|#covid|1398291679828971524|Bioethics Implica...|
+------+-------------------+--------------------+

+------+------+----------+
| label|target|prediction|
+------+------+----------+
|#covid|   1.0|       0.0|
+------+------+----------+

+------+-------------------+--------------------+
| label|           tweet_id|          tweet_text|
+------+-------------------+--------------------+
|#covid|1398291638087204869|#███████ has more...|
+------+-------------------+--------------------+

+------+------+----------+
| label|target|prediction|
+------+------+----------+
|#covid|   1.0|       1.0|
+------+------+----------+



In [9]:
# Ask the streaming thread to stop its streaming context (see StreamingThread.stop).
ssc_t.stop()

----- Stopping... this may take a few seconds -----
+------+-------------------+--------------------+
| label|           tweet_id|          tweet_text|
+------+-------------------+--------------------+
|#covid|1398291631821004801|Serological tests...|
+------+-------------------+--------------------+

+------+------+----------+
| label|target|prediction|
+------+------+----------+
|#covid|   1.0|       1.0|
+------+------+----------+

