# Ref
```
https://towardsdatascience.com/machine-learning-model-deployment-using-spark-585e80b2eae1
```

In [None]:
# Imports
import findspark

# Must run before the pyspark imports in the cells below: findspark.init()
# makes the local Spark installation's pyspark package importable.
findspark.init()
# Look up the Spark installation path; as the last expression of the cell
# its return value is shown as the cell output.
findspark.find()

In [None]:
from pyspark import SparkConf

# Build the Spark configuration in one chained expression: the application
# name first, then the URL of the Spark master this notebook connects to.
conf = (
    SparkConf()
    .setAppName('Model Deploy Test')
    .setMaster('spark://spark-master:7077')
)

In [None]:
# Setup spark environment
from pyspark import SparkContext
from pyspark.sql.types import *  # supplies DoubleType, used when the UDF is registered
from pprint import pprint, pformat
# Reuse the already-running SparkContext if there is one; otherwise create it
# from the `conf` built in the previous cell.
sc = SparkContext.getOrCreate(conf)


### Model setup

In [None]:
# Read the pickled model file as raw bytes (the actual unpickling happens in
# the next cell). collect() brings the result to the driver; element [0][1]
# of the collected list is the file content that gets unpickled.
model_rdd_pkl = sc.binaryFiles("./models/SentimentIntensityAnalyzer.pkl")
model_rdd_data = model_rdd_pkl.collect()

In [None]:
# Load and broadcast python object over spark nodes
import pickle

# NOTE(review): pickle.loads can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
# model_rdd_data[0][1] is the byte content of the first file read above.
_model = pickle.loads(model_rdd_data[0][1]) # local
model = sc.broadcast(_model) # broadcasted
# Sanity check: show the object that will be available through model.value
print(model.value)

In [None]:
from pyspark.sql.functions import udf

# Create udf and call predict method on broadcasted model
def predict(text):
    """Return the sentiment score of ``text`` as a float.

    The broadcast model's ``polarity_scores`` is called on ``text``. A
    SentimentIntensityAnalyzer returns a dict of scores ('neg', 'neu',
    'pos', 'compound'); passing that dict to ``float()`` raises TypeError,
    so the single normalized 'compound' score is extracted instead.

    Args:
        text: The sentence to score, or None (Spark passes None for null
            column values).

    Returns:
        The score as a float, or None when ``text`` is None.
    """
    if text is None:
        # Null in, null out: keeps the UDF from failing on missing values.
        return None
    prediction = model.value.polarity_scores(text)
    if isinstance(prediction, dict):
        prediction = prediction['compound']
    # A model that already returns a plain number still works unchanged.
    return float(prediction)

predict_udf = udf(predict, DoubleType())

### Batch Stream Prediction

In [None]:
from pyspark.streaming import StreamingContext

# Streaming context on top of the existing SparkContext; 2 is the batch
# interval (in seconds, per the PySpark StreamingContext API).
ssc = StreamingContext(sc, 2)

In [None]:
# Text stream read from a TCP socket on host "spark-master", port 9999
# (the `nc -lk 9999` data server described in the last cell).
lines = ssc.socketTextStream("spark-master", 9999)

In [None]:
def _score_line(line):
    """Build the record that is printed for one line received from the socket.

    'input' holds the first five space-separated tokens of the line (a short
    preview of the text); 'SScore' holds the value returned by ``predict``
    for the whole line.
    """
    return {
        'input': ' '.join(line.split(' ')[0:5]),
        'SScore': predict(line),
    }


# The plain Python function is called here, not predict_udf: a udf-wrapped
# function operates on DataFrame Columns and cannot be evaluated on a string
# inside a DStream map running on the executors.
results = lines.map(_score_line)

# Print the first ten elements of each RDD generated in this DStream to the console
results.pprint()

In [None]:

ssc.start()             # Start the computation
ssc.awaitTermination()  # Wait for the computation to terminate

# To feed data in: on spark-master, run `nc -lk 9999` to open a data server
# that accepts typed text. Every line entered there is sent to this notebook,
# and the results are printed below this cell.
#
# Alternatively, use the test server instead.