## Model Deployment with Spark Serving 
In this example, we train a model that produces movie recommendations from the *Movie Ratings* dataset. Then we use Spark Serving to deploy it as a real-time web service.
First, we import needed packages:

In [2]:
import sys
import numpy as np
import pandas as pd
import mmlspark
import os

from pyspark.ml.feature import StringIndexer
from pyspark.ml.recommendation import ALS
from pyspark.ml import Pipeline, PipelineModel
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.ml.evaluation import RegressionEvaluator

Now let's read the data and split it into training and test sets:

In [4]:
# The download step below calls urllib.request.urlretrieve, so the module must
# be imported before it is used.
import urllib.request

# Names of the raw columns in the ratings file.
userCol = 'UserId'
itemCol = 'MovieId'
ratingCol = 'Rating'

# Names of the indexed columns, derived from the raw id columns
# (e.g. "UserId" -> "UserIndex").
userColIndex = userCol.replace("Id", "Index")
itemColIndex = itemCol.replace("Id", "Index")


# Download the Movie Ratings dataset
basedataurl = "http://aka.ms"
datafile = "MovieRatings.csv"

# Local filesystem path of the data file under /dbfs.
datafile_dbfs = os.path.join("/dbfs", datafile)

if os.path.isfile(datafile_dbfs):
    print("found {} at {}".format(datafile, datafile_dbfs))
else:
    print("downloading {} to {}".format(datafile, datafile_dbfs))
    # URLs always use "/" as the separator; os.path.join is a filesystem helper
    # and would insert "\" on Windows, so the URL is built explicitly.
    urllib.request.urlretrieve("{}/{}".format(basedataurl, datafile), datafile_dbfs)

# NOTE(review): the file is downloaded to `datafile_dbfs` but loaded through the
# bare name `datafile`; presumably Spark resolves that against the DBFS root --
# confirm for the target environment.
data_all = (
    sqlContext.read.format('csv')
    .options(header='true', delimiter=',', inferSchema='true',
             ignoreLeadingWhiteSpace='true', ignoreTrailingWhiteSpace='true')
    .load(datafile)
)
data_all.printSchema()
display(data_all)

# 75% / 25% split with a fixed seed.
train, test = data_all.randomSplit([0.75, 0.25], seed=123)

Next, we will create a Cross Validator pipeline, in order to tune a Spark ALS model.

In [6]:
# Map raw user / movie ids to index columns. Both indexers are fitted on the
# full dataset, and handleInvalid='keep' is set for values not seen during fit.
indexerContacts = StringIndexer(inputCol=userCol, outputCol=userColIndex, handleInvalid='keep').fit(data_all)
indexerRules = StringIndexer(inputCol=itemCol, outputCol=itemColIndex, handleInvalid='keep').fit(data_all)

# ALS recommender on the indexed columns. The seed is fixed (same value as the
# train/test split) so that repeated runs of the notebook give the same model.
als = ALS(
    maxIter=5,
    userCol=userColIndex,
    itemCol=itemColIndex,
    ratingCol=ratingCol,
    coldStartStrategy="drop",
    seed=123,
)

# Put together the pipeline: index users, index movies, then fit ALS.
pipe = Pipeline(stages=[indexerContacts, indexerRules, als])

# Regularization rates to search over.
regs = [1, 0.1, 0.001]
paramGrid = ParamGridBuilder().addGrid(als.regParam, regs).build()


# Candidates are compared by RMSE between the rating column and the prediction.
evaluator = RegressionEvaluator(metricName="rmse", labelCol=ratingCol, predictionCol="prediction")
# The cross-validator is seeded as well, so the fold assignment is repeatable.
cv = CrossValidator(estimator=pipe, evaluator=evaluator, estimatorParamMaps=paramGrid, seed=123)
train.cache()
model = cv.fit(train)

In [7]:
# Index the test users and keep only the indexed user column. The column name
# comes from `userColIndex` rather than a repeated string literal, so it stays
# in sync with the column definitions. The schema of this frame is what the
# serving cell uses to parse incoming requests.
testInput = indexerContacts.transform(test).select(userColIndex)

Now, we will define the web service input and output.
For more information, you can visit the [documentation for Spark Serving](https://github.com/Azure/mmlspark/blob/master/docs/mmlspark-serving.md)

In [9]:
from pyspark.sql.functions import col, from_json, broadcast
from pyspark.sql.types import *
import uuid
from mmlspark import request_to_string, string_to_response

# Streaming source: an HTTP server on localhost:8898 registered as "my_api".
# Incoming request bodies are parsed using the schema of `testInput`.
request_stream = spark.readStream.server().address("localhost", 8898, "my_api").load()
serving_inputs = request_stream.parseRequest(testInput.schema)

# Stage 2 of the best pipeline is the fitted ALS model (after the two indexers).
# Its recommendations for all users are computed once and cached.
als_model = model.bestModel.stages[2]
recommendations = als_model.recommendForAllUsers(10).cache()

# Join each parsed request with the broadcast recommendations on the indexed
# user column, and reply with the "recommendations" column.
matched_requests = serving_inputs.join(broadcast(recommendations), 'UserIndex')
serving_outputs = matched_requests.makeReply("recommendations")

# A fresh checkpoint directory name is generated for every start of the query.
checkpoint_location = "checkpoints-{}".format(uuid.uuid1())
server = (
    serving_outputs.writeStream
    .server()
    .replyTo("my_api")
    .queryName("my_query")
    .option("checkpointLocation", checkpoint_location)
    .start()
)


Test the web service:

In [11]:
import requests
# JSON body with the single "UserIndex" field that the service parses.
data = u'{"UserIndex":13621.0}'
# POST the body to the locally served endpoint and print the raw reply text.
r = requests.post("http://localhost:8898/my_api", data=data)
reply_text = r.text
print("Response {}".format(reply_text))

In [12]:
import requests
# Same request as in the previous cell, for a different indexed user.
data = u'{"UserIndex":4247.0}'
# POST the body to the locally served endpoint and print the raw reply text.
r = requests.post("http://localhost:8898/my_api", data=data)
reply_text = r.text
print("Response {}".format(reply_text))

In [13]:
import time
# Wait before shutting down; the original note says this gives the server time
# to finish setting up (just to be safe).
shutdown_delay_seconds = 20
time.sleep(shutdown_delay_seconds)

# Stop the streaming query that backs the web service.
server.stop()