This notebook reads prediction requests from a MessageHub (Kafka) topic and makes predictions.<br>
In a real-world application, these requests could be put on the topic by a web application that a user is interacting with.<br>
<br>
This notebook prints the predictions to the console.<br>
A future notebook will put the predictions on another MessageHub topic, where they can be read by the web application to make recommendations to the user.

In [None]:
%Addjar http://central.maven.org/maven2/org/apache/kafka/kafka-clients/0.9.0.0/kafka-clients-0.9.0.0.jar
%Addjar http://central.maven.org/maven2/org/apache/kafka/kafka_2.10/0.9.0.0/kafka_2.10-0.9.0.0.jar
%Addjar http://central.maven.org/maven2/org/apache/kafka/kafka-log4j-appender/0.9.0.0/kafka-log4j-appender-0.9.0.0.jar
%Addjar https://github.com/ibm-messaging/message-hub-samples/raw/master/java/message-hub-login-library/messagehub.login-1.0.0.jar
%Addjar https://github.com/ibm-messaging/iot-messgehub-spark-samples/releases/download/v0.1/streaming-kafka.jar

Read the MessageHub properties that were saved by the previous step.

In [1]:
import java.util.Properties
import java.io.FileInputStream

// Load the MessageHub connection settings from "messagehub.properties".
// The input stream is closed in a finally block so the file handle is
// released even when load() throws (e.g. on an unreadable file).
val prop = new Properties()
val propStream = new FileInputStream("messagehub.properties")
try {
    prop.load(propStream)
} finally {
    propStream.close()
}

// Individual settings; Properties.getProperty yields null for a missing key.
val bootstrap_servers     = prop.getProperty("bootstrap_servers")
val sasl_username         = prop.getProperty("sasl_username")
val sasl_password         = prop.getProperty("sasl_password")
val messagehub_topic_name = prop.getProperty("messagehub_topic_name")
val api_key               = prop.getProperty("api_key")
val kafka_rest_url        = prop.getProperty("kafka_rest_url")

Use Spark Streaming to retrieve the prediction requests and make predictions.<br>
You will need to stop the notebook kernel to quit the code below.

In [6]:
import scala.collection.mutable.ArrayBuffer
import org.apache.spark.streaming.Duration
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import com.ibm.cds.spark.samples.config.MessageHubConfig
import com.ibm.cds.spark.samples.dstream.KafkaStreaming.KafkaStreamingContextAdapter
import org.apache.kafka.common.serialization.Deserializer
import org.apache.kafka.common.serialization.StringDeserializer
import java.util.UUID
import scala.util.Try

import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating

// Restore the recommender model from its saved location.
val model = MatrixFactorizationModel.load(sc, "./recommender_model/")

// Smoke test: print a single prediction to show the model is usable.
println("Prediction for user=1, movie=500 is " + model.predict(1, 500))

// Consumer configuration for the MessageHub (Kafka) topic.
val kafkaProps = new MessageHubConfig

// Settings are applied in the listed order, one setConfig call per entry.
Seq(
    "bootstrap.servers"   -> bootstrap_servers,
    "kafka.user.name"     -> sasl_username,
    "kafka.user.password" -> sasl_password,
    "kafka.topic"         -> messagehub_topic_name,
    "api_key"             -> api_key,
    "kafka_rest_url"      -> kafka_rest_url,
    // NOTE(review): "earliest" is the spelling used by the Kafka 0.9
    // new-consumer API ("smallest" belongs to the older consumer) --
    // confirm which consumer the streaming adapter uses.
    "auto.offset.reset"   -> "earliest",
    // a freshly generated random group id on every run
    "group.id"            -> UUID.randomUUID().toString()
).foreach { case (key, value) => kafkaProps.setConfig(key, value) }

kafkaProps.createConfiguration()

// Uncomment to inspect the effective configuration:
// kafkaProps.toImmutableMap.foreach { keyVal => println(keyVal._1 + "=" + keyVal._2) }
// println("group.id=" + kafkaProps.getConfig("group.id"))

// Streaming context on the existing SparkContext with a 2-second batch interval.
val ssc = new StreamingContext(sc, Seconds(2))

// Stream of (key, value) string pairs read from the configured topic.
val stream =
    ssc.createKafkaStream[String, String, StringDeserializer, StringDeserializer](
        kafkaProps,
        List(kafkaProps.getConfig("kafka.topic"))
    )

/**
 * Asks the loaded model for the rating of `movieId` by `userId`.
 *
 * The call is wrapped in a Try so that an exception raised by
 * model.predict is handed back as a Failure instead of being thrown.
 *
 * NOTE(review): the result is typed Try[Any]; it could probably be
 * narrowed to the model's numeric result type -- confirm before changing.
 *
 * @param userId  id of the user
 * @param movieId id of the movie
 * @return Success holding the prediction, or Failure holding the exception
 */
def predict(userId: Int, movieId: Int): Try[Any] =
    Try(model.predict(userId, movieId))

// Split the value of every (key, value) record into its comma-separated
// fields; the first two fields are read as userId and movieId below.
val moviesToRate = stream.map(_._2.split(","))

// For each batch, bring the requests back to the driver and print one
// "userId, movieId, prediction" line per request.
moviesToRate.foreachRDD { rdd =>
    // collect() already returns an Array, so no further conversion is needed.
    rdd.collect().foreach { fields =>
        // Parse inside a Try: a malformed request (missing field or
        // non-numeric id) would otherwise throw out of foreachRDD.
        // trim tolerates whitespace around the ids, e.g. "1, 500".
        Try((fields(0).trim.toInt, fields(1).trim.toInt)).toOption match {
            case Some((userId, movieId)) =>
                // -1 is the fallback printed when the prediction itself fails
                val prediction = predict(userId, movieId).getOrElse(-1)

                // TODO: the predictions should be put on another topic to be consumed
                // by the client application (e.g. a web app)

                println(s"$userId, $movieId, $prediction")

            case None =>
                // Report and skip the bad record instead of failing the batch.
                println("Skipping malformed prediction request: " + fields.mkString(","))
        }
    }
}

ssc.start()

// awaitTermination() blocks until the streaming context terminates and
// rethrows any error raised while streaming. Stopping in a finally block
// makes sure the StreamingContext is shut down on every exit path, while
// stopSparkContext = false leaves the notebook's SparkContext (sc) running.
//
// If we didn't want to wait indefinitely, we could replace
// ssc.awaitTermination() with:
//
//   ssc.awaitTerminationOrTimeout(30000)
try {
    ssc.awaitTermination()
} finally {
    ssc.stop(stopSparkContext = false, stopGracefully = true)
}

Prediction for user=1, movie=500 is 3.685349066437224
default location of ssl Trust store is: /usr/local/src/spark160master/ibm-java-x86_64-80/jre/lib/security/cacerts
com/ibm/cds/spark/samples/config/jaas.conf
Registering JaasConfiguration: /gpfs/fs01/user/s85d-88ebffb000cc3e-39ca506ba762/notebook/tmp/p5uQW3nsiHMczWE9/jaas.conf
default location of ssl Trust store is: /usr/local/src/spark160master/ibm-java-x86_64-80/jre/lib/security/cacerts
1, 500, 3.685349066437224
1, 501, 3.2725849355746823
1, 502, 1.9849899602592473
1, 503, 2.5721063844976557
100000, 503, -1
