# A simple application

Now we will write an application that uses the ALS recommender model to suggest the best-suited movies for a particular user.

## 1. Load model

In [1]:
# Directory of the saved model; passed to MatrixFactorizationModel.load in the next cell.
MODEL_PATH = "./model"
# Root directory of the data files.
# NOTE(review): not referenced by the visible cells (DTransformer uses its own `data_url`) — confirm before relying on it.
DATA_PATH = "./data"

In [2]:
from pyspark import SparkContext
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating

# Obtain the SparkContext explicitly instead of relying on an `sc` variable
# injected by the kernel, so the cell also works after "Restart & Run All"
# on a plain Python kernel. getOrCreate() returns the already-running
# context when one exists.
sc = SparkContext.getOrCreate()

# Load the previously trained ALS matrix-factorization model from MODEL_PATH.
recommender = MatrixFactorizationModel.load(sc, MODEL_PATH)

## 2. Read users, movies data and transform

In [77]:
"""
Data transformation: To transform between userId and movieId (asin) and their Int form 
"""

from os import path
from pyspark.sql import SparkSession

class DTransformer:
    """Translate between raw ids (user ids / movie ASINs) and their integer form.

    The mapping tables are CSV files without a header, so Spark names the
    columns `_c0` (integer form) and `_c1` (raw id). Each table that is
    loaded is also registered as a temporary view (`users` / `movies`), and
    the lookup methods read from those views.

    Attributes:
        users: DataFrame of the user mapping, or None if `user_dir` was not given.
        movies: DataFrame of the movie mapping, or None if `movie_dir` was not given.
        data_url: base directory the `user_dir` / `movie_dir` names are joined to.
        spark_sql: SQLContext bound to `spark` (kept for existing callers).
        spark: the SparkSession used for reading and querying.
    """
    users = None
    movies = None
    data_url = './data/'
    spark_sql = None
    spark = None

    def __init__(self, user_dir=None, movie_dir=None):
        """Create the Spark session and load whichever mapping tables are named.

        Args:
            user_dir: CSV file/directory of the user mapping, relative to `data_url`.
            movie_dir: CSV file/directory of the movie mapping, relative to `data_url`.
        """
        # Imported here because the notebook never imports SQLContext itself.
        from pyspark.sql import SQLContext

        spark = SparkSession.builder.appName('transformer').getOrCreate()
        self.spark = spark
        # SQLContext expects a SparkContext as first argument, not a SparkSession.
        self.spark_sql = SQLContext(spark.sparkContext, spark)

        if user_dir:
            self.users = self._load_view(user_dir, "users")

        if movie_dir:
            self.movies = self._load_view(movie_dir, "movies")

    def _load_view(self, directory, view_name):
        """Read the CSV under `data_url`/`directory` and register it as temp view `view_name`."""
        frame = self.spark.read.csv(path.join(self.data_url, directory))
        frame.createOrReplaceTempView(view_name)
        return frame

    @staticmethod
    def _strip_quotes(value):
        """Drop one pair of matching surrounding quotes, if present.

        Earlier versions spliced the id straight into a SQL string, so callers
        had to pass it pre-quoted (e.g. '"A2KQUPC55BM0T"'). Those calls keep
        working, and plain unquoted ids are accepted as well.
        """
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            return value[1:-1]
        return value

    def _lookup(self, table, key_col, key, value_col):
        """Return `value_col` of the first row of `table` whose `key_col` equals `key`.

        The key is passed to Spark as a literal value instead of being
        concatenated into SQL text, so it cannot alter the query.

        Raises:
            IndexError: if no row matches (same exception type as before,
                now with a descriptive message).
        """
        frame = self.spark.table(table)
        # first() fetches a single row instead of collecting every match.
        row = frame.filter(frame[key_col] == key).select(value_col).first()
        if row is None:
            raise IndexError(
                'no row in "%s" with %s = %r' % (table, key_col, key))
        return row[value_col]

    def int2UserId(self, num):
        """Return the raw user id (`_c1`) for the integer id `num`."""
        return self._lookup('users', '_c0', int(num), '_c1')

    def userId2Int(self, userId):
        """Return the integer form (`_c0`) of the raw user id `userId`."""
        return self._lookup('users', '_c1', self._strip_quotes(userId), '_c0')

    def int2Asin(self, num):
        """Return the movie ASIN (`_c1`) for the integer id `num`."""
        return self._lookup('movies', '_c0', int(num), '_c1')

    def asin2Int(self, asin):
        """Return the integer form (`_c0`) of the movie ASIN `asin`."""
        return self._lookup('movies', '_c1', self._strip_quotes(asin), '_c0')

# Smoke test: build a transformer over the 'users' and 'movies' CSV inputs located under DTransformer.data_url
tf = DTransformer('users', 'movies')

In [78]:
# Smoke test of the `tf` transformer built in the previous cell.
# (The transformer is not rebuilt here: it is already in use on the next line,
# and constructing it again would only re-read both CSV inputs.)

tf.users.show(10)

# String ids are passed wrapped in double quotes, i.e. as SQL string literals.
print(tf.int2UserId(87758125))
print(tf.userId2Int('"A2KQUPC55BM0T"'))

print(tf.int2Asin(50207744))
print(tf.asin2Int('"B001G5T6US"'))

+--------+--------------+
|     _c0|           _c1|
+--------+--------------+
|15387583|A1KX1RGL86TKUR|
|63766600|A1VULUPIYJGE4H|
|12822853|A3OOBVVWR5CP0F|
|65677745|A2HOE63TJYB0XP|
|89133414|A1B6LX8PEEYUIB|
|17474658| AWREX1IOL40Q0|
|41228925|A2T6KMPVXL7P56|
|23315003|A38JIWXS2NZBT7|
|63044041| ANL4QZV6O39GN|
|51659961|A1UQ75MSANQSW0|
+--------+--------------+
only showing top 10 rows

A39K4E0X3EFTUZ
44114386
B001G5T6US
50207744


## 3. Use the model for recommendation

In [None]:
class recommendation:
    """Bundle the trained model with the id transformer used to serve recommendations.

    Attributes:
        recommender: the MatrixFactorizationModel loaded from `path`.
        sc: the SparkContext the model was loaded with.
        transformer: DTransformer over the 'users' and 'movies' tables.
    """
    recommender = None
    sc = None
    transformer = None

    def __init__(self, path):
        """Load the model stored at `path` and build the id transformer.

        Args:
            path: location of the saved MatrixFactorizationModel.
        """
        spark = SparkSession.builder.appName('recommender').getOrCreate()
        # MatrixFactorizationModel.load needs the SparkContext, not the session,
        # and must use this instance's context rather than a global `sc`.
        self.sc = spark.sparkContext
        self.recommender = MatrixFactorizationModel.load(self.sc, path)
        # Assign the transformer; the attribute starts as None and cannot be called.
        self.transformer = DTransformer('users', 'movies')

    def findBestMovie(self, userId):
        """Placeholder — not implemented yet; returns None."""
        pass

    def findRelatedMovie(self, movieId):
        """Placeholder — not implemented yet; returns None."""
        pass

    def recommend(self, userId):
        """Placeholder — not implemented yet; returns None."""
        pass

In [6]:
# Recommend the top 50 movies for the user whose integer id is 88028

products_for_users = recommender.recommendProducts(88028, 50)

In [7]:
# Display the recommendations: a list of Rating(user, product, rating) entries
products_for_users

[Rating(user=88028, product=13093038, rating=13.135790404956538),
 Rating(user=88028, product=87352761, rating=13.097783292951643),
 Rating(user=88028, product=94172197, rating=12.804509618421097),
 Rating(user=88028, product=32986837, rating=12.765355575324943),
 Rating(user=88028, product=17099187, rating=12.699877416500584),
 Rating(user=88028, product=21376692, rating=12.491477255443908),
 Rating(user=88028, product=19638713, rating=12.402393789892272),
 Rating(user=88028, product=59761252, rating=12.27101202389042),
 Rating(user=88028, product=76895774, rating=11.970388854953384),
 Rating(user=88028, product=69981756, rating=11.70569683717388),
 Rating(user=88028, product=12677478, rating=11.537838146105752),
 Rating(user=88028, product=45138793, rating=11.535149524616688),
 Rating(user=88028, product=77018526, rating=11.529115537847462),
 Rating(user=88028, product=99850916, rating=11.327527776452783),
 Rating(user=88028, product=64072761, rating=11.293181973182712),
 Rating(user

----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 43700)
----------------------------------------


Traceback (most recent call last):
  File "/usr/lib/python2.7/SocketServer.py", line 293, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/usr/lib/python2.7/SocketServer.py", line 321, in process_request
    self.finish_request(request, client_address)
  File "/usr/lib/python2.7/SocketServer.py", line 334, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/usr/lib/python2.7/SocketServer.py", line 655, in __init__
    self.handle()
  File "/usr/lib/spark-2.4.6-bin-hadoop2.7/python/pyspark/accumulators.py", line 269, in handle
    poll(accum_updates)
  File "/usr/lib/spark-2.4.6-bin-hadoop2.7/python/pyspark/accumulators.py", line 241, in poll
    if func():
  File "/usr/lib/spark-2.4.6-bin-hadoop2.7/python/pyspark/accumulators.py", line 245, in accum_updates
    num_updates = read_int(self.rfile)
  File "/usr/lib/spark-2.4.6-bin-hadoop2.7/python/pyspark/serializers.py", line 724, in read_int
    raise EOFError
EOFEr