# A simple application

Now we will write an application using als-recommender model to suggest some of the best suit movies to a particular user 

## 1. Load model

In [52]:
MODEL_PATH = "./model/als"
DATA_PATH = "./data"

In [53]:
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating

recommender = MatrixFactorizationModel.load(sc, MODEL_PATH)

In [57]:
# Test the model 

products_for_users = recommender.recommendProductsForUsers(1)
products_for_users.collect()

[(81437040,
  (Rating(user=81437040, product=64907493, rating=7.2070605942582855),)),
 (1650672,
  (Rating(user=1650672, product=63011343, rating=6.683783772499485),)),
 (37800576,
  (Rating(user=37800576, product=93332201, rating=5.645033232495878),)),
 (31516848,
  (Rating(user=31516848, product=36837038, rating=10.844255334318627),)),
 (74327184,
  (Rating(user=74327184, product=83893931, rating=10.432661392829578),)),
 (93857616,
  (Rating(user=93857616, product=76489910, rating=19.940717942733603),)),
 (88899840,
  (Rating(user=88899840, product=13196794, rating=13.56351493314293),)),
 (74079648,
  (Rating(user=74079648, product=11370452, rating=31.412702436970957),)),
 (30972240,
  (Rating(user=30972240, product=91912604, rating=4.97325054179739),)),
 (45840672,
  (Rating(user=45840672, product=72215344, rating=18.480801626003675),)),
 (54419904,
  (Rating(user=54419904, product=74803596, rating=6.474671501150137),)),
 (14386176,
  (Rating(user=14386176, product=8935867, rating=8

In [60]:
products_for_users =recommender.recommendProducts(53274528, 10)
products_for_users

[Rating(user=53274528, product=69570242, rating=3.477901799037528),
 Rating(user=53274528, product=6171349, rating=3.3712958194337888),
 Rating(user=53274528, product=92892042, rating=3.35751390692887),
 Rating(user=53274528, product=14855922, rating=3.256306400050174),
 Rating(user=53274528, product=38548911, rating=3.0566706399057577),
 Rating(user=53274528, product=14390488, rating=2.999250492390382),
 Rating(user=53274528, product=81783262, rating=2.943965331674366),
 Rating(user=53274528, product=23109164, rating=2.918232545600822),
 Rating(user=53274528, product=51739689, rating=2.8825753447271834),
 Rating(user=53274528, product=8413941, rating=2.875141402356932)]

## 2. Read users, movies data and transform

In [18]:
"""
Data transformation: To transform between userId and movieId (asin) and their Int form 
"""

from os import path
from pyspark.sql import SparkSession

class DTransformer:
    users = None
    movies = None
    data_url = './data/'
    spark_sql = None
    spark = None
    
    def __init__(self, user_dir=None, movie_dir=None):
        
        spark = SparkSession.builder.appName('transformer').getOrCreate()
        spark_sql = SQLContext(spark)
        
        self.spark_sql = spark_sql
        self.spark = spark
        
        if user_dir: 
            users = spark.read.csv(path.join(self.data_url, user_dir))
            spark_sql.registerDataFrameAsTable(users, "users")
            self.users = users         
            
        if movie_dir: 
            movies = spark.read.csv(path.join(self.data_url, movie_dir))
            spark_sql.registerDataFrameAsTable(movies, "movies")
            self.movies = movies
            
        
    def int2UserId(self, num):
        spark_sql = self.spark_sql
        print(spark_sql.sql('SELECT _c1 FROM users WHERE _c0=' + str(num)).collect())
        return self.spark.sql('SELECT _c1 FROM users WHERE _c0=' + str(num)).collect()[0]['_c1']
        
    def userId2Int(self, userId):
        spark = self.spark
        return self.spark.sql('SELECT _c0 FROM users WHERE _c1="%s"' % (userId)).collect()[0]['_c0']

    def int2Asin(self, num):
        spark = self.spark
        return self.spark.sql('SELECT _c1 FROM movies WHERE _c0=' + str(num)).collect()[0]['_c1']

    def asin2Int(self, asin):
        spark = self.spark
        return self.spark.sql('SELECT _c0 FROM movies WHERE _c1="%s"' % (asin)).collect()[0]['_c0']

    def test_func(self): 
        spark = self.spark
        spark_sql = self.spark_sql
        users = self.users
        tf.users.show()
        
#test
tf = DTransformer('users', 'movies')
tf.test_func()

+--------+--------------+
|     _c0|           _c1|
+--------+--------------+
|15387583|A1KX1RGL86TKUR|
|63766600|A1VULUPIYJGE4H|
|12822853|A3OOBVVWR5CP0F|
|65677745|A2HOE63TJYB0XP|
|89133414|A1B6LX8PEEYUIB|
|17474658| AWREX1IOL40Q0|
|41228925|A2T6KMPVXL7P56|
|23315003|A38JIWXS2NZBT7|
|63044041| ANL4QZV6O39GN|
|51659961|A1UQ75MSANQSW0|
|48032562|A30E24BZ8Z1YL3|
| 8622532| AK5OAHQSPXLC7|
|83571599|A20MNUDAOP119V|
|57720706| AC3FB3YQBMKSR|
|65623191| AO2QDT69R1JWS|
|57034109|A37SXJD1Y5XJDW|
|52135900|A2G51NDWRKLPBI|
|22627389|A3KCWBZLTGI9NK|
|44114386| A2KQUPC55BM0T|
|95292262|A1YD1C7MQ82CPE|
+--------+--------------+
only showing top 20 rows



In [22]:
#test

print(tf.int2UserId(33812736))
print(tf.userId2Int('A2KQUPC55BM0T'))

# print(tf.int2Asin(50207744))
# print(tf.asin2Int('B001G5T6US'))

[Row(_c1=u'A2HMUYANNNCN77')]
A2HMUYANNNCN77
44114386


# 3. Use the model for the recommendation

In [51]:

class recommendation:
    
    recommender = None
    sc = None
    transformer = None
    
    def __init__(self, path):
        """
        path: path/to/the/als/model
        """
        sc = SparkSession.builder.appName('recommender').getOrCreate()
        rcmd = MatrixFactorizationModel.load(sc, path)
        tf = DTransformer('users', 'movies')
        
        self.sc = sc
        self.recommender = rcmd
        self.transformer = tf
    
    def findBestMovie(self, userId, num = 10):
        """
        Find the best match movies for user based on collaborative matrix factorization
        Return list of moviesid in int
        
        !!! Need to check if the user has watched these movies
        """
        rcmd = self.recommender
        tf = self.transformer
        
        userint = tf.userId2Int(userId)

        movie_int_list = rcmd.recommendProducts(int(userint), num)
        return movie_int_list
        
    
    def findRelatedMovie(self, movieId):
        pass
        
    def recommend(self, userId):
        mvlist = findBestMovie(userId)
        
# test

rec = recommendation(MODEL_PATH)
rec.findBestMovie('A39K4E0X3EFTUZ')

IllegalArgumentException: u'requirement failed: userId: 87758125 not found in the model'