# A simple application

Now we will write an application that uses the ALS recommender model to suggest the best-suited movies for a particular user.

## 1. Load model

    In this section we load the saved model into the workspace
    and test it to make sure the model works well.

In [2]:
# Notebook-wide locations (relative to the working directory).
MODEL_PATH = './model/als'  # passed to MatrixFactorizationModel.load below
DATA_PATH = './data'        # data root; not referenced by the cells visible here

In [3]:
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating

# Load the persisted ALS matrix-factorization model from MODEL_PATH.
# NOTE(review): `sc` is not defined in any cell shown here - presumably the
# SparkContext injected by the PySpark kernel; confirm it exists after a
# Restart Kernel -> Run All.
recommender = MatrixFactorizationModel.load(sc, MODEL_PATH)

In [4]:
# Smoke test: ask the loaded model for the 10 highest-scoring products
# for a single integer user code.

products_for_users = recommender.recommendProducts(23315003, 10)
products_for_users

[Rating(user=23315003, product=89096211, rating=7.55763020208037),
 Rating(user=23315003, product=5563169, rating=7.416243055798675),
 Rating(user=23315003, product=1764127, rating=7.334043878627421),
 Rating(user=23315003, product=81762843, rating=7.222764450702324),
 Rating(user=23315003, product=21415957, rating=7.212394924360598),
 Rating(user=23315003, product=3977507, rating=7.142694588156918),
 Rating(user=23315003, product=54276349, rating=6.991737114371794),
 Rating(user=23315003, product=45688219, rating=6.953158659999422),
 Rating(user=23315003, product=28753462, rating=6.766552422598935),
 Rating(user=23315003, product=94044974, rating=6.7510200886484935)]

## 2. Read user and movie data and transform IDs

In [5]:
"""
Data transformation: To transform between userId and movieId (asin) and their Int form 
"""

from os import path
from pyspark.sql import SparkSession

class DTransformer(object):
    """Translate between string identifiers and their integer codes.

    Two CSV directories are read without a header or schema, so Spark names
    the columns ``_c0`` and ``_c1``.  In both tables ``_c0`` is the integer
    code and ``_c1`` is the original string identifier (the user id for
    ``users``, the ASIN for ``movies``).  Each loaded DataFrame is also
    registered as a temporary view (``users`` / ``movies``) on the session.

    Attributes:
        users: DataFrame of the users table, or None if not loaded.
        movies: DataFrame of the movies table, or None if not loaded.
        data_url: Directory that ``user_dir`` / ``movie_dir`` are joined onto.
        spark_sql: SQLContext bound to ``spark`` (kept for existing callers).
        spark: The SparkSession used for every read and lookup.
    """

    users = None
    movies = None
    data_url = './data/'
    spark_sql = None
    spark = None

    def __init__(self, user_dir=None, movie_dir=None):
        """Create/reuse the Spark session and load the requested tables.

        Args:
            user_dir: Path of the users CSV relative to ``data_url``;
                skipped when falsy.
            movie_dir: Path of the movies CSV relative to ``data_url``;
                skipped when falsy.
        """
        # SQLContext is not imported by the surrounding cell, so bring it
        # into scope here instead of relying on the kernel's namespace.
        from pyspark.sql import SQLContext

        spark = SparkSession.builder.appName('transformer').getOrCreate()
        self.spark = spark
        # SQLContext takes the SparkContext first; the session is passed
        # explicitly so both objects share the same catalog.
        self.spark_sql = SQLContext(spark.sparkContext, spark)

        if user_dir:
            self.users = self._load_table(user_dir, "users")

        if movie_dir:
            self.movies = self._load_table(movie_dir, "movies")

    def _load_table(self, sub_dir, view_name):
        """Read one CSV under ``data_url`` and register it as a temp view."""
        frame = self.spark.read.csv(path.join(self.data_url, sub_dir))
        frame.createOrReplaceTempView(view_name)
        return frame

    def _lookup(self, view_name, key_col, key, value_col):
        """Return ``value_col`` of the first row where ``key_col == key``.

        The comparison is built with the DataFrame API rather than by
        pasting ``key`` into a SQL string, so ids containing quotes cannot
        break or alter the query.

        Raises:
            IndexError: If no row matches (same exception type the previous
                ``collect()[0]`` implementation produced).
        """
        frame = self.spark.table(view_name)
        # first() fetches at most one row instead of collecting every match.
        row = frame.filter(frame[key_col] == key).select(value_col).first()
        if row is None:
            raise IndexError(
                'no row in "%s" with %s == %r' % (view_name, key_col, key))
        return row[value_col]

    def int2UserId(self, num):
        """Return the string user id for integer code ``num``."""
        return self._lookup('users', '_c0', int(num), '_c1')

    def userId2Int(self, userId):
        """Return the integer code for string user id ``userId``."""
        return int(self._lookup('users', '_c1', userId, '_c0'))

    def int2Asin(self, num):
        """Return the ASIN for integer movie code ``num``."""
        return self._lookup('movies', '_c0', int(num), '_c1')

    def asin2Int(self, asin):
        """Return the integer movie code for ``asin``."""
        return int(self._lookup('movies', '_c1', asin, '_c0'))

    def test_func(self):
        """Print the first rows of this instance's users table."""
        # Uses self (not a notebook-level variable) so the check works for
        # any instance, whatever name it is bound to.
        self.users.show()
        
# Smoke test: load both lookup tables from their CSV directories and
# preview the users table.
tf = DTransformer(user_dir='users', movie_dir='movies')
tf.test_func()

+--------+--------------+
|     _c0|           _c1|
+--------+--------------+
|15387583|A1KX1RGL86TKUR|
|63766600|A1VULUPIYJGE4H|
|12822853|A3OOBVVWR5CP0F|
|65677745|A2HOE63TJYB0XP|
|89133414|A1B6LX8PEEYUIB|
|17474658| AWREX1IOL40Q0|
|41228925|A2T6KMPVXL7P56|
|23315003|A38JIWXS2NZBT7|
|63044041| ANL4QZV6O39GN|
|51659961|A1UQ75MSANQSW0|
|48032562|A30E24BZ8Z1YL3|
| 8622532| AK5OAHQSPXLC7|
|83571599|A20MNUDAOP119V|
|57720706| AC3FB3YQBMKSR|
|65623191| AO2QDT69R1JWS|
|57034109|A37SXJD1Y5XJDW|
|52135900|A2G51NDWRKLPBI|
|22627389|A3KCWBZLTGI9NK|
|44114386| A2KQUPC55BM0T|
|95292262|A1YD1C7MQ82CPE|
+--------+--------------+
only showing top 20 rows



In [6]:
# Round-trip check: each integer code should map to its string id and back,
# first for a user and then for a movie.

for lookup, key in (
    (tf.int2UserId, 33812736),
    (tf.userId2Int, 'A2HMUYANNNCN77'),
    (tf.int2Asin, 50207744),
    (tf.asin2Int, 'B001G5T6US'),
):
    print(lookup(key))

A2HMUYANNNCN77
33812736
B001G5T6US
50207744


## 3. Use the model for recommendation

In [7]:
class recommendation(object):
    """Recommend movies (as ASINs) to a user with a saved ALS model.

    Attributes:
        recommender: The loaded MatrixFactorizationModel.
        sc: The SparkSession obtained in ``__init__``.  Despite the name it
            is the session, not a SparkContext; it is left that way so
            existing readers of this attribute see the same object.
        transformer: DTransformer used to map ids to and from integer codes.
    """

    recommender = None
    sc = None
    transformer = None

    def __init__(self, path):
        """
        path: path/to/the/als/model
        """
        session = SparkSession.builder.appName('recommender').getOrCreate()
        # MatrixFactorizationModel.load takes a SparkContext, so pass the
        # session's context rather than the session itself.
        model = MatrixFactorizationModel.load(session.sparkContext, path)
        id_map = DTransformer('users', 'movies')

        self.sc = session
        self.recommender = model
        self.transformer = id_map

    def findBestMovie(self, userId, num = 10):
        """
        Find the best match movies for user based on collaborative matrix factorization
        Return list of ASIN

        userId: string user id; converted to its integer code for the model
        num: how many recommendations to request

        TODO: check whether the user has already watched these movies;
        nothing is filtered out here.
        """
        id_map = self.transformer
        user_code = id_map.userId2Int(userId)
        predictions = self.recommender.recommendProducts(user_code, num)

        # Each prediction is a Rating(user, product, rating); translate the
        # integer product code back to its ASIN (one lookup per prediction).
        return [id_map.int2Asin(rating.product) for rating in predictions]

    def findRelatedMovie(self, movieId):
        """Placeholder: not implemented yet, always returns None."""
        pass

    def recommend(self, userId, num=10):
        """Return ``num`` recommended ASINs for ``userId``.

        Currently delegates directly to ``findBestMovie``.
        """
        return self.findBestMovie(userId, num)


In [8]:
# End-to-end check: build the recommender from the saved model and fetch
# the top 10 ASINs for one string user id.
rec = recommendation(MODEL_PATH)
movie_list = rec.recommend('A39K4E0X3EFTUZ', num=10)

movie_list

[u'B003ZT2H5E',
 u'B00186DF48',
 u'1574924303',
 u'B000I2IUFM',
 u'B0009W5K1S',
 u'B003XEKXXO',
 u'157523128X',
 u'B000SQL4WY',
 u'B001FPKJN2',
 u'B0047PNOMK']