# Spark Recommender System

In [1]:
# import necessary modules
import os
import shutil
import pyspark as ps
from pyspark.ml import Pipeline, Transformer
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS, ALSModel
from pyspark.sql import Row
from pyspark.sql.types import DoubleType

In [2]:
# Build the Spark session (reusing an existing one if present), then expose
# its SparkContext and report the running Spark version.
spark = ps.sql.SparkSession.builder.appName("ALS model").getOrCreate()
sc = spark.sparkContext
print(spark.version)

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/12/23 11:31:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


3.5.0


## Read in model

We will use user clusters as part of user features for our model here. 

In [3]:
# Load the previously saved ALS model from the local model directory.
final_model = ALSModel.load("./model/bestmodel")


In [4]:
# Generate the top `nrecommend` product recommendations for every user in the model.
nrecommend = 5
user_recs = final_model.recommendForAllUsers(nrecommend)
# Preview only the first 4 rows (customer_index + list of recommendations).
user_recs.show(4)

23/12/23 11:32:03 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS

+--------------+--------------------+
|customer_index|     recommendations|
+--------------+--------------------+
|            26|[{4332, 1.1560177...|
|            27|[{7645, 5.0136065...|
|            28|[{8766, 1.2823216...|
|            31|[{2857, 4.93124},...|
+--------------+--------------------+
only showing top 4 rows



                                                                                

In [5]:
# Collect the Spark recommendations into a pandas DataFrame; the recommender
# function below looks users up in `recs` by `customer_index`.
recs = user_recs.toPandas()

                                                                                

## Recommender Function

In [6]:
import pandas as pd

# Per-user data; the recommender function reads its `customer_unique_id`,
# `product_id` and `customer_index` columns.
user_features_df = pd.read_csv("./model_data/user_model.csv")
# Product catalogue; the recommender function reads its `product_id`,
# `product_index` and `product_category_name` columns.
products = pd.read_csv("./model_data/item_model.csv")

In [7]:
# Maximum number of recommendations available per user (must match the value
# passed to recommendForAllUsers when `recs` was generated).
nrecommend = 5


def user_recommendations(user_id, top_n = 3):
    """Print a user's known purchases followed by their top recommendations.

    The user is looked up in ``user_features_df``; up to three of their
    previously purchased products are printed with the category found in
    ``products``. The first ``top_n`` entries of that user's precomputed
    recommendation list in ``recs`` are then printed as product id and
    category.

    Parameters
    ----------
    user_id
        Identifier matched against ``user_features_df['customer_unique_id']``.
    top_n : int, default 3
        Number of recommendations to print. Must not exceed ``nrecommend``;
        if the user has fewer recommendations, only those are printed.

    Returns
    -------
    None
        Everything is printed. A message is printed instead of raising when
        ``top_n`` is too large, the user is unknown, or the user has no
        recommendations.
    """
    if top_n > nrecommend:
        print("Please select up to {} items to recommend".format(nrecommend))
        return

    # Filter the user's rows once and reuse them for purchases and the index.
    user_rows = user_features_df[user_features_df['customer_unique_id'] == user_id]
    if user_rows.empty:
        # Previously this fell through to an IndexError on `.unique()[0]`.
        print("User {} not found".format(user_id))
        return

    prior_purchases = user_rows['product_id'].unique()

    print("User: {}\n".format(user_id))
    print("Known positives: ")
    # Show at most three known purchases.
    for known_like_product in prior_purchases[:3]:
        known_like_category = products.loc[products['product_id'] == known_like_product,
                                           'product_category_name'].unique()[0]

        print("\t", known_like_product)
        print("\t", known_like_category, "\n")

    customer_index = user_rows['customer_index'].unique()[0]
    print("Top {} Recommendations: \n".format(top_n))

    user_rec_rows = recs.loc[recs['customer_index'] == customer_index, 'recommendations']
    if user_rec_rows.empty:
        print("No recommendations available for this user")
        return
    # Each entry is indexable, with the product index in position 0.
    user_rec_list = user_rec_rows.iloc[0]

    rec_columns = ['product_id', 'product_category_name']
    # Never index past the end of the user's recommendation list.
    for n in range(min(top_n, len(user_rec_list))):
        product_index = user_rec_list[n][0]
        rec_product = products.loc[products['product_index'] == product_index, rec_columns].iloc[0]

        # Access by column label: integer keys on a label-indexed Series are
        # deprecated as positional lookups in pandas.
        print("{}.\n".format(n+1), rec_product['product_id'])
        print(rec_product['product_category_name'])

__Test for customer_id = 'c8ed31310fc440a3f8031b177f9842c3'__

In [8]:
# Customer with several known purchases, all in construction_tools_construction;
# request the full 5 recommendations.
user_recommendations('c8ed31310fc440a3f8031b177f9842c3', top_n=5)

User: c8ed31310fc440a3f8031b177f9842c3

Known positives: 
	 1065e0ebef073787a7bf691924c60eeb
	 construction_tools_construction 

	 0cf2faf9749f53924cea652a09d8e327
	 construction_tools_construction 

	 0de59eddc63167215c972b0d785ffa7b
	 construction_tools_construction 

Top 5 Recommendations: 

1.
 d68eca92c5a12467b856479934dc39af
telephony
2.
 f399bbedb1d21192dffefc4a8b30a229
housewares
3.
 2c3dfe5b09e660349378f30d628d70bb
construction_tools_construction
4.
 6b770b5934dbdb29064efdf34bfa6cf5
sports_leisure
5.
 f20b79bd0939728b29d409062c5c486b
housewares


__Test for customer_id = 'b56d31572e47b1e6d1b88d3128f2226b'__

In [9]:
# Customer with a single known purchase (housewares).
user_recommendations('b56d31572e47b1e6d1b88d3128f2226b', top_n=5)

User: b56d31572e47b1e6d1b88d3128f2226b

Known positives: 
	 9d364fec2ac9a80f64bae58b5c034832
	 housewares 

Top 5 Recommendations: 

1.
 5dddb31154cbd968caa4706ef0f4e0f0
garden_tools
2.
 2ca431a4c4a04c65e66c345e282d0a54
small_appliances
3.
 acffe5d7cd56e6b564cf6841486644ff
health_beauty
4.
 3014e35fd70fce29095ced5cdc89f4ce
telephony
5.
 42eb5329dd0ee3d76d135ce04c84451b
food_drink


__Test for customer_id = '89be58cbdd6ef318e3ed93fdb22be178'__

In [10]:
# Customer with a single known purchase (diapers_and_hygiene).
user_recommendations('89be58cbdd6ef318e3ed93fdb22be178', top_n=5)

User: 89be58cbdd6ef318e3ed93fdb22be178

Known positives: 
	 3fdb534dccf5bc9ab0406944b913787d
	 diapers_and_hygiene 

Top 5 Recommendations: 

1.
 0723afa6f9a5a11c512396db0bb03051
stationery
2.
 50627d7c3dd43902d64134574befe12f
computers_accessories
3.
 61c9ce380f22395eecde18d76d118c47
electronics
4.
 72d135605b8f0dd92505c47ca12157bd
health_beauty
5.
 7e646f7e1e428a1ddde5a18dd9d42e95
office_furniture


In [11]:
# Repeat of the first test call (same customer and top_n); output should be identical.
user_recommendations('c8ed31310fc440a3f8031b177f9842c3', top_n=5)

User: c8ed31310fc440a3f8031b177f9842c3

Known positives: 
	 1065e0ebef073787a7bf691924c60eeb
	 construction_tools_construction 

	 0cf2faf9749f53924cea652a09d8e327
	 construction_tools_construction 

	 0de59eddc63167215c972b0d785ffa7b
	 construction_tools_construction 

Top 5 Recommendations: 

1.
 d68eca92c5a12467b856479934dc39af
telephony
2.
 f399bbedb1d21192dffefc4a8b30a229
housewares
3.
 2c3dfe5b09e660349378f30d628d70bb
construction_tools_construction
4.
 6b770b5934dbdb29064efdf34bfa6cf5
sports_leisure
5.
 f20b79bd0939728b29d409062c5c486b
housewares
