In [0]:
from pyspark.sql import SparkSession

In [0]:
# Create the Spark session for this notebook, or reuse one that already exists.
spark = (
    SparkSession.builder
    .appName('ALS Book Recommendation System')
    .getOrCreate()
)

In [0]:
# Read the ratings CSV: the first row supplies the column names and the
# column types are inferred from the data.
ratingDF = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .csv('/FileStore/tables/ratings.csv')
)

In [0]:
# Read the books CSV: the first row supplies the column names and the
# column types are inferred from the data.
bookDF = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .csv('/FileStore/tables/books.csv')
)

In [0]:
from pyspark.ml.recommendation import ALS 
from pyspark.ml.evaluation import RegressionEvaluator

In [0]:
# Configure the ALS estimator for explicit ratings.
als = ALS(
    # Columns of the ratings frame that identify the user, the item and the score.
    userCol="user_id",
    itemCol="book_id",
    ratingCol="rating",
    # Ratings are treated as explicit feedback, not implicit preference signals.
    implicitPrefs=False,
    # Constrain the learned factors to be non-negative.
    nonnegative=True,
    # Drop rows whose prediction is NaN so they do not poison the evaluation metric.
    coldStartStrategy="drop",
)


In [0]:
# Split the ratings into roughly 80% training and 20% test rows.
# The explicit seed pins the random assignment, so a fresh
# "Restart & Run All" reproduces the same partitions and therefore the same
# model, RMSE and recommendations. Without it every run splits differently.
(training, test) = ratingDF.randomSplit([0.8, 0.2], seed=42)

In [0]:
# Fit the ALS estimator on the training split.
model = als.fit(dataset=training)

In [0]:
# Score the held-out split; the result gains a `prediction` column.
predictions = model.transform(dataset=test)

In [0]:
# Peek at the first five scored rows.
predictions.show(n=5)

+-------+-------+------+----------+
|book_id|user_id|rating|prediction|
+-------+-------+------+----------+
|      1|  32592|     4| 4.3245516|
|      2|  32592|     4| 4.0473323|
|      6|  35982|     2| 3.8673167|
|     18|  32592|     4|  4.003563|
|     23|  32592|     4| 3.9200382|
+-------+-------+------+----------+
only showing top 5 rows



In [0]:
# Evaluator that compares the `prediction` column against the true `rating`
# using root-mean-square error.
evaluator = RegressionEvaluator(
    labelCol="rating",
    predictionCol="prediction",
    metricName="rmse",
)

In [0]:
# RMSE of the model on the scored test rows.
rmse = evaluator.evaluate(dataset=predictions)

In [0]:
# Report the test RMSE; print() already converts the value to its string form.
print(rmse)

0.9161946422741925


In [0]:
# Top five book recommendations for every user.
bookrecommend = model.recommendForAllUsers(5)

In [0]:
# Display the first 20 users with their (truncated) recommendation lists.
bookrecommend.show(n=20)

+-------+--------------------+
|user_id|     recommendations|
+-------+--------------------+
|     26|[{5454, 4.1139164...|
|     27|[{7844, 4.9555135...|
|     28|[{1198, 4.5944614...|
|     31|[{3628, 4.257779}...|
|     34|[{9842, 3.4185054...|
|     44|[{7063, 4.9312105...|
|     53|[{6924, 2.0278544...|
|     65|[{9842, 5.390405}...|
|     76|[{7401, 5.7196074...|
|     78|[{5730, 2.7363605...|
|     81|[{9842, 2.9970207...|
|     85|[{8187, 5.0689964...|
|    101|[{9531, 5.419071}...|
|    103|[{9842, 4.510805}...|
|    108|[{4778, 4.1035943...|
|    115|[{2507, 3.212502}...|
|    126|[{5202, 4.6641765...|
|    133|[{4594, 5.549215}...|
|    137|[{7063, 4.564987}...|
|    148|[{8926, 4.6269703...|
+-------+--------------------+
only showing top 20 rows



In [0]:
# Inspect one full, untruncated row: a user and its recommended (book_id, rating) pairs.
bookrecommend.head()

Out[18]: Row(user_id=26, recommendations=[Row(book_id=5454, rating=4.113916397094727), Row(book_id=8187, rating=4.112738132476807), Row(book_id=2840, rating=4.086423397064209), Row(book_id=2149, rating=4.083437919616699), Row(book_id=9531, rating=4.0644354820251465)])

In [0]:
# Top five users recommended for every book.
userrecommend = model.recommendForAllItems(5)

In [0]:
# Display the first 20 books with their (truncated) recommended-user lists.
userrecommend.show(n=20)

+-------+--------------------+
|book_id|     recommendations|
+-------+--------------------+
|     26|[{5527, 5.3773174...|
|     27|[{6600, 5.847627}...|
|     28|[{38723, 5.876761...|
|     31|[{10941, 5.882321...|
|     34|[{31285, 4.756283...|
|     44|[{41051, 5.256551...|
|     53|[{20124, 4.873976...|
|     65|[{46127, 5.907905...|
|     76|[{13108, 6.010493...|
|     78|[{10423, 5.127031...|
|     81|[{31915, 5.481965...|
|     85|[{41569, 5.893685...|
|    101|[{16555, 5.364329...|
|    103|[{31915, 5.773507...|
|    108|[{18886, 4.730547...|
|    115|[{37246, 5.774070...|
|    126|[{21791, 5.86339}...|
|    133|[{30757, 5.42681}...|
|    137|[{9284, 5.800718}...|
|    148|[{14226, 4.980247...|
+-------+--------------------+
only showing top 20 rows

