In [1]:
from google.colab import drive
drive.mount("/gdrive")
%cd "gdrive"

Mounted at /gdrive
[Errno 2] No such file or directory: 'gdrive'
/content


In [2]:
import os
os.chdir("/gdrive/MyDrive/big_data")
!ls

'aws cloud'			 RecommendationApplication
'Big Data Uygulamaları Kaynak'	 server-installation
 IdeaProjects			 spark-mllib


In [3]:
pip install -q pyspark

[K     |████████████████████████████████| 281.3 MB 35 kB/s 
[K     |████████████████████████████████| 198 kB 55.4 MB/s 
[?25h  Building wheel for pyspark (setup.py) ... [?25l[?25hdone


In [4]:
from pyspark.sql import SparkSession

In [5]:
# Create the Spark session for this notebook, or reuse one that already exists.
sparkSession = (
    SparkSession.builder
    .appName("ALS Book Recommendation System")
    .getOrCreate()
)

In [9]:
# Load the books CSV: first row is the header, column types are inferred.
bookDS = (
    sparkSession.read
    .options(header=True, inferSchema=True)
    .csv("/gdrive/MyDrive/big_data/RecommendationApplication/books.csv")
)

In [10]:
# Load the ratings CSV: first row is the header, column types are inferred.
ratingDS = (
    sparkSession.read
    .options(header=True, inferSchema=True)
    .csv("/gdrive/MyDrive/big_data/RecommendationApplication/ratings.csv")
)

# 1. Data Preprocessing

In [11]:
# Randomly split the ratings with weights 0.8 / 0.2 into training and test
# sets; the fixed seed keeps the split repeatable between runs.
train_data, test_data = ratingDS.randomSplit(weights=[0.8, 0.2], seed=42)

# 2. Create Model

In [12]:
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [13]:
# Configure the ALS estimator on the (user_id, book_id, rating) columns.
# Ratings are treated as explicit feedback (implicitPrefs=False).
# NOTE(review): coldStartStrategy="drop" presumably removes test rows for
# users/books unseen in training so they do not yield NaN predictions —
# confirm against the Spark ALS documentation.
als = ALS(
    userCol="user_id",
    itemCol="book_id",
    ratingCol="rating",
    implicitPrefs=False,
    nonnegative=True,
    coldStartStrategy="drop",
    seed=0,
)

# 3. Train Model

In [14]:
# Fit the ALS estimator on the training split only.
model = als.fit(dataset=train_data)

# 4. Predictions

In [15]:
# Score the held-out ratings; the result adds a "prediction" column next to
# the true "rating". Preview the first five rows.
predictions = model.transform(dataset=test_data)
predictions.show(n=5)

+-------+-------+------+----------+
|book_id|user_id|rating|prediction|
+-------+-------+------+----------+
|      3|  32592|     5| 3.3034532|
|      7|  19984|     5| 4.8631363|
|     20|  32592|     4|  3.908689|
|     22|  19984|     5|  4.801406|
|     26|  35982|     2| 2.7172246|
+-------+-------+------+----------+
only showing top 5 rows



# 5. Evaluate Model


In [16]:
# RMSE between the true "rating" column and the model's "prediction" column.
evaluator = RegressionEvaluator(
    labelCol="rating",
    predictionCol="prediction",
    metricName="rmse",
)

In [17]:
# Compute the RMSE on the test-set predictions and print it.
rmse = evaluator.evaluate(dataset=predictions)
print(rmse)

0.9151110762031691


# 6. Recommendations

In [22]:
# For each user, ask the model for its 5 highest-scoring books and preview
# the result (default number of rows, long cells truncated).
bookrecommend = model.recommendForAllUsers(5)
bookrecommend.show()



+-------+--------------------+
|user_id|     recommendations|
+-------+--------------------+
|      1|[{1462, 4.05055},...|
|      3|[{6590, 1.1311747...|
|      5|[{7831, 5.128952}...|
|      6|[{3124, 5.075501}...|
|      9|[{7831, 4.551137}...|
|     12|[{8946, 4.176677}...|
|     13|[{7831, 4.3549337...|
|     15|[{4778, 4.0326495...|
|     16|[{3920, 4.9744034...|
|     17|[{7063, 4.9005985...|
|     19|[{3953, 4.302048}...|
|     20|[{1338, 5.145875}...|
|     22|[{1338, 4.5002127...|
|     26|[{8946, 4.038039}...|
|     27|[{4868, 5.075374}...|
|     28|[{9578, 4.266548}...|
|     31|[{7831, 4.438125}...|
|     34|[{2292, 3.4391937...|
|     35|[{8946, 3.977139}...|
|     37|[{7063, 5.0781956...|
+-------+--------------------+
only showing top 20 rows



In [23]:
# For each book, ask the model for the 5 users with the highest predicted
# rating; show five rows without truncating the recommendation arrays.
userrecommend = model.recommendForAllItems(5)
userrecommend.show(n=5, truncate=False)



+-------+---------------------------------------------------------------------------------------------------+
|book_id|recommendations                                                                                    |
+-------+---------------------------------------------------------------------------------------------------+
|1      |[{40181, 6.0019536}, {35916, 5.875065}, {48274, 5.8682704}, {13341, 5.8470078}, {31915, 5.82951}]  |
|3      |[{30440, 5.437125}, {50531, 5.0368066}, {30859, 4.9892344}, {18385, 4.9765987}, {20218, 4.9703507}]|
|5      |[{53349, 5.6333094}, {9383, 5.6288776}, {41819, 5.541909}, {28800, 5.518256}, {38884, 5.457624}]   |
|6      |[{50531, 5.6786737}, {43350, 5.5281353}, {41031, 5.5138097}, {53349, 5.5060344}, {48001, 5.471308}]|
|9      |[{30440, 5.21437}, {32039, 5.0102534}, {36501, 4.998862}, {21694, 4.9959164}, {52566, 4.965887}]   |
+-------+---------------------------------------------------------------------------------------------------+
only showing top 5 rows

In [20]:
# Inspect a single row to see the nested structure of `recommendations`:
# a list of Row(user_id, rating) entries for one book_id.
userrecommend.first()

Row(book_id=1, recommendations=[Row(user_id=40181, rating=6.001953601837158), Row(user_id=35916, rating=5.875064849853516), Row(user_id=48274, rating=5.868270397186279), Row(user_id=13341, rating=5.847007751464844), Row(user_id=31915, rating=5.82951021194458)])