In [None]:
!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.3.1.tar.gz (281.4 MB)
[K     |████████████████████████████████| 281.4 MB 43 kB/s 
[?25hCollecting py4j==0.10.9.5
  Downloading py4j-0.10.9.5-py2.py3-none-any.whl (199 kB)
[K     |████████████████████████████████| 199 kB 68.2 MB/s 
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.3.1-py2.py3-none-any.whl size=281845512 sha256=a9027ffbc9ed283fdc4af7f5e94b4a96d517ac878b74b453de70306e19052350
  Stored in directory: /root/.cache/pip/wheels/43/dc/11/ec201cd671da62fa9c5cc77078235e40722170ceba231d7598
Successfully built pyspark
Installing collected packages: py4j, pyspark
Successfully installed py4j-0.10.9.5 pyspark-3.3.1


In [None]:
from pyspark.sql import SparkSession

# Application name handed to the session builder.
appName = "Sistem Rekomender di Spark"

# Build a new SparkSession, or reuse the one already running in this kernel.
# NOTE(review): "spark.some.config.option" looks like a placeholder setting
# rather than a real Spark option -- confirm before relying on or removing it.
spark = (
    SparkSession.builder
    .appName(appName)
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)

# Handle to the underlying SparkContext of the session.
sc = spark.sparkContext

# Keep the session as the cell's final expression so the notebook actually
# renders it; a bare expression in the middle of a cell is silently discarded.
spark

In [None]:
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS
from pyspark.sql import Row

In [None]:
# Read the ratings file as text and expose it as an RDD (one record per line).
lines = spark.read.text("/content/sample_movielens_ratings.txt").rdd

# Break every text line into its "::"-separated fields.
parts = lines.map(lambda record: record.value.split("::"))


def _to_rating_row(fields):
    """Turn the first four string fields of a record into a typed ratings Row."""
    return Row(
        userId=int(fields[0]),
        movieId=int(fields[1]),
        rating=float(fields[2]),
        timestamp=int(fields[3]),
    )


ratingsRDD = parts.map(_to_rating_row)

In [None]:
# Materialize the RDD of Row objects as a DataFrame (no explicit schema is supplied).
ratings = spark.createDataFrame(data=ratingsRDD)

In [None]:
# Split the ratings with weights 0.8 / 0.2 into training and test sets.
# A fixed seed keeps the split -- and therefore every metric computed from it --
# repeatable across "Restart & Run All".
training, test = ratings.randomSplit([0.8, 0.2], seed=42)

In [None]:
# Preview the first 10 rows of the training split.
training.show(n=10)

+------+-------+------+----------+
|userId|movieId|rating| timestamp|
+------+-------+------+----------+
|     0|      2|   3.0|1424380312|
|     0|      3|   1.0|1424380312|
|     0|      9|   4.0|1424380312|
|     0|     11|   1.0|1424380312|
|     0|     12|   2.0|1424380312|
|     0|     17|   1.0|1424380312|
|     0|     21|   1.0|1424380312|
|     0|     23|   1.0|1424380312|
|     0|     26|   3.0|1424380312|
|     0|     28|   1.0|1424380312|
+------+-------+------+----------+
only showing top 10 rows



In [None]:
# Configure the ALS recommender on the (userId, movieId, rating) columns.
# coldStartStrategy="drop" is set so that rows the model cannot score do not
# end up as NaN predictions in the evaluation step.
# An explicit seed fixes the random factor initialization so that repeated
# runs on the same training data produce the same model.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol="userId",
    itemCol="movieId",
    ratingCol="rating",
    coldStartStrategy="drop",
    seed=42,
)

# Fit the model on the training split.
model = als.fit(training)

In [None]:
# Score the held-out ratings with the fitted model.
predictions = model.transform(test)

# RMSE evaluator comparing the "rating" label against the "prediction" column.
evaluator = RegressionEvaluator(
    labelCol="rating",
    predictionCol="prediction",
    metricName="rmse",
)

In [None]:
# Compute the evaluator's metric on the test-set predictions and report it.
rmse = evaluator.evaluate(predictions)
print(f"root mean square error = {rmse}")

root mean square error = 1.9130158037760667


In [None]:
# Top 10 movie recommendations for every user.
userRecs = model.recommendForAllUsers(numItems=10)

# Top 10 user recommendations for every movie.
movieRecs = model.recommendForAllItems(numUsers=10)

In [None]:
# Preview the per-user recommendations (first 10 rows).
userRecs.show(n=10)

+------+--------------------+
|userId|     recommendations|
+------+--------------------+
|    20|[{22, 4.8527174},...|
|    10|[{2, 3.9856744}, ...|
|     0|[{9, 3.8117704}, ...|
|     1|[{55, 4.7284493},...|
|    21|[{29, 5.0215793},...|
|    11|[{29, 6.0225997},...|
|    12|[{32, 5.1344213},...|
|    22|[{65, 5.0562215},...|
|     2|[{47, 6.5279336},...|
|    13|[{52, 4.3156037},...|
+------+--------------------+
only showing top 10 rows



In [None]:
# Preview the per-movie recommendations (first 10 rows).
movieRecs.show(n=10)

+-------+--------------------+
|movieId|     recommendations|
+-------+--------------------+
|     20|[{17, 4.6680107},...|
|     40|[{2, 3.8451772}, ...|
|     10|[{17, 3.8227758},...|
|     50|[{12, 4.119929}, ...|
|     80|[{23, 4.2882237},...|
|     70|[{4, 3.960398}, {...|
|     60|[{10, 2.8929265},...|
|     90|[{26, 7.1886744},...|
|     30|[{3, 5.242028}, {...|
|      0|[{28, 2.7726943},...|
+-------+--------------------+
only showing top 10 rows

