In [1]:
!pip install pyspark py4j

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.4.0.tar.gz (310.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.4.0-py2.py3-none-any.whl size=311317130 sha256=8177710fa4a4ce503e4a2c1a443e3d56178d1e07195ba58ec8b0a1ae0ba28f3d
  Stored in directory: /root/.cache/pip/wheels/7b/1b/4b/3363a1d04368e7ff0d408e57ff57966fcdf00583774e761327
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.4.0


In [2]:
from pyspark.sql import SparkSession

In [4]:
spark = SparkSession.builder.appName('movie').getOrCreate()

In [5]:
data = spark.read.csv('movielens_ratings.csv',inferSchema=True, header=True)

In [6]:
data.show()

+-------+------+------+
|movieId|rating|userId|
+-------+------+------+
|      2|   3.0|     0|
|      3|   1.0|     0|
|      5|   2.0|     0|
|      9|   4.0|     0|
|     11|   1.0|     0|
|     12|   2.0|     0|
|     15|   1.0|     0|
|     17|   1.0|     0|
|     19|   1.0|     0|
|     21|   1.0|     0|
|     23|   1.0|     0|
|     26|   3.0|     0|
|     27|   1.0|     0|
|     28|   1.0|     0|
|     29|   1.0|     0|
|     30|   1.0|     0|
|     31|   1.0|     0|
|     34|   1.0|     0|
|     37|   1.0|     0|
|     41|   2.0|     0|
+-------+------+------+
only showing top 20 rows



In [7]:
from pyspark.ml.recommendation import ALS

In [8]:
from pyspark.ml.evaluation import RegressionEvaluator

In [9]:
data.describe().show()

+-------+------------------+------------------+------------------+
|summary|           movieId|            rating|            userId|
+-------+------------------+------------------+------------------+
|  count|              1501|              1501|              1501|
|   mean| 49.40572951365756|1.7741505662891406|14.383744170552964|
| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|
|    min|                 0|               1.0|                 0|
|    max|                99|               5.0|                29|
+-------+------------------+------------------+------------------+



In [11]:
training,test = data.randomSplit([0.8,0.2])

In [12]:
als = ALS(maxIter = 5, regParam=0.01, userCol='userId',itemCol = 'movieId',ratingCol='rating') 

In [14]:
als_model = als.fit(training)

In [16]:
predictions = als_model.transform(test)

In [17]:
predictions.show()

+-------+------+------+-----------+
|movieId|rating|userId| prediction|
+-------+------+------+-----------+
|      3|   1.0|    28| -1.0837516|
|      0|   1.0|    22|  0.5507598|
|      5|   2.0|    22| -0.5912574|
|      5|   1.0|    13|  2.0051832|
|      6|   1.0|    13|  0.4963708|
|      0|   1.0|    20|  1.0030634|
|      3|   1.0|     9|-0.14452438|
|      5|   1.0|     8|   3.773305|
|      6|   2.0|    23|  1.3595606|
|      2|   2.0|     7| -0.7760804|
|      4|   1.0|     7|  1.2490206|
|      7|   1.0|     7|  4.4470334|
|      0|   3.0|    10|-0.22927204|
|      7|   1.0|    25|    2.34634|
|      4|   1.0|    29|  1.5205306|
|      2|   4.0|    21|   3.925638|
|      0|   1.0|    11| 0.82289046|
|      4|   1.0|    14| 0.29416555|
|      6|   1.0|    14|  2.3863883|
|      2|   3.0|     0| -2.8354392|
+-------+------+------+-----------+
only showing top 20 rows



In [20]:
evaluator = RegressionEvaluator(metricName='rmse',labelCol='rating',predictionCol='prediction')

In [23]:
rmse = evaluator.evaluate(predictions)
rmse

2.276856066899663

In [24]:
single_user = test.filter(test['userId']==11).select(['movieId','userId'])

In [25]:
single_user.show()

+-------+------+
|movieId|userId|
+-------+------+
|      0|    11|
|     13|    11|
|     19|    11|
|     25|    11|
|     35|    11|
|     38|    11|
|     39|    11|
|     40|    11|
|     50|    11|
|     51|    11|
|     64|    11|
|     79|    11|
+-------+------+



In [26]:
recommendations = als_model.transform(single_user)

In [27]:
recommendations.orderBy('prediction',ascending=False).show()

+-------+------+-----------+
|movieId|userId| prediction|
+-------+------+-----------+
|     35|    11|  2.4809632|
|     64|    11|  2.1812723|
|     50|    11|  1.1570115|
|     13|    11| 0.92365324|
|      0|    11| 0.82289046|
|     19|    11|  0.4037799|
|     39|    11|  -0.363097|
|     38|    11|-0.50289917|
|     25|    11| -1.3100042|
|     79|    11| -1.7358514|
|     51|    11| -2.7608528|
|     40|    11|  -5.230209|
+-------+------+-----------+

