In [1]:
from pyspark.sql import SparkSession
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS

In [2]:
# Create the Spark session used by every later cell (or reuse one that is
# already running in this kernel).
spark = (
    SparkSession.builder
    .appName('Recommender System CF')
    .getOrCreate()
)

In [3]:
# Load the ratings CSV; the first row is the header and column types are
# inferred from the data.
rat = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('rat.csv')
)

In [4]:
# Check the column names and the types Spark inferred for the ratings file.
rat.printSchema()

root
 |-- userId: integer (nullable = true)
 |-- movieId: integer (nullable = true)
 |-- rating: double (nullable = true)
 |-- timestamp: integer (nullable = true)



In [5]:
# Keep only the columns the ALS model is configured with below
# (userId, movieId, rating); the timestamp column is not used anywhere else.
data = rat.drop('timestamp')

In [6]:
# Preview the first rows of the ratings table after dropping the timestamp.
data.show()

+------+-------+------+
|userId|movieId|rating|
+------+-------+------+
|     1|     31|   2.5|
|     1|   1029|   3.0|
|     1|   1061|   3.0|
|     1|   1129|   2.0|
|     1|   1172|   4.0|
|     1|   1263|   2.0|
|     1|   1287|   2.0|
|     1|   1293|   2.0|
|     1|   1339|   3.5|
|     1|   1343|   2.0|
|     1|   1371|   2.5|
|     1|   1405|   1.0|
|     1|   1953|   4.0|
|     1|   2105|   4.0|
|     1|   2150|   3.0|
|     1|   2193|   2.0|
|     1|   2294|   2.0|
|     1|   2455|   2.5|
|     1|   2968|   1.0|
|     1|   3671|   3.0|
+------+-------+------+
only showing top 20 rows



In [7]:
# Summary statistics (count, mean, stddev, min, max) for the rating column.
data.describe('rating').show()

+-------+------------------+
|summary|            rating|
+-------+------------------+
|  count|            100004|
|   mean| 3.543608255669773|
| stddev|1.0580641091070326|
|    min|               0.5|
|    max|               5.0|
+-------+------------------+



In [8]:
# 80/20 train/test split. A fixed seed is passed so that the split -- and
# therefore the fitted model and every table shown below -- is the same on
# each Restart & Run All instead of changing from run to run.
training, test = data.randomSplit([0.8, 0.2], seed=42)

In [9]:
# Alternating Least Squares collaborative-filtering estimator.
# - coldStartStrategy='drop': users or movies that appear only in the test
#   split have no learned factors; with the default strategy ('nan') their
#   predictions are NaN, which would also turn any aggregate metric (e.g. RMSE)
#   into NaN. Dropping those rows keeps the predictions usable.
# - seed: fixes the random initialisation of the factors so repeated runs
#   give the same model.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol='userId',
    itemCol='movieId',
    ratingCol='rating',
    coldStartStrategy='drop',
    seed=42,
)

In [10]:
# Fit the ALS estimator on the training split only.
model = als.fit(training)

In [11]:
# Score the held-out test split; this adds a `prediction` column next to the
# actual `rating`.
predictions = model.transform(test)

In [12]:
# Compare actual ratings with the model's predictions for a sample of rows.
predictions.show()

+------+-------+------+----------+
|userId|movieId|rating|prediction|
+------+-------+------+----------+
|    30|    463|   4.0| 2.9274898|
|    85|    471|   3.0| 1.9460993|
|   460|    471|   5.0| 4.5281115|
|   350|    471|   3.0|  3.565811|
|   306|    471|   3.0| 3.6842525|
|   491|    471|   3.0| 2.9561155|
|   514|    471|   4.0| 4.2804236|
|    30|    471|   4.0| 3.3044834|
|   547|    496|   3.0| 2.8261738|
|   580|   1088|   3.0| 3.1876535|
|   133|   1088|   1.5| 1.6899627|
|   372|   1088|   4.0| 3.2355645|
|    52|   1088|   4.0| 4.0418725|
|   363|   1088|   2.0| 3.8845522|
|    57|   1088|   4.0| 3.5915124|
|   262|   1088|   2.0| 1.9665437|
|   531|   1088|   5.0|  2.821863|
|   509|   1088|   2.0|  2.974636|
|   418|   1088|   5.0|   3.07216|
|   344|   1088|   3.0| 2.4288683|
+------+-------+------+----------+
only showing top 20 rows



In [13]:
# Load the movie metadata CSV (header row present, types inferred); it is
# joined to the predictions on movieId further down.
mov = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('mov.csv')
)

In [14]:
# Attach the movie metadata to each prediction. A left join keeps every
# prediction row even if its movieId has no match in the movie file.
res = predictions.join(mov, on='movieId', how='left')

In [15]:
# Preview the predictions together with the joined movie columns.
res.show()

+-------+------+------+----------+--------------------+--------------------+
|movieId|userId|rating|prediction|               title|              genres|
+-------+------+------+----------+--------------------+--------------------+
|    463|    30|   4.0| 2.9274898|Guilty as Sin (1993)|Crime|Drama|Thriller|
|    471|    85|   3.0| 1.9460993|Hudsucker Proxy, ...|              Comedy|
|    471|   460|   5.0| 4.5281115|Hudsucker Proxy, ...|              Comedy|
|    471|   350|   3.0|  3.565811|Hudsucker Proxy, ...|              Comedy|
|    471|   306|   3.0| 3.6842525|Hudsucker Proxy, ...|              Comedy|
|    471|   491|   3.0| 2.9561155|Hudsucker Proxy, ...|              Comedy|
|    471|   514|   4.0| 4.2804236|Hudsucker Proxy, ...|              Comedy|
|    471|    30|   4.0| 3.3044834|Hudsucker Proxy, ...|              Comedy|
|    496|   547|   3.0| 2.8261738|What Happened Was...|Comedy|Drama|Roma...|
|   1088|   580|   3.0| 3.1876535|Dirty Dancing (1987)|Drama|Musical|Rom...|

In [16]:
# Rows of the joined prediction table that belong to user 126, reduced to
# the user id and the movie's title and genres.
user_126 = (
    res
    .filter(res.userId == 126)
    .select('userId', 'title', 'genres')
)

In [17]:
# Display the titles and genres selected for user 126.
user_126.show()

+------+--------------------+--------------------+
|userId|               title|              genres|
+------+--------------------+--------------------+
|   126|Jungle Book, The ...|Adventure|Childre...|
|   126|  Hoop Dreams (1994)|         Documentary|
|   126|     Net, The (1995)|Action|Crime|Thri...|
|   126|Walk in the Cloud...|       Drama|Romance|
|   126|Terminator 2: Jud...|       Action|Sci-Fi|
|   126|        Congo (1995)|Action|Adventure|...|
|   126| Broken Arrow (1996)|Action|Adventure|...|
|   126|       Junior (1994)|       Comedy|Sci-Fi|
|   126|Fugitive, The (1993)|            Thriller|
|   126| Black Beauty (1994)|Adventure|Childre...|
|   126|Clear and Present...|Action|Crime|Dram...|
|   126|      Jumanji (1995)|Adventure|Childre...|
|   126|Aristocats, The (...|  Animation|Children|
|   126|Addams Family Val...|Children|Comedy|F...|
+------+--------------------+--------------------+

