In [18]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.sql import Row

In [26]:
import os

# Location of the MovieLens "ml-latest-small" ratings file. Override it by
# setting the RATINGS_CSV environment variable; the default is the original
# author's local path, so behaviour is unchanged when the variable is unset.
RATINGS_CSV = os.environ.get(
    "RATINGS_CSV",
    "/Users/yangguangqiang/Music/career-2021/recommend system/ml-latest-small/ratings.csv",
)

# Reuse the active Spark session if one exists, otherwise start a new one.
spark = SparkSession.builder.appName('Recommendations').getOrCreate()

# Read the CSV with a header row and let Spark infer the column types.
ratings = spark.read.csv(RATINGS_CSV, header=True, inferSchema=True)
ratings

DataFrame[userId: int, movieId: int, rating: double, timestamp: int]

In [27]:
# Fixed seed shared by the train/test split and the ALS initialisation, so
# that re-running the notebook on the same input reproduces the same split,
# model and RMSE instead of a different result on every run.
SEED = 42

# Randomly split the ratings into 80% training data and 20% test data.
(training, test) = ratings.randomSplit([0.8, 0.2], seed=SEED)

# Build the ALS recommender on the training data.
# coldStartStrategy="drop" removes rows whose prediction is NaN (users or
# movies that did not appear in the training split), so the evaluation metric
# below is not NaN.
als = ALS(maxIter=5, regParam=0.01, userCol="userId", itemCol="movieId", ratingCol="rating",
          coldStartStrategy="drop", seed=SEED)
model = als.fit(training)

# Evaluate the model by computing the RMSE on the test data
predictions = model.transform(test)
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
                                predictionCol="prediction")
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error = " + str(rmse))

# Generate top 10 movie recommendations for each user
userRecs = model.recommendForAllUsers(10)
# Generate top 10 user recommendations for each movie
movieRecs = model.recommendForAllItems(10)

Root-mean-square error = 1.0834762753124627




In [32]:
# Preview the per-user recommendations: first 20 rows, long cells truncated.
userRecs.show(n=20, truncate=True)

+------+--------------------+
|userId|     recommendations|
+------+--------------------+
|     1|[{3508, 6.138125}...|
|     3|[{4518, 5.188017}...|
|     5|[{7842, 7.7527103...|
|     6|[{2769, 7.2565756...|
|     9|[{95441, 7.635063...|
|    12|[{69278, 8.874403...|
|    13|[{89904, 8.274796...|
|    15|[{5047, 8.003167}...|
|    16|[{930, 4.860158},...|
|    17|[{1719, 6.118033}...|
|    19|[{322, 4.9465017}...|
|    20|[{4642, 7.4092417...|
|    22|[{2901, 10.397148...|
|    26|[{7842, 6.0010376...|
|    27|[{37384, 7.400357...|
|    28|[{7842, 5.1065545...|
|    31|[{674, 8.473235},...|
|    34|[{8965, 9.353524}...|
|    35|[{5048, 13.062377...|
|    37|[{674, 16.854443}...|
+------+--------------------+
only showing top 20 rows



In [30]:
# Preview the per-movie recommendations: first 20 rows, long cells truncated.
movieRecs.show(n=20, truncate=True)

+-------+--------------------+
|movieId|     recommendations|
+-------+--------------------+
|      1|[{259, 6.557937},...|
|     12|[{147, 15.135076}...|
|     13|[{147, 5.4637766}...|
|     22|[{77, 6.285603}, ...|
|     26|[{174, 6.3273377}...|
|     27|[{258, 10.956926}...|
|     28|[{258, 8.236999},...|
|     31|[{77, 8.410044}, ...|
|     34|[{37, 6.150867}, ...|
|     44|[{259, 7.47868}, ...|
|     47|[{147, 7.6709957}...|
|     52|[{147, 8.845782},...|
|     53|[{157, 9.428875},...|
|     65|[{147, 7.7780914}...|
|     76|[{363, 11.891413}...|
|     78|[{77, 6.416016}, ...|
|     81|[{22, 7.988296}, ...|
|     85|[{467, 12.787369}...|
|     93|[{557, 8.15063}, ...|
|    101|[{407, 7.63058}, ...|
+-------+--------------------+
only showing top 20 rows

