<a href="https://colab.research.google.com/github/pcamarillor/O2024_ESI3914O/blob/lab12_team03/examples/notebooks/als/lab12_team03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [26]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.sql.functions import explode

spark = SparkSession.builder \
            .appName("MovieRecommendationALS") \
            .config("spark.ui.port", "4040") \
            .getOrCreate()

spark.sparkContext.setLogLevel("ERROR")

file_path = '/content/drive/MyDrive/Colab Notebooks/sample_movielens_ratings.txt'

df = spark.read.option("delimiter", "::").csv(file_path, header=False, inferSchema=True)
df = df.withColumnRenamed("_c0", "userId").withColumnRenamed("_c1", "movieId") \
       .withColumnRenamed("_c2", "rating").withColumnRenamed("_c3", "timestamp")

df = df.dropDuplicates()

print("Ratings DF")
df.show(5)

als = ALS(
    userCol="userId",
    itemCol="movieId",
    ratingCol="rating",
    maxIter=10,
    regParam=0.1,
    rank=5,
    coldStartStrategy="drop"
)

model = als.fit(df)

user_recommendations = model.recommendForAllUsers(numItems=3)

user_recommendations.show(truncate=False)

recommendations = user_recommendations.select("userId", explode("recommendations").alias("rec"))

recommendations = recommendations.join(df, recommendations["rec.movieId"] == df["movieId"]).select(recommendations["userId"], "rec.movieId", "rec.rating")

recommendations.show(truncate=False)

predictions = model.transform(df)

predictions = predictions.na.drop(subset=["prediction"])

predictions.show(truncate=False)

evaluator = RegressionEvaluator(
    metricName="rmse",
    labelCol="rating",
    predictionCol="prediction"
)

rmse = evaluator.evaluate(predictions)
print(f"Root-mean-square error (RMSE) = {rmse}")

spark.stop()



Ratings DF
+------+-------+------+----------+
|userId|movieId|rating| timestamp|
+------+-------+------+----------+
|     3|      0|     1|1424380312|
|     4|     41|     4|1424380312|
|     7|     21|     1|1424380312|
|     8|      7|     1|1424380312|
|     8|     69|     1|1424380312|
+------+-------+------+----------+
only showing top 5 rows

+------+---------------------------------------------------+
|userId|recommendations                                    |
+------+---------------------------------------------------+
|20    |[{22, 3.5402706}, {51, 3.1765382}, {68, 3.0576973}]|
|10    |[{25, 2.6328824}, {2, 2.5126235}, {93, 2.502179}]  |
|0     |[{92, 2.280962}, {62, 2.1991308}, {25, 2.1892383}] |
|1     |[{22, 2.8800159}, {68, 2.5960696}, {90, 2.5139477}]|
|21    |[{29, 4.3295336}, {52, 4.2647533}, {63, 3.59393}]  |
|11    |[{32, 5.0769415}, {18, 4.726662}, {30, 4.623887}]  |
|12    |[{46, 5.6951323}, {55, 4.79084}, {49, 4.584455}]   |
|22    |[{51, 4.520794}, {75, 4.4288707