In [0]:
from pyspark.sql import SparkSession,functions as func

In [0]:
# Get (or lazily create) the Spark session used by every later cell, under the app name "ALS Recommendation".
spark = (
    SparkSession.builder
    .appName("ALS Recommendation")
    .getOrCreate()
)

In [0]:
from pyspark.sql.types import StructType,StructField,IntegerType,LongType,StringType,FloatType,DoubleType

In [0]:
# Explicit schema for the ratings file: three integer columns plus a long timestamp, all nullable.
schema1 = StructType(
    [
        StructField("user_id", IntegerType(), True),
        StructField("movie_id", IntegerType(), True),
        StructField("rating", IntegerType(), True),
        StructField("timestamp", LongType(), True),
    ]
)

In [0]:
# Load the tab-separated ratings file with the explicit schema and preview the first five rows.
df = (
    spark.read
    .option("sep", "\t")
    .schema(schema1)
    .csv("dbfs:/FileStore/tables/u.data")
)
df.show(5)

In [0]:
# Explicit schema for the movie metadata file: integer id plus title and genres strings, all nullable.
# NOTE: this rebinds `schema1`; the ratings schema assigned to the same name earlier is replaced from here on.
schema1 = StructType(
    [
        StructField("movieId", IntegerType(), True),
        StructField("title", StringType(), True),
        StructField("genres", StringType(), True),
    ]
)

In [0]:
# Load the movie metadata CSV (first line is a header) using whatever `schema1` currently holds,
# then preview the first five rows.
movies = (
    spark.read
    .schema(schema1)
    .option("header", "true")
    .csv("dbfs:/FileStore/tables/movies.csv")
)
movies.show(5)

In [0]:
# Register ratings and movie metadata as temp views, then preview ratings enriched with title and genres.
# NOTE(review): the join assumes rating.movie_id and movie.movieId share one id space —
# confirm that both input files come from the same dataset release.
df.createOrReplaceTempView("rating")
movies.createOrReplaceTempView("movie")
spark.sql(
    "select r.*,m.title,m.genres from rating r inner join movie m on r.movie_id==m.movieId"
).show(5)

In [0]:
# Keep only the three columns passed to ALS below (the timestamp is dropped) and preview five rows.
new_df = df.select(["user_id", "movie_id", "rating"])
new_df.show(5)

In [0]:
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [0]:
# 70/30 train/test split; the fixed seed keeps the split repeatable.
train_data, test_data = new_df.randomSplit(weights=[0.7, 0.3], seed=1234)

In [0]:
# Configure the ALS estimator on (user_id, movie_id, rating).
# - nonnegative=True constrains the learned factors to be non-negative.
# - coldStartStrategy="drop" removes rows whose prediction is NaN so the RMSE below stays defined.
# - seed is pinned (same value as the train/test split) so the fit does not depend on the
#   library's default seed and results can be reproduced between sessions.
als = ALS(
    userCol="user_id",
    itemCol="movie_id",
    ratingCol="rating",
    nonnegative=True,
    coldStartStrategy="drop",
    maxIter=5,
    regParam=0.01,
    seed=1234,
)

In [0]:
# Fit the ALS estimator on the training split.
model = als.fit(train_data)

In [0]:
# Score the held-out split with the fitted model and preview ten of the resulting rows.
test_predictions = model.transform(test_data)
test_predictions.show(n=10)

In [0]:
# RMSE evaluator comparing the "rating" label with the "prediction" column.
evaluator = RegressionEvaluator(
    labelCol="rating",
    predictionCol="prediction",
    metricName="rmse",
)

In [0]:
# Compute and print the RMSE of the held-out predictions.
RMSE = evaluator.evaluate(test_predictions)
print(RMSE)

In [0]:
# Top 10 movie recommendations for every user; each row carries a user_id and a
# `recommendations` array. Only ten rows are previewed.
nrecommendations = model.recommendForAllUsers(10)
nrecommendations.limit(10).show()

In [0]:
# Flatten the per-user `recommendations` array into one (user_id, movie_id, rating) row per recommendation.
# The name is rebound to the flattened frame, so the guard skips the reshape when this cell is
# re-run: by then the array column is gone and explode() could no longer resolve it.
if "recommendations" in nrecommendations.columns:
    nrecommendations = (
        nrecommendations
        .withColumn("rec_exp", func.explode("recommendations"))
        .select("user_id", func.col("rec_exp.movie_id"), func.col("rec_exp.rating"))
    )
nrecommendations.limit(10).show()

In [0]:
# Expose the current `nrecommendations` frame to Spark SQL as the temp view "recommend"
# (an existing view of that name is replaced).
nrecommendations.createOrReplaceTempView("recommend")

In [0]:
# Join the "recommend" view with the "movie" view to attach title and genres; preview ten rows.
spark.sql(
    "select re.*,m.title,m.genres from recommend re inner join movie m on re.movie_id==m.movieId"
).show(10)

In [0]:
# Recommendations joined with title and genres, then narrowed to user_id 31 and
# listed from the highest predicted rating down (at most ten rows).
recommend_df = spark.sql(
    "select re.*,m.title,m.genres from recommend re inner join movie m on re.movie_id==m.movieId"
)
(
    recommend_df
    .filter(func.col("user_id") == 31)
    .orderBy(func.col("rating").desc())
    .limit(10)
    .show()
)

In [0]:
# Top 10 user recommendations for every movie; each row carries a movie_id and a
# `recommendations` array. Only ten rows are previewed.
movieRecs = model.recommendForAllItems(10)
movieRecs.limit(10).show()

In [0]:
# Flatten the per-movie `recommendations` array into one (movie_id, user_id, rating) row per recommendation.
# The name is rebound to the flattened frame, so the guard skips the reshape when this cell is
# re-run: by then the array column is gone and explode() could no longer resolve it.
if "recommendations" in movieRecs.columns:
    movieRecs = (
        movieRecs
        .withColumn("rec_exp", func.explode("recommendations"))
        .select("movie_id", func.col("rec_exp.user_id"), func.col("rec_exp.rating"))
    )
movieRecs.limit(10).show()

In [0]:
# Register the per-movie user recommendations as the temp view "movieRec", then join with
# the "movie" view to attach title and genres; preview ten rows.
movieRecs.createOrReplaceTempView("movieRec")
spark.sql(
    "select mr.*,m.title,m.genres from movieRec mr inner join movie m on mr.movie_id=m.movieId"
).show(10)

In [0]:
# Generate top 10 user recommendations for movie_id 31 along with title & genres,
# listed from the highest predicted rating down.
movieRec_df = spark.sql(
    "select mr.*,m.title,m.genres from movieRec mr inner join movie m on mr.movie_id=m.movieId"
)
(
    movieRec_df
    .filter(func.col("movie_id") == 31)
    .orderBy(func.col("rating").desc())
    .limit(10)
    .show()
)

In [0]:
# Take three distinct user ids from the ratings (read through the estimator's configured
# user column) and ask the model for ten movie recommendations each.
users = (
    new_df
    .select(als.getUserCol())
    .distinct()
    .limit(3)
)
userSubsetRecs = model.recommendForUserSubset(users, 10)
userSubsetRecs.show()

In [0]:
# Flatten the `recommendations` array of the user subset into one (user_id, movie_id, rating) row each.
# The name is rebound to the flattened frame, so the guard skips the reshape when this cell is
# re-run: by then the array column is gone and explode() could no longer resolve it.
if "recommendations" in userSubsetRecs.columns:
    userSubsetRecs = (
        userSubsetRecs
        .withColumn("rec_exp", func.explode("recommendations"))
        .select("user_id", func.col("rec_exp.movie_id"), func.col("rec_exp.rating"))
    )
userSubsetRecs.show()

In [0]:
# Register the user-subset recommendations as the temp view "Usersubset", then join with
# the "movie" view to attach title and genres; preview ten rows.
userSubsetRecs.createOrReplaceTempView("Usersubset")
spark.sql(
    "select u.*,m.title,m.genres from Usersubset u inner join movie m on u.movie_id=m.movieId"
).show(10)

In [0]:
# Generate top 10 user recommendations for a specified set of movies.
# The three-id subset gets its own name so that the `movies` metadata DataFrame loaded
# earlier in the notebook is not silently replaced by a one-column frame of ids.
movie_subset = (
    new_df
    .select(als.getItemCol())
    .distinct()
    .limit(3)
)
movieSubSetRecs = model.recommendForItemSubset(movie_subset, 10)
movieSubSetRecs.show()

In [0]:
# Flatten the `recommendations` array of the movie subset into one (movie_id, user_id, rating) row each.
# The name is rebound to the flattened frame, so the guard skips the reshape when this cell is
# re-run: by then the array column is gone and explode() could no longer resolve it.
if "recommendations" in movieSubSetRecs.columns:
    movieSubSetRecs = (
        movieSubSetRecs
        .withColumn("rec_exp", func.explode("recommendations"))
        .select("movie_id", func.col("rec_exp.user_id"), func.col("rec_exp.rating"))
    )
movieSubSetRecs.show()

In [0]:
# Register the movie-subset recommendations as the temp view "movieSubset", then join with
# the "movie" view to attach title and genres; preview ten rows.
movieSubSetRecs.createOrReplaceTempView("movieSubset")
spark.sql(
    "select s.*,m.title,m.genres from movieSubset s inner join movie m on s.movie_id=m.movieId"
).show(10)

In [0]:
# Build a one-row, one-column DataFrame holding a single user id, to use as a subset input.
user_id = 148
user_schema = StructType(
    [StructField("user_id", IntegerType(), True)]
)
users = spark.createDataFrame([(user_id,)], schema=user_schema)

In [0]:
# Ask the model for ten movie recommendations for the user(s) in `users` and print the result.
recommendations = model.recommendForUserSubset(users, 10)
recommendations.show()

In [0]:
# Render the recommendations frame with display().
# NOTE(review): display() is not defined in this notebook — presumably the hosting
# environment's built-in rich renderer; confirm before running elsewhere.
display(recommendations)

user_id,recommendations
148,"List(List(1512, 11.486127), List(634, 10.046826), List(650, 9.226193), List(1598, 8.983273), List(1114, 8.9826565), List(1069, 8.96747), List(1036, 8.640676), List(536, 8.638755), List(835, 8.557542), List(1615, 8.47964))"


In [0]:
# Install mlflow through a shell escape so the cells below can import it.
# NOTE(review): the version is not pinned, so the installed release may differ between runs —
# consider pinning, and moving the install to the top of the notebook.
!pip install mlflow

In [0]:
import mlflow

# Train one explicit-feedback ALS model inside an MLflow run and score it on the held-out split.
with mlflow.start_run():
  # Same hyper-parameters as the first ALS fit; the seed is pinned (matching the split seed)
  # so the result does not depend on the library's default seed.
  als=ALS(userCol="user_id",itemCol="movie_id",ratingCol="rating",nonnegative=True,implicitPrefs=False,coldStartStrategy="drop",maxIter=5,regParam=0.01,seed=1234)
  print(type(als))
  model=als.fit(train_data)
  test_predictions=model.transform(test_data)
  # show() prints the rows itself and returns None, so wrapping it in print() only added a stray "None".
  test_predictions.show(5)
  evaluator=RegressionEvaluator(labelCol="rating",predictionCol="prediction",metricName="rmse")
  RMSE=evaluator.evaluate(test_predictions)
  print(RMSE)
  # Record the score on the active run; nothing else in this block writes to MLflow explicitly.
  mlflow.log_metric("test_rmse",RMSE)

In [0]:
from pyspark.ml.recommendation import ALS
from pyspark.ml.tuning import ParamGridBuilder,CrossValidator
from pyspark.ml.evaluation import RegressionEvaluator
import mlflow

In [0]:
# Tune ALS with 3-fold cross-validation inside an MLflow run, then score the best model on the held-out split.
with mlflow.start_run():
  # Seeds are pinned (matching the split seed) on both the estimator and the cross-validator
  # so the fit and the fold assignment do not depend on the library's default seeds.
  als=ALS(userCol="user_id",itemCol="movie_id",ratingCol="rating",nonnegative=True,implicitPrefs=False,coldStartStrategy="drop",seed=1234)
  print(type(als))
  # Each hyper-parameter has a single candidate, so the grid contains exactly one combination.
  param_grid=ParamGridBuilder().addGrid(als.rank,[10]).addGrid(als.regParam,[.01]).addGrid(als.maxIter,[5]).build()
  print(len(param_grid))
  evaluator=RegressionEvaluator(labelCol="rating",predictionCol="prediction",metricName="rmse")
  cv=CrossValidator(estimator=als,estimatorParamMaps=param_grid,evaluator=evaluator,numFolds=3,seed=1234)
  print(cv)
  # NOTE: `model` now holds the fitted cross-validator result, not the ALS model from the earlier cells.
  model=cv.fit(train_data)
  best_model=model.bestModel
  print(type(best_model))
  # The winning hyper-parameters are read from the best model's parent estimator via the
  # private `_java_obj` handle.
  print("Rank:", best_model._java_obj.parent().getRank())
  print("MaxIter:", best_model._java_obj.parent().getMaxIter())
  print("RegParam:", best_model._java_obj.parent().getRegParam())
  test_predictions = best_model.transform(test_data)
  # show() prints the rows itself and returns None, so wrapping it in print() only added a stray "None".
  test_predictions.show(5)
  RMSE = evaluator.evaluate(test_predictions)
  print(str(RMSE))
  # Record the held-out score on the active run; nothing else in this block writes to MLflow explicitly.
  mlflow.log_metric("test_rmse",RMSE)

In [0]:
# Top 10 movie recommendations for every user, this time from the cross-validated best model.
# Only ten rows are previewed.
nrecommendations1 = best_model.recommendForAllUsers(10)
nrecommendations1.limit(10).show()

In [0]:
# Flatten the per-user `recommendations` array into one (user_id, movie_id, rating) row per recommendation.
# The name is rebound to the flattened frame, so the guard skips the reshape when this cell is
# re-run: by then the array column is gone and explode() could no longer resolve it.
if "recommendations" in nrecommendations1.columns:
    nrecommendations1 = (
        nrecommendations1
        .withColumn("rec_exp", func.explode("recommendations"))
        .select("user_id", func.col("rec_exp.movie_id"), func.col("rec_exp.rating"))
    )
nrecommendations1.limit(10).show()

In [0]:
# Expose the current `nrecommendations1` frame to Spark SQL as the temp view "recommends"
# (an existing view of that name is replaced).
nrecommendations1.createOrReplaceTempView("recommends")

In [0]:
# Join the "recommends" view with the "movie" view to attach title and genres; preview ten rows.
spark.sql(
    "select re.*,m.title,m.genres from recommends re inner join movie m on re.movie_id==m.movieId"
).show(10)

In [0]:
# Best-model recommendations joined with title and genres, then narrowed to user_id 31 and
# listed from the highest predicted rating down (at most ten rows).
recommends_df = spark.sql(
    "select re.*,m.title,m.genres from recommends re inner join movie m on re.movie_id==m.movieId"
)
(
    recommends_df
    .filter(func.col("user_id") == 31)
    .orderBy(func.col("rating").desc())
    .limit(10)
    .show()
)

In [0]:
# Top 10 user recommendations for every movie, from the cross-validated best model.
# Only ten rows are previewed.
movieRecs1 = best_model.recommendForAllItems(10)
movieRecs1.limit(10).show()

In [0]:
# Flatten the per-movie `recommendations` array into one (movie_id, user_id, rating) row per recommendation.
# The name is rebound to the flattened frame, so the guard skips the reshape when this cell is
# re-run: by then the array column is gone and explode() could no longer resolve it.
if "recommendations" in movieRecs1.columns:
    movieRecs1 = (
        movieRecs1
        .withColumn("rec_exp", func.explode("recommendations"))
        .select("movie_id", func.col("rec_exp.user_id"), func.col("rec_exp.rating"))
    )
movieRecs1.limit(10).show()

In [0]:
# Register the best-model per-movie user recommendations as the temp view "movieRecs", then
# join with the "movie" view to attach title and genres; preview ten rows.
movieRecs1.createOrReplaceTempView("movieRecs")
spark.sql(
    "select mr.*,m.title,m.genres from movieRecs mr inner join movie m on mr.movie_id=m.movieId"
).show(10)

In [0]:
# Generate top 10 user recommendations for movie_id 12 along with title & genres,
# listed from the highest predicted rating down.
movieRecs_df = spark.sql(
    "select mr.*,m.title,m.genres from movieRecs mr inner join movie m on mr.movie_id=m.movieId"
)
(
    movieRecs_df
    .filter(func.col("movie_id") == 12)
    .orderBy(func.col("rating").desc())
    .limit(10)
    .show()
)