# Setup

In [1]:
from pyspark.sql import SparkSession 
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS, ALSModel
from pyspark.ml.tuning import TrainValidationSplit,CrossValidator, ParamGridBuilder, CrossValidatorModel
from pyspark.sql.functions import col, explode, when

__Pyspark setup__

In [2]:
import os
import sys

os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable

In [3]:
spark = SparkSession.builder \
    .master('local') \
    .appName('BRS-pyspark') \
    .config("spark.driver.memory", "12g") \
    .config("spark.driver.extraJavaOptions", "-Xss12M") \
    .getOrCreate()

spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

In [4]:
spark

---

# Loading the datasets

In [5]:
books_df = spark.read.csv("../datasets/clean/filtered_datasets/Final/final_books.csv",header=True,inferSchema=True)
book_ratings = spark.read.csv("../datasets/clean/filtered_datasets/Final/final_ratings.csv",header=True,inferSchema=True)

In [6]:
book_ratings.show(2)

+-------+-----------+-------+
|User-ID|Book-Rating|Book-Id|
+-------+-----------+-------+
| 277427|          9|      9|
| 277427|          9|    278|
+-------+-----------+-------+
only showing top 2 rows



In [8]:
mPath =  "./models/model_1_50_100_07"
persistedModel = CrossValidatorModel.load(mPath)

# predict
predictionsDF = persistedModel.transform(book_ratings)

In [10]:
predictionsDF.show()

+-------+-----------+-------+----------+
|User-ID|Book-Rating|Book-Id|prediction|
+-------+-----------+-------+----------+
| 277427|          9|      9|  8.403077|
| 277427|          9|    278|  8.473743|
| 277427|         10|    202|  8.555579|
| 277427|         10|    159| 7.5709944|
| 277427|          9|     63|  8.107115|
| 277427|         10|    481|  9.597159|
| 278026|          9|   1122|  7.995034|
| 278026|          8|    226| 7.3062773|
| 278026|          8|   1203|  7.337729|
| 278418|         10|    640|  8.299643|
| 278418|          8|     36| 7.6102643|
| 278418|          7|    692| 7.3070483|
| 278582|          7|    292| 7.3428893|
| 278582|          7|    260| 6.7946076|
| 278582|         10|     52| 7.8803596|
| 278633|          7|     27|  6.515313|
| 278633|          7|    336|  5.915984|
| 278633|          7|    428|  6.530921|
|    254|          5|    494|  5.174255|
|    254|          9|    205|  5.930076|
+-------+-----------+-------+----------+
only showing top

In [14]:
model = persistedModel.bestModel

In [11]:
def scale_ratings(predictions):
    # Get the maximum and minimum predicted ratings
    max_rating = predictions.selectExpr("max(`Predicted-Rating`)").collect()[0][0]
    min_rating = 1

    # Scale the ratings to the range [0, 10]
    scaled_predictions = predictions.withColumn("Scaled-Rating", ((col("Predicted-Rating") - min_rating) / (max_rating - min_rating)) * 10)

    # Reorder the columns to place Scaled-Rating next to Predicted-Rating
    scaled_predictions = scaled_predictions.select(
        "User-ID", "Book-Id", "Predicted-Rating", "Scaled-Rating", 
        *[col for col in scaled_predictions.columns if col not in ["User-ID", "Book-Id", "Predicted-Rating", "Scaled-Rating"]]
    )
    
    return scaled_predictions

def recommendBooks(user_id, model, book_df):
    user_recs = model.recommendForUserSubset(spark.createDataFrame([[user_id]], ["User-ID"]), numItems=10)
    
    # Use "explode" on the array of recommendations to separate rows
    user_recs_exploded = user_recs.select(col("User-ID"), explode("recommendations").alias("recommendation"))
    
    # Get recommended books and ratings + join on their name
    recommended_books = user_recs_exploded.select(col("User-ID"), col("recommendation.Book-Id").alias("Book-Id"), col("recommendation.rating").alias("Predicted-Rating"))
    recommended_books_with_names = recommended_books.join(book_df, on="Book-Id", how="inner")

    # Scale the ratings to the range [0, 10]
    scaled_recommendations = scale_ratings(recommended_books_with_names)

    return scaled_recommendations

#### Apllying the model for some existing users

In [15]:
recommended_books = recommendBooks(4017, model, books_df).toPandas() # we use to pandas to get a cleaner output
recommended_books

Unnamed: 0,User-ID,Book-Id,Predicted-Rating,Scaled-Rating,ISBN,Book-Title,Book-Author,Publisher,Year-Of-Publication,categories,description,Image-URL-S,Image-URL-M,Image-URL-L
0,4017,346,11.90079,10.0,140444300,Les Miserables (Penguin Classics),Victor Hugo,Penguin Books,1982,Fiction,"After nineteen years in prison, Jean Valjean h...",http://images.amazon.com/images/P/0140444300.0...,http://images.amazon.com/images/P/0140444300.0...,http://images.amazon.com/images/P/0140444300.0...
1,4017,1485,11.577393,9.703326,802806414,The Pilgrim's Regress: An Allegorical Apology ...,C. S. Lewis,Wm. B. Eerdmans Publishing Company,1992,Religion,The first book written by C. S. Lewis after hi...,http://images.amazon.com/images/P/0802806414.0...,http://images.amazon.com/images/P/0802806414.0...,http://images.amazon.com/images/P/0802806414.0...
2,4017,2308,11.209471,9.365808,811201120,Henry Miller on Writing (New Directions Paperb...,Henry Miller,New Directions Publishing Corporation,1964,Literary Criticism,Some of the most rewarding pages in Henry Mill...,http://images.amazon.com/images/P/0811201120.0...,http://images.amazon.com/images/P/0811201120.0...,http://images.amazon.com/images/P/0811201120.0...
3,4017,1154,11.145123,9.306777,618260587,The Lord of the Rings,J. R. R. Tolkien,Houghton Mifflin Company,2002,Fiction,"An epic detailing the Great War of the Ring, a...",http://images.amazon.com/images/P/0618260587.0...,http://images.amazon.com/images/P/0618260587.0...,http://images.amazon.com/images/P/0618260587.0...
4,4017,2280,10.777975,8.969969,743482778,Twelfth Night (Folger Shakespeare Library),William Shakespeare,Washington Square Press,2004,Drama,"Background information about Shakespeare, Eliz...",http://images.amazon.com/images/P/0743482778.0...,http://images.amazon.com/images/P/0743482778.0...,http://images.amazon.com/images/P/0743482778.0...
5,4017,661,10.687587,8.88705,452011876,Atlas Shrugged,Ayn Rand,Plume Books,1999,Capitalism,The decisions of a few industrial leaders shak...,http://images.amazon.com/images/P/0452011876.0...,http://images.amazon.com/images/P/0452011876.0...,http://images.amazon.com/images/P/0452011876.0...
6,4017,540,10.559708,8.769738,800871863,Silence,Shusaku Endo,Parkwest Publications,1980,Fiction,"Sustained by dreams of glorious martyrdom, a s...",http://images.amazon.com/images/P/0800871863.0...,http://images.amazon.com/images/P/0800871863.0...,http://images.amazon.com/images/P/0800871863.0...
7,4017,1395,10.537687,8.749538,517543052,Be Here Now,Lama Foundation Editor,Three Rivers Press (CA),1971,"Body, Mind & Spirit",This book is enhanced with content such as aud...,http://images.amazon.com/images/P/0517543052.0...,http://images.amazon.com/images/P/0517543052.0...,http://images.amazon.com/images/P/0517543052.0...
8,4017,398,10.459037,8.677386,156004801,All the King's Men (Harvest Book),Robert Penn Warren,Harvest Books,1996,Fiction,Willie Stark's obsession with political power ...,http://images.amazon.com/images/P/0156004801.0...,http://images.amazon.com/images/P/0156004801.0...,http://images.amazon.com/images/P/0156004801.0...
9,4017,846,10.40865,8.631164,1560252480,Requiem for a Dream,Hubert Selby Jr.,Thunder's Mouth Press,2000,Fiction,"""The acclaimed novel that inspired the Darren ...",Requiem for a Dream makes it to the big scree...,Jennifer Connelly,Marlon Wayans
