In [1]:
import pandas as pd
import numpy as np
training_df = pd.read_csv('C:/Users/nafla/OneDrive/Documents/system development/Netflix/training_data.csv')
training_df.head()

Unnamed: 0,MovieID,CustomerID,Rating,Date,YearOfRelease,RatingYear,MovieAge
0,1,1488844,3,2005-09-06,2003,2005,2
1,1,822109,5,2005-05-13,2003,2005,2
2,1,885013,4,2005-10-19,2003,2005,2
3,1,30878,4,2005-12-26,2003,2005,2
4,1,823519,3,2004-05-03,2003,2004,1


In [2]:
# Calculate quantiles for user activity and item popularity
user_activity_quantiles = training_df['CustomerID'].value_counts().quantile([0.25, 0.5, 0.75])
item_popularity_quantiles = training_df['MovieID'].value_counts().quantile([0.25, 0.5, 0.75])
print(user_activity_quantiles)
print(item_popularity_quantiles)

0.25     8.0
0.50    24.0
0.75    64.0
Name: CustomerID, dtype: float64
0.25     192.0
0.50     552.5
0.75    2539.0
Name: MovieID, dtype: float64


In [3]:
training_df['CustomerID'] = training_df['CustomerID'].astype(str)
training_df['MovieID'] = training_df['MovieID'].astype(str)
training_df['Rating'] = pd.to_numeric(training_df['Rating'], errors='coerce')  # Converts to float, makes non-numeric as NaN

In [4]:
# Check data types
print("Before conversion:")
print(training_df.dtypes)

Before conversion:
MovieID          object
CustomerID       object
Rating            int64
Date             object
YearOfRelease     int64
RatingYear        int64
MovieAge          int64
dtype: object


# Stratified Sampling Method 

To create a representative sample of our dataset, we employ a stratified sampling method that accounts for three key dimensions: Rating Distribution, User Activity, and Item Popularity. This approach ensures our sample maintains the diversity and characteristics of the entire dataset, facilitating more reliable model training and evaluation.

- User Activity is quantified by the number of ratings a user has provided.
- Item Popularity reflects the number of ratings an item has received.

Finally, We combine User Activity, Item Popularity, and Rating into a composite stratification key for each record. This multi-dimensional key ensures our sampling process considers the distribution across all three axes.

In [5]:
# Assign each user and item to a bin based on the quantiles
training_df['UserActivityBin'] = pd.qcut(training_df.groupby('CustomerID')['Rating'].transform('size'), 
                                q=[0, .25, .5, .75, 1], labels=['low', 'medium', 'medium-high', 'high'])

training_df['ItemPopularityBin'] = pd.qcut(training_df.groupby('MovieID')['Rating'].transform('size'), 
                                  q=[0, .25, .5, .75, 1], labels=['low', 'medium', 'medium-high', 'high'])

# Combine these with Rating to create a stratification key
training_df['Strata'] = training_df['UserActivityBin'].astype(str) + "_" + training_df['ItemPopularityBin'].astype(str) + "_" + training_df['Rating'].astype(str)

# Perform stratified sampling
# we use groupby and  frac to specify a fraction of each strata and in case number of rows is less that 10 it takes all rows
strat_sample_df = training_df.groupby('Strata').apply(lambda x: x.sample(frac=0.005 if len(x) > 10 else len(x)/len(x))).reset_index(drop=True)


In [6]:
num_sampled_rows = len(strat_sample_df)
print(f"Number of rows in the sampled DataFrame: {num_sampled_rows}")

Number of rows in the sampled DataFrame: 120269


# Splitting dataset to training, test, validation

In [7]:
from sklearn.model_selection import train_test_split

# Assuming strat_sample_df is your entire dataset
user_ids = strat_sample_df['CustomerID'].unique()
movie_ids = strat_sample_df['MovieID'].unique()

# Create mappings based on the entire dataset
user_id_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
movie_id_to_index = {movie_id: index for index, movie_id in enumerate(movie_ids)}

# Now, split your dataset
training_data, testing_data = train_test_split(strat_sample_df, test_size=0.2, random_state=42)



In [8]:
def map_ids_to_indices(df, user_id_to_index, movie_id_to_index):
    """
    Map user IDs and movie IDs to their respective indices.

    Parameters:
    - df: DataFrame containing 'CustomerID', 'MovieID', and other columns.
    - user_id_to_index: Dictionary mapping user IDs to indices.
    - movie_id_to_index: Dictionary mapping movie IDs to indices.

    Returns:
    - DataFrame with added columns 'UserIndex' and 'MovieIndex' for the respective indices.
    """

    # Copy to avoid modifying the original DataFrame
    modified_data = df.copy()
    
    # Map 'CustomerID' to 'UserIndex'
    modified_data['UserIndex'] = modified_data['CustomerID'].map(user_id_to_index)
    
    # Map 'MovieID' to 'MovieIndex'
    modified_data['MovieIndex'] = modified_data['MovieID'].map(movie_id_to_index)
    
    # Optional: drop rows where either UserIndex or MovieIndex is NaN (i.e., ID wasn't found)
    modified_data.dropna(subset=['UserIndex', 'MovieIndex'], inplace=True)
    
    # Convert indices to integers (they might be floats due to NaN handling)
    modified_data['UserIndex'] = modified_data['UserIndex'].astype(int)
    modified_data['MovieIndex'] = modified_data['MovieIndex'].astype(int)
    
    return modified_data

In [9]:
mapped_training_data = map_ids_to_indices(training_data,user_id_to_index, movie_id_to_index)

In [21]:
# Calculate the size of each split
training_size = training_data.shape[0]  # Number of rows in the training data
training_size_mapp = mapped_training_data.shape[0]  # Number of rows in the training data
# validation_size = validation_data.shape[0]  # Number of rows in the validation data
testing_size = testing_data.shape[0]  # Number of rows in the testing data

# Print the sizes
print(f"Training Data Size: {training_size}")
print(f"Training Data Size: {training_size_mapp}")
# print(f"Validation Data Size: {validation_size}")
print(f"Testing Data Size: {testing_size}")

Training Data Size: 96215
Training Data Size: 96215
Testing Data Size: 24054


In [25]:
# Assuming final_training_data, validation_data, and testing_data are your data splits

# Count unique MovieIDs in the final training data
unique_users_training = mapped_training_data['CustomerID'].nunique()

# Count unique MovieIDs in the validation data
# unique_movies_validation = validation_data['MovieID'].nunique()

# Count unique MovieIDs in the testing data
unique_movies_testing = testing_data['CustomerID'].nunique()

# Print the counts
print(f"Unique CustomerIDs in Training Data: {unique_users_training}")
# print(f"Unique MovieIDs in Validation Data: {unique_movies_validation}")
print(f"Unique MovieIDs in Testing Data: {unique_movies_testing}")


Unique CustomerIDs in Training Data: 74675
Unique MovieIDs in Testing Data: 22352


# Creating User - Item matrix

In [12]:
# Creating customer-movie matrix
import pandas as pd
from scipy.sparse import csr_matrix

# user_ids = training_data['CustomerID'].unique()
# movie_ids = training_data['MovieID'].unique()

# user_id_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
# movie_id_to_index = {movie_id: index for index, movie_id in enumerate(movie_ids)}


# Extract rows, columns, and data for CSR matrix
# rows = training_data['UserIndex'].values
# cols = training_data['MovieIndex'].values
# data = training_data['Rating'].values

# # Calculate the shape of the matrix
# num_users = len(user_id_to_index)
# num_movies = len(movie_id_to_index)

# # Create the CSR matrix
# ratings_csr_matrix = csr_matrix((data, (rows, cols)), shape=(num_users, num_movies))

# print(ratings_csr_matrix)

# Extract the rows (user indices), columns (movie indices), and data (ratings) for the CSR matrix
rows = mapped_training_data['UserIndex'].values
cols = mapped_training_data['MovieIndex'].values
data = mapped_training_data['Rating'].values

# Determine the shape of the CSR matrix
# The shape is (max_user_index + 1, max_movie_index + 1) because indices start from 0
num_users = mapped_training_data['UserIndex'].max() + 1
num_movies = mapped_training_data['MovieIndex'].max() + 1

# Create the CSR matrix
ratings_csr_matrix = csr_matrix((data, (rows, cols)), shape=(num_users, num_movies))

print(ratings_csr_matrix)

  (0, 25)	3
  (0, 2383)	4
  (0, 2522)	3
  (1, 0)	1
  (1, 44)	5
  (1, 2369)	3
  (2, 1)	1
  (3, 2664)	4
  (4, 0)	1
  (4, 1912)	4
  (5, 3)	1
  (5, 18)	3
  (6, 4)	1
  (6, 532)	1
  (6, 1151)	4
  (7, 0)	1
  (8, 5)	1
  (9, 6)	1
  (10, 0)	1
  (10, 2562)	5
  (11, 7)	1
  (12, 8)	1
  (12, 399)	3
  (12, 1470)	3
  (13, 9)	1
  :	:
  (88611, 2610)	5
  (88612, 2728)	5
  (88613, 2641)	5
  (88614, 2572)	5
  (88615, 2566)	5
  (88616, 2610)	5
  (88617, 2624)	5
  (88618, 2637)	5
  (88619, 2645)	5
  (88620, 2659)	5
  (88622, 2690)	5
  (88623, 2597)	5
  (88625, 2654)	5
  (88626, 2486)	5
  (88627, 2540)	5
  (88628, 2566)	5
  (88629, 2634)	5
  (88631, 2687)	5
  (88632, 2697)	5
  (88633, 2719)	5
  (88634, 2617)	5
  (88635, 2487)	5
  (88636, 2607)	5
  (88637, 2575)	5
  (88638, 2556)	5


# Define similarity function for each given user

In [13]:
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

cosine_similarity_matrix_csr = cosine_similarity(ratings_csr_matrix, dense_output=False)
    



In [14]:
print(cosine_similarity_matrix_csr)

  (0, 87742)	0.5144957554275265
  (0, 87336)	0.5144957554275265
  (0, 86833)	0.5144957554275265
  (0, 86712)	0.5144957554275265
  (0, 86050)	0.5144957554275265
  (0, 85568)	0.5144957554275265
  (0, 85558)	0.5144957554275265
  (0, 85116)	0.5144957554275265
  (0, 84743)	0.5144957554275265
  (0, 84532)	0.5144957554275265
  (0, 84099)	0.5144957554275265
  (0, 83997)	0.5144957554275265
  (0, 83965)	0.4601789933084222
  (0, 83920)	0.5144957554275265
  (0, 83897)	0.5144957554275265
  (0, 83696)	0.5144957554275265
  (0, 83610)	0.5144957554275265
  (0, 81093)	0.5144957554275265
  (0, 79938)	0.3086974532565159
  (0, 78973)	0.36380343755449945
  (0, 74019)	0.41159660434202117
  (0, 73188)	0.4411764705882352
  (0, 70513)	0.15339299776947407
  (0, 68433)	0.3086974532565159
  (0, 67255)	0.2300894966542111
  :	:
  (88638, 13777)	1.0
  (88638, 13775)	1.0
  (88638, 13218)	1.0
  (88638, 13104)	1.0
  (88638, 12745)	1.0
  (88638, 12694)	1.0
  (88638, 12340)	0.3713906763541037
  (88638, 11232)	0.7071067811

In [15]:
print(cosine_similarity_matrix_csr.shape)

(88639, 88639)


# Predict ratings using similarities

In [16]:
def predict_rating_with_similarity_matrix(csr_user_item_matrix, similarity_matrix, user_index, movie_index, k):
    """
    Predict the rating for a given movie by a target user, based on the ratings of top-k similar users.
    This function uses a pre-calculated similarity matrix.
    
    Parameters:
    - csr_user_item_matrix: CSR matrix representing the user-item matrix.
    - similarity_matrix: CSR matrix representing the similarity scores between users.
    - user_index: The index of the user for whom the rating is being predicted.
    - movie_index: The index of the movie for which the rating is being predicted.
    - k: Number of top similar users to consider for prediction.
    
    Returns:
    - Predicted rating for the movie by the target user.
    """
    # Step 1: Identify users who have rated the movie
    movie_rated_indices = csr_user_item_matrix[:, movie_index].nonzero()[0]
    
    # Check if user_index exists in the similarity matrix
    if user_index < 0 or user_index >= similarity_matrix.shape[0]:
        # If user_index does not exist in similarity matrix, return default prediction
        overall_average_rating = csr_user_item_matrix[:, movie_index].data.mean()
        return overall_average_rating if np.isfinite(overall_average_rating) else 3.0  # Assuming 3.0 as a neutral rating
    
    # Step 2: Extract similarity scores for the target user with all other users
    user_similarities = similarity_matrix.getrow(user_index).toarray().flatten()
    
    # Step 3: Filter the similarities for users who have rated the movie
    filtered_similarities = user_similarities[movie_rated_indices]
    
    # Step 4: Get indices of top k similar users among those who have rated the movie
    top_k_indices = np.argsort(filtered_similarities)[-k:]
    top_k_users_indices = movie_rated_indices[top_k_indices]
    top_k_similarities = filtered_similarities[top_k_indices]

    # Retrieve ratings for the movie from these top-k similar users
    top_k_ratings = csr_user_item_matrix[top_k_users_indices, movie_index].toarray().flatten()

    # Calculate the weighted average rating
    weighted_sum = np.dot(top_k_similarities, top_k_ratings)
    similarity_sum = np.sum(top_k_similarities)
    
    if similarity_sum > 0:
        predicted_rating = weighted_sum / similarity_sum
    else:
        # Use the overall average rating of the movie by all users as the default rating
        overall_average_rating = csr_user_item_matrix[:, movie_index].data.mean()
        predicted_rating = overall_average_rating if np.isfinite(overall_average_rating) else 3.0  # Assuming 3.0 as a neutral rating

    return predicted_rating


In [17]:

from sklearn.metrics import mean_squared_error
from math import sqrt
def evaluate_predictions_csr(validation_data, csr_user_item_matrix, similarity_matrix, k):
    """
    Evaluate the recommendation system by predicting ratings for each user-movie pair in the validation set
    using a CSR matrix and pre-computed similarity matrix, and comparing the predictions to the actual ratings using RMSE.

    Parameters:
    - validation_data: DataFrame containing 'UserIndex', 'MovieIndex', and 'Rating'.
    - csr_user_item_matrix: CSR matrix representing the user-item matrix from the training set.
    - similarity_matrix: Pre-computed similarity matrix as a CSR matrix.
    - k: The number of top similar users to consider when making predictions.
    
    Returns:
    - rmse: The root mean square error of the predicted ratings against the actual ratings.
    """
    actual_ratings = []
    predicted_ratings = []

    for _, row in validation_data.iterrows():
        user_index = row['UserIndex']
        movie_index = row['MovieIndex']
        actual_rating = row['Rating']
        
        # Check if the movie index is valid
        if movie_index in csr_user_item_matrix.indices:
            predicted_rating = predict_rating_with_similarity_matrix(csr_user_item_matrix, similarity_matrix, user_index, movie_index, k)
            actual_ratings.append(actual_rating)
            predicted_ratings.append(predicted_rating)
    
    # Calculate RMSE between actual and predicted ratings
    actual_ratings = np.array(actual_ratings)
    predicted_ratings = np.array(predicted_ratings)
    valid_mask = ~np.isnan(predicted_ratings)
    rmse = sqrt(mean_squared_error(actual_ratings[valid_mask], predicted_ratings[valid_mask]))

    return rmse


In [18]:
def map_ids_to_indices(df, user_id_to_index, movie_id_to_index):
    """
    Map user IDs and movie IDs to their respective indices.

    Parameters:
    - df: DataFrame containing 'CustomerID', 'MovieID', and other columns.
    - user_id_to_index: Dictionary mapping user IDs to indices.
    - movie_id_to_index: Dictionary mapping movie IDs to indices.

    Returns:
    - DataFrame with added columns 'UserIndex' and 'MovieIndex' for the respective indices.
    """

    # Copy to avoid modifying the original DataFrame
    modified_data = df.copy()
    
    # Map 'CustomerID' to 'UserIndex'
    modified_data['UserIndex'] = modified_data['CustomerID'].map(user_id_to_index)
    
    # Map 'MovieID' to 'MovieIndex'
    modified_data['MovieIndex'] = modified_data['MovieID'].map(movie_id_to_index)
    
    # Optional: drop rows where either UserIndex or MovieIndex is NaN (i.e., ID wasn't found)
    modified_data.dropna(subset=['UserIndex', 'MovieIndex'], inplace=True)
    
    # Convert indices to integers (they might be floats due to NaN handling)
    modified_data['UserIndex'] = modified_data['UserIndex'].astype(int)
    modified_data['MovieIndex'] = modified_data['MovieIndex'].astype(int)
    
    return modified_data

In [19]:
mapped_train_data = map_ids_to_indices (training_data, user_id_to_index, movie_id_to_index)

In [28]:
from sklearn.model_selection import KFold
import numpy as np

# Assuming k_values to test and your similarity matrix is already defined
k_values = [5, 20, 50, 100, 200]
similarity_matrix = cosine_similarity_matrix_csr
# Setup KFold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Initialize results storage
results = []

for k in k_values:
    fold_rmses = []  # Store RMSEs for each fold

    for train_indices, test_indices in kf.split(ratings_csr_matrix):
        # Splitting your data: ratings_csr_matrix doesn't change, so you just map validation set indices
        validation_data_fold = mapped_train_data.iloc[test_indices]

        # Evaluate predictions on this fold's test set
        rmse = evaluate_predictions_csr(validation_data_fold, ratings_csr_matrix, similarity_matrix, k)
        fold_rmses.append(rmse)

    # Calculate average RMSE for this k over all folds
    avg_rmse = np.mean(fold_rmses)
    results.append((k, avg_rmse))
    print(f"k={k}, Average RMSE={avg_rmse}")

# Find the best k value based on average RMSE
best_k, best_rmse = min(results, key=lambda x: x[1])
print(f"Best k: {best_k} with RMSE: {best_rmse}")


k=5, Average RMSE=1.036700203350159
k=20, Average RMSE=1.0673296373556096
k=50, Average RMSE=1.0153982283256808
k=100, Average RMSE=0.9958867788327659
k=200, Average RMSE=0.9747812369560493
Best k: 200 with RMSE: 0.9747812369560493


In [26]:
mapped_testing_data = map_ids_to_indices(testing_data, user_id_to_index, movie_id_to_index)

In [29]:
# Usage example
similarity_matrix = cosine_similarity_matrix_csr
k = 200  # Example value for k
rmse = evaluate_predictions_csr(mapped_testing_data, ratings_csr_matrix, similarity_matrix, k)
print(f"RMSE: {rmse}")

RMSE: 1.0615366311315348


In [30]:
def recommend_movies(user_id, user_item_matrix, similarity_matrix, df, N=10):
    """
    Recommend top N movies for a given user using a pre-calculated similarity matrix.

    Parameters:
    - user_id: The ID of the user for whom to generate recommendations.
    - user_item_matrix: DataFrame representing the user-item matrix (users as rows, movies as columns).
    - similarity_matrix: DataFrame representing the pre-calculated similarities between users.
    - movie_titles: DataFrame or Series mapping MovieIDs to movie titles.
    - N: Number of movies to recommend.

    Returns:
    - A list of tuples with (MovieID, Movie Title, Predicted Rating) for the top N recommended movies.
    """
    # Ensure user_id is the correct type
    user_id = str(user_id)
    
    # Get the top 25 most similar users to the target user
    top_25_users = similarity_matrix.loc[user_id].sort_values(ascending=False).head(25).index
    
    # Predict ratings for movies the user hasn't seen
    predicted_ratings = {}
    for movie_id in user_item_matrix.columns:
        # Skip if the user has already rated this movie
        if not pd.isna(user_item_matrix.at[user_id, movie_id]) and user_item_matrix.at[user_id, movie_id] != 0:
            continue
        
        # Calculate the weighted average of ratings from the top 25 similar users
        total_weight = 0
        weighted_sum = 0
        for similar_user in top_25_users:
            # Check if the similar user has rated the movie
            if pd.isna(user_item_matrix.at[similar_user, movie_id]) or user_item_matrix.at[similar_user, movie_id] == 0:
                continue
            similarity_score = similarity_matrix.at[user_id, similar_user]
            rating = user_item_matrix.at[similar_user, movie_id]
            weighted_sum += similarity_score * rating
            total_weight += similarity_score
        
        # Predict the rating if there were any weights, otherwise default to 0
        predicted_rating = weighted_sum / total_weight if total_weight > 0 else 0
        predicted_ratings[movie_id] = predicted_rating
    
    # Sort the predicted ratings and select the top N
    top_n_recommendations = sorted(predicted_ratings.items(), key=lambda x: x[1], reverse=True)[:N]
    
    # Fetch the titles for the recommended movies
    recommendations = [(movie_id, df[movie_id], rating) for movie_id, rating in top_n_recommendations]
    
    return recommendations


In [42]:
def recommend_movies2(user_id, user_item_matrix, similarity_matrix, N=10):
    """
    Recommend top N movies for a given user using a pre-calculated similarity matrix.

    Parameters:
    - user_id: The ID of the user for whom to generate recommendations.
    - user_item_matrix: CSR matrix representing the user-item interactions matrix.
    - similarity_matrix: CSR matrix representing the pre-calculated similarities between users.
    - N: Number of movies to recommend.

    Returns:
    - A list of tuples with (MovieID, Predicted Rating) for the top N recommended movies.
    """
    # Get the row corresponding to the user_id
    user_index = int(user_id)
    similarity_row = similarity_matrix.getrow(user_index)
    
    # Get indices of top similar users
    similar_users_indices = similarity_row.indices[:200]  # Get indices of top 200 similar users
    
    # Predict ratings for movies the user hasn't seen
    predicted_ratings = {}
    for movie_id in range(user_item_matrix.shape[1]):
        # Skip if the user has already rated this movie
        if user_item_matrix[user_index, movie_id] != 0:
            continue
        
        # Calculate the weighted average of ratings from the top similar users
        total_weight = 0
        weighted_sum = 0
        for similar_user_index in similar_users_indices:
            similarity_score = similarity_matrix[user_index, similar_user_index]
            rating = user_item_matrix[similar_user_index, movie_id]
            weighted_sum += similarity_score * rating
            total_weight += similarity_score
        
        # Predict the rating if there were any weights, otherwise default to 0
        predicted_rating = weighted_sum / total_weight if total_weight > 0 else 0
        predicted_ratings[movie_id] = predicted_rating
    
    # Sort the predicted ratings and select the top N
    top_n_recommendations = sorted(predicted_ratings.items(), key=lambda x: x[1], reverse=True)[:N]
    
    return top_n_recommendations


In [35]:
main_mapped_data = map_ids_to_indices(strat_sample_df, user_id_to_index, movie_id_to_index)

In [36]:
# Extract the rows (user indices), columns (movie indices), and data (ratings) for the CSR matrix
rows = main_mapped_data['UserIndex'].values
cols = main_mapped_data['MovieIndex'].values
data = main_mapped_data['Rating'].values

# Determine the shape of the CSR matrix
# The shape is (max_user_index + 1, max_movie_index + 1) because indices start from 0
num_users = main_mapped_data['UserIndex'].max() + 1
num_movies = main_mapped_data['MovieIndex'].max() + 1

# Create the CSR matrix
Main_csr_matrix = csr_matrix((data, (rows, cols)), shape=(num_users, num_movies))

In [37]:
main_similarity_matrix = cosine_similarity(Main_csr_matrix, dense_output=False)

In [39]:
print(main_similarity_matrix.shape)

(88639, 88639)


In [44]:
# Example usage
similarity_matrix = main_similarity_matrix
user_id = '12'  # Specify the user ID for which you want to generate recommendations
recommendations = recommend_movies2(user_id, Main_csr_matrix, similarity_matrix, N=10)

# Print the recommendations
for movie_id, predicted_rating in recommendations:
    print(f"Movie ID: {movie_id},  Predicted Rating: {predicted_rating}")

Movie ID: 2365,  Predicted Rating: 0.05451489502638481
Movie ID: 2492,  Predicted Rating: 0.040374468318392726
Movie ID: 1673,  Predicted Rating: 0.03738946082324682
Movie ID: 1894,  Predicted Rating: 0.03501843850974258
Movie ID: 2534,  Predicted Rating: 0.03452330456139721
Movie ID: 2459,  Predicted Rating: 0.033943952482750395
Movie ID: 53,  Predicted Rating: 0.032299574654714176
Movie ID: 2371,  Predicted Rating: 0.0317260095516319
Movie ID: 2446,  Predicted Rating: 0.02919626375876557
Movie ID: 783,  Predicted Rating: 0.028042095617435116
