<h4>Import and setup</h4>

In [46]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
# import os
from surprise import BaselineOnly, Dataset, Reader, SVD, NMF, SVDpp, accuracy, PredictionImpossible, KNNWithMeans, KNNBasic
from surprise.model_selection import cross_validate, train_test_split, GridSearchCV, PredefinedKFold
from surprise.model_selection.split import LeaveOneOut
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error, mean_absolute_error
from collections import defaultdict
import pandas as pd
import tempfile

In [47]:

np.random.seed(42)

genre_cols = ['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 
              'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 
              'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

column_names = ["item","title","genres","movie_name","movie_year","(no genres listed)","Action","Adventure","Animation","Children","Comedy","Crime","Documentary","Drama","Fantasy","Film-Noir","Horror","IMAX","Musical","Mystery","Romance","Sci-Fi","Thriller","War","Western","user","rating","rating_timestamp","rating_year","rating_month","rating_season,tag","tag_timestamp","cleaned_tag","tag_length","tag_year"]

data = pd.read_csv('../samples/combined_movies_ratings_tags.csv', names=column_names, index_col=False, skiprows=0)

reader = Reader(rating_scale=(1, 5))

ratings = data[['user', 'item', 'rating']]
ratings = ratings.iloc[1:]
ratings["rating"] = ratings["rating"].astype(float)

custom_data = Dataset.load_from_df(ratings, reader)

print(custom_data)

  data = pd.read_csv('../samples/combined_movies_ratings_tags.csv', names=column_names, index_col=False, skiprows=0)
  data = pd.read_csv('../samples/combined_movies_ratings_tags.csv', names=column_names, index_col=False, skiprows=0)


<surprise.dataset.DatasetAutoFolds object at 0x33b2263b0>


<h4>Train Test Split</h4>

In [48]:
# Train-test split
trainset, testset = train_test_split(custom_data, test_size=0.2)

# Convert trainset to dataframe (for content-based model)
train_df = pd.DataFrame(trainset.build_testset(), columns=['user', 'item', 'rating'])
test_df = pd.DataFrame(testset, columns=['user', 'item', 'rating'])

<h4>Get eligible users</h4>

In [49]:
# Step 1: Filter users with >= 5 test ratings
test_user_counts = test_df['user'].value_counts()
eligible_users = test_user_counts[test_user_counts >= 5].index.tolist()

# Different number of known ratings to test
known_ratings_list = [5]
# known_ratings_list = [5, 10, 15, 20]

<h4>Grid Search CV with SVD</h4>

In [50]:
# svd_param_grid = {
#     "n_factors":[50, 100],
#     "lr_all":[0.002, 0.005],
#     "reg_all": [0.02, 0.1]
# }

# svd_grid = GridSearchCV(SVD, svd_param_grid, measures=["rmse", "mae"], cv=5)
# svd_grid.fit(custom_data)

# # Train the SVD model
# svd = svd_grid.best_estimator["rmse"]

<h4>Movie data setup</h4>

In [51]:

movies = data[['item', 'title'] + genre_cols]
movies[genre_cols] = movies[genre_cols].astype(bool)
movies[genre_cols] = movies[genre_cols].astype(int)
# movies = movies.rename(columns={'movieId': 'item'})
movies = movies.drop_duplicates(subset='item', keep='first')

# Check for duplicate 'item' IDs
duplicate_items = movies[movies.duplicated(subset='item', keep=False)]

# Display the duplicates
print(duplicate_items)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movies[genre_cols] = movies[genre_cols].astype(bool)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movies[genre_cols] = movies[genre_cols].astype(int)


Empty DataFrame
Columns: [item, title, Action, Adventure, Animation, Children, Comedy, Crime, Documentary, Drama, Fantasy, Film-Noir, Horror, Musical, Mystery, Romance, Sci-Fi, Thriller, War, Western]
Index: []


<h4>Building user profiles for content-based recommender</h4>

In [52]:
# 4. Build User Profiles for Content-Based Recommender
# Step 4.1: Merge ratings and movie genres
train_merged = pd.merge(train_df[['user', 'item']], data, on=['user', 'item'], how='inner')
train_merged = train_merged[['user', 'item', 'rating', "title"]+genre_cols]
train_merged[genre_cols] = train_merged[genre_cols].astype(bool)
train_merged[genre_cols] = train_merged[genre_cols].astype(int)

# Step 4.2: Create user profiles based on genres
user_profiles = train_merged.groupby('user')[genre_cols].mean()

# Normalize user profiles (optional, helps with cosine similarity)
user_profiles = user_profiles.div(user_profiles.sum(axis=1), axis=0)


# 5. Build Item Profile Matrix
item_profiles = movies.set_index('item')[genre_cols]
item_profiles = item_profiles.div(item_profiles.sum(axis=1), axis=0)

# Count the number of ratings per user
user_rating_counts = train_df['user'].value_counts()

# Debugging: Check the shape of item_profiles
print(f"Shape of item_profiles: {item_profiles.shape}")

# Check for duplicate item_ids in item_profiles
if item_profiles.index.duplicated().any():
    print("Duplicate item_ids found in item_profiles.")
else:
    print("No duplicate item_ids found in item_profiles.")

Shape of item_profiles: (15318, 18)
No duplicate item_ids found in item_profiles.


<h4>Content based prediction function</h4>

In [53]:
# Step 4: Make Predictions
# Helper function: Content-based rating prediction
def predict_content_based(user_id, item_id):
    try:
        # Get the user profile and item profile
        user_vector = user_profiles.loc[user_id]
        
        # Ensure item_id is a valid index in item_profiles
        if item_id in item_profiles.index:
            item_vector = item_profiles.loc[item_id]
            if item_vector.ndim == 1:
                # Cosine similarity between user and item vectors
                similarity = np.dot(user_vector, item_vector) / (np.linalg.norm(user_vector) * np.linalg.norm(item_vector))
                # Scale similarity to rating scale (1-5)
                # Since similarity can be from -1 to 1, we adjust it:
                predicted_rating = 2.5 + 2.5 * similarity  # Center at 2.5, range approx 1-5
                return np.clip(predicted_rating, 1.0, 5.0)
            else:
                print(f"item_vector for item {item_id} is not 1-dimensional.")
                return train_df['rating'].mean()
        else:
            print(f"Item {item_id} not found in item_profiles.")
            return train_df['rating'].mean()
    except KeyError:
        # If user or item not found (cold start for genre), return global mean
        return train_df['rating'].mean()




<h4>Basic Weighted Hybrid Model</h4>

In [54]:
# Hybrid prediction function
def basic_weighted_hybrid_predict(user_id, item_id, algo):
    colab_weight=0.5
    content_weight=0.5

    # algo prediction
    try:
        colab_pred = algo.predict(user_id, item_id).est
    except:
        colab_pred = train_df['rating'].mean()
    
    # Content-based prediction
    content_pred = predict_content_based(user_id, item_id)
    
    # Weighted combination
    hybrid_pred = (colab_weight * colab_pred) + (content_weight * content_pred)
    return np.clip(hybrid_pred, 1.0, 5.0)

<h1>Switching Hybrid Model</h1>

In [55]:
# Hybrid prediction function
def switiching_weighted_hybrid_predict(user_id, item_id, algo):
    # Determine the number of ratings for the user
    num_ratings = user_rating_counts.get(user_id, 0)
    
    # Set weights based on the number of ratings
    if num_ratings == 0:
        colab_weight = 0.0
        content_weight = 1.0
    elif 1 <= num_ratings <= 5:
        colab_weight = 0.2
        content_weight = 0.8
    elif 6 <= num_ratings <= 10:
        colab_weight = 0.5
        content_weight = 0.5
    elif 6 <= num_ratings <= 15:
        colab_weight = 0.8
        content_weight = 0.2
    else:
        colab_weight = 1
        content_weight = 0
    
    # algo prediction
    try:
        colab_pred = algo.predict(user_id, item_id).est
    except:
        colab_pred = train_df['rating'].mean()
    
    # Content-based prediction
    content_pred = predict_content_based(user_id, item_id)
    
    # Weighted combination
    hybrid_pred = (colab_weight * colab_pred) + (content_weight * content_pred)
    return np.clip(hybrid_pred, 1.0, 5.0)


<h1>Ensemble switching hybrid (KNN and SVD)</h1>

In [56]:
# Hybrid prediction function
def ensemble_weighted_hybrid_predict(user_id, item_id, mf_algo, neighbour_algo):
    # Determine the number of ratings for the user
    num_ratings = user_rating_counts.get(user_id, 0)
    
    # Set weights based on the number of ratings
    if num_ratings == 0:
        mf_weight = 0.0
        neighbour_weight = 0.5
        content_weight = 0.5
    elif 1 <= num_ratings <= 5:
        mf_weight = 0.2
        neighbour_weight = 0.5
        content_weight = 0.3
    elif 6 <= num_ratings <= 10:
        mf_weight = 0.5
        neighbour_weight = 0.3
        content_weight = 0.2
    elif 10 <= num_ratings <= 15:
        mf_weight = 0.7
        neighbour_weight = 0.2
        content_weight = 0.1
    else:
        mf_weight = 1
        neighbour_weight = 0
        content_weight = 0
    
    # SVD prediction
    try:
        mf_pred = mf_algo.predict(user_id, item_id).est
    except:
        mf_pred = train_df['rating'].mean()
    
    # KNN prediction
    try:
        neighbour_pred = neighbour_algo.predict(user_id, item_id).est
    except:
        neighbour_pred = train_df['rating'].mean()
    
    # Content-based prediction
    content_pred = predict_content_based(user_id, item_id)
    
    # Weighted combination
    hybrid_pred = (mf_weight * mf_pred) + (neighbour_weight * neighbour_pred) + (content_weight * content_pred)
    return np.clip(hybrid_pred, 1.0, 5.0)


<h4>NDCG@K Calculation</h4>

In [57]:
def calculate_ndcg_at_k(ranked_list, ground_truth, k=10):
    if len(ranked_list) > k:
        ranked_list = ranked_list[:k]
    
    if not ground_truth:
        return 0.0
    
    # Calculate DCG
    dcg = 0.0
    for i, item in enumerate(ranked_list):
        if item in ground_truth:
            dcg += 1.0 / np.log2(i + 2)  # +2 because index starts from 0
    
    # Calculate IDCG
    idcg = 0.0
    ideal_relevant = min(len(ground_truth), k)
    idcg = sum(1.0 / np.log2(i + 2) for i in range(ideal_relevant))
    
    ndcg = dcg / idcg if idcg > 0 else 0.0
    return ndcg

<h4>Function to run hybrid models with cold start data</h4>

In [58]:
def run_hybrid_models(hybrid_functions, hybrid_names, algo, train_df, test_df, eligible_users, known_ratings_list, k=10):
    all_results = []

    # Prepare the test set for Surprise
    final_testset = [tuple(x) for x in test_df.to_numpy()]

    # Calculate the set of all movies
    all_movies = set(train_df['item'].unique())

    # Calculate the set of seen movies for each user
    seen_movies_dict = {user: set(train_df[train_df['user'] == user]['item']) for user in eligible_users}

    # Calculate the set of unseen movies for each user
    unseen_movies_dict = {user: list(all_movies - seen_movies_dict[user]) for user in eligible_users}

    for hybrid_func, hybrid_name in zip(hybrid_functions, hybrid_names):
        rmse_results = []
        mae_results = []
        mse_results = []
        ndcg_results = []

        # Iterate over each number of known ratings
        for known_ratings in known_ratings_list:
            print(f"Processing for {known_ratings} known ratings per user with {hybrid_name}...")
            
            # Step 3: Reduce training data to 'known_ratings' ratings per user for these test users (simulate cold start)
            limited_train_rows = []
            for user in eligible_users:
                user_ratings = train_df[train_df['user'] == user]
                if len(user_ratings) > known_ratings:
                    sampled = user_ratings.sample(known_ratings, random_state=42)
                else:
                    sampled = user_ratings
                limited_train_rows.append(sampled)

            # Step 4: Add all training data from non-eligible users (normal users)
            non_eligible_users_df = train_df[~train_df['user'].isin(eligible_users)]
            cold_start_train_df = pd.concat(limited_train_rows + [non_eligible_users_df], ignore_index=True)

            # Build training set for Surprise
            reader = Reader(rating_scale=(train_df['rating'].min(), train_df['rating'].max()))
            cold_start_data = Dataset.load_from_df(cold_start_train_df[['user', 'item', 'rating']], reader)
            cold_start_trainset = cold_start_data.build_full_trainset()

            # Train the SVD model (if needed)
            algo.fit(cold_start_trainset)

            # Predict and evaluate
            hybrid_predictions = []
            true_ratings = []
            ndcg_scores = []

            for (user, item, true_r) in final_testset:
                pred = hybrid_func(user, item, algo)
                if not np.isnan(pred) and not np.isnan(true_r):
                    hybrid_predictions.append(pred)
                    true_ratings.append(true_r)
                    
                    # Calculate NDCG@K for each user
                    user_test_ratings = test_df[test_df['user'] == user]
                    true_liked_movies = user_test_ratings[user_test_ratings['rating'] >= 4]['item'].tolist()
                    
                    if true_liked_movies:
                        predictions = [(movie_id, hybrid_func(user, movie_id, algo)) for movie_id in unseen_movies_dict[user]]
                        predictions.sort(key=lambda x: x[1], reverse=True)
                        top_k_movies = [movie_id for movie_id, _ in predictions[:k]]
                        
                        ndcg = calculate_ndcg_at_k(top_k_movies, true_liked_movies, k=k)
                        ndcg_scores.append(ndcg)

            # Evaluate
            rmse = np.sqrt(mean_squared_error(true_ratings, hybrid_predictions))
            mae = mean_absolute_error(true_ratings, hybrid_predictions)
            mse = mean_squared_error(true_ratings, hybrid_predictions)
            average_ndcg = np.mean(ndcg_scores)
            
            rmse_results.append((known_ratings, rmse))
            mae_results.append((known_ratings, mae))
            mse_results.append((known_ratings, mse))
            ndcg_results.append((known_ratings, average_ndcg))
            
            print(f"RMSE on filtered cold-start test users (with {known_ratings} training ratings each) for {hybrid_name}: {rmse}")
            print(f"MAE on filtered cold-start test users (with {known_ratings} training ratings each) for {hybrid_name}: {mae}")
            print(f"MSE on filtered cold-start test users (with {known_ratings} training ratings each) for {hybrid_name}: {mse}")
            print(f"NDCG@{k} on filtered cold-start test users (with {known_ratings} training ratings each) for {hybrid_name}: {average_ndcg}")

        # Create a DataFrame to store the results for the current model
        results_df = pd.DataFrame({
            'Model': hybrid_name,
            'Known Ratings': [r[0] for r in rmse_results],
            'RMSE': [r[1] for r in rmse_results],
            'MAE': [r[1] for r in mae_results],
            'MSE': [r[1] for r in mse_results],
            'NDCG@K': [r[1] for r in ndcg_results]
        })

        # Append the results to the all_results list
        all_results.append(results_df)

    # Concatenate all results into a single DataFrame
    final_results_df = pd.concat(all_results, ignore_index=True)
    return final_results_df


In [59]:
def run_ensemble_hybrid_models(hybrid_functions, hybrid_names, algo, algo_2, train_df, test_df, eligible_users, known_ratings_list, k=10):
    all_results = []

    # Prepare the test set for Surprise
    final_testset = [tuple(x) for x in test_df.to_numpy()]

    # Calculate the set of all movies
    all_movies = set(train_df['item'].unique())

    # Calculate the set of seen movies for each user
    seen_movies_dict = {user: set(train_df[train_df['user'] == user]['item']) for user in eligible_users}

    # Calculate the set of unseen movies for each user
    unseen_movies_dict = {user: list(all_movies - seen_movies_dict[user]) for user in eligible_users}

    for hybrid_func, hybrid_name in zip(hybrid_functions, hybrid_names):
        rmse_results = []
        mae_results = []
        mse_results = []
        ndcg_results = []

        # Iterate over each number of known ratings
        for known_ratings in known_ratings_list:
            print(f"Processing for {known_ratings} known ratings per user with {hybrid_name}...")
            
            # Step 3: Reduce training data to 'known_ratings' ratings per user for these test users (simulate cold start)
            limited_train_rows = []
            for user in eligible_users:
                user_ratings = train_df[train_df['user'] == user]
                if len(user_ratings) > known_ratings:
                    sampled = user_ratings.sample(known_ratings, random_state=42)
                else:
                    sampled = user_ratings
                limited_train_rows.append(sampled)

            # Step 4: Add all training data from non-eligible users (normal users)
            non_eligible_users_df = train_df[~train_df['user'].isin(eligible_users)]
            cold_start_train_df = pd.concat(limited_train_rows + [non_eligible_users_df], ignore_index=True)

            # Build training set for Surprise
            reader = Reader(rating_scale=(train_df['rating'].min(), train_df['rating'].max()))
            cold_start_data = Dataset.load_from_df(cold_start_train_df[['user', 'item', 'rating']], reader)
            cold_start_trainset = cold_start_data.build_full_trainset()

            # Train the SVD model (if needed)
            algo.fit(cold_start_trainset)
            algo_2.fit(cold_start_trainset)

            # Predict and evaluate
            hybrid_predictions = []
            true_ratings = []
            ndcg_scores = []

            for (user, item, true_r) in final_testset:
                pred = hybrid_func(user, item, algo, algo_2)
                if not np.isnan(pred) and not np.isnan(true_r):
                    hybrid_predictions.append(pred)
                    true_ratings.append(true_r)
                    
                    # Calculate NDCG@K for each user
                    user_test_ratings = test_df[test_df['user'] == user]
                    true_liked_movies = user_test_ratings[user_test_ratings['rating'] >= 4]['item'].tolist()
                    
                    if true_liked_movies:
                        predictions = [(movie_id, hybrid_func(user, movie_id, algo)) for movie_id in unseen_movies_dict[user]]
                        predictions.sort(key=lambda x: x[1], reverse=True)
                        top_k_movies = [movie_id for movie_id, _ in predictions[:k]]
                        
                        ndcg = calculate_ndcg_at_k(top_k_movies, true_liked_movies, k=k)
                        ndcg_scores.append(ndcg)

            # Evaluate
            rmse = np.sqrt(mean_squared_error(true_ratings, hybrid_predictions))
            mae = mean_absolute_error(true_ratings, hybrid_predictions)
            mse = mean_squared_error(true_ratings, hybrid_predictions)
            average_ndcg = np.mean(ndcg_scores)
            
            rmse_results.append((known_ratings, rmse))
            mae_results.append((known_ratings, mae))
            mse_results.append((known_ratings, mse))
            ndcg_results.append((known_ratings, average_ndcg))
            
            print(f"RMSE on filtered cold-start test users (with {known_ratings} training ratings each) for {hybrid_name}: {rmse}")
            print(f"MAE on filtered cold-start test users (with {known_ratings} training ratings each) for {hybrid_name}: {mae}")
            print(f"MSE on filtered cold-start test users (with {known_ratings} training ratings each) for {hybrid_name}: {mse}")
            print(f"NDCG@{k} on filtered cold-start test users (with {known_ratings} training ratings each) for {hybrid_name}: {average_ndcg}")

        # Create a DataFrame to store the results for the current model
        results_df = pd.DataFrame({
            'Model': hybrid_name,
            'Known Ratings': [r[0] for r in rmse_results],
            'RMSE': [r[1] for r in rmse_results],
            'MAE': [r[1] for r in mae_results],
            'MSE': [r[1] for r in mse_results],
            'NDCG@K': [r[1] for r in ndcg_results]
        })

        # Append the results to the all_results list
        all_results.append(results_df)

    # Concatenate all results into a single DataFrame
    final_results_df = pd.concat(all_results, ignore_index=True)
    return final_results_df


<h4>Run hybrid models with cold start data</h4>

In [60]:
hybrid_functions = [
    basic_weighted_hybrid_predict,
    # switiching_weighted_hybrid_predict,
    # ensemble_weighted_hybrid_predict,
]

hybrid_names = [
    'Basic Weighted Hybrid',
    # 'Switching Weighted Hybrid',
    # 'Ensemble Weighted Hybrid',
]

In [61]:


svd = SVD()
knn = KNNBasic()



In [62]:
final_results_df = run_hybrid_models(hybrid_functions, hybrid_names, svd,  train_df, test_df, eligible_users, known_ratings_list)
print(final_results_df)

Processing for 5 known ratings per user with Basic Weighted Hybrid...


KeyError: 109651

In [None]:
ensemble_hybrid_functions = [
    # basic_weighted_hybrid_predict,
    # switiching_weighted_hybrid_predict,
    ensemble_weighted_hybrid_predict,
]

ensemble_hybrid_names = [
    # 'Basic Weighted Hybrid',
    # 'Switching Weighted Hybrid',
    'Ensemble Weighted Hybrid',
]

In [None]:
final_results_df_2 = run_ensemble_hybrid_models(ensemble_hybrid_functions, ensemble_hybrid_names, svd, knn, train_df, test_df, eligible_users, known_ratings_list)
print(final_results_df_2)

Processing for 5 known ratings per user with Basic Weighted Hybrid...


KeyboardInterrupt: 

<h4>Prediction</h4>

In [None]:
# # Predict on the testset
# basic_weighted_hybrid_predictions = []
# basic_weighted_true_ratings = []

# for (user, item, true_r) in testset:
#     pred = basic_weighted_hybrid_predict(user, item, svd, colab_weight=0.7, content_weight=0.3)
#     # Ensure predictions and true ratings are not NaN
#     if not np.isnan(pred) and not np.isnan(true_r):
#         basic_weighted_hybrid_predictions.append(pred)
#         basic_weighted_true_ratings.append(true_r)

In [None]:
# # Step 5: Evaluate Hybrid Model
# # Calculate RMSE
# rmse = np.sqrt(mean_squared_error(basic_weighted_true_ratings, basic_weighted_hybrid_predictions))
# mae = mean_absolute_error(basic_weighted_true_ratings, basic_weighted_hybrid_predictions)

# print(f"Hybrid Model RMSE: {rmse:.4f}")
# print(f"Hybrid Model MAE: {mae:.4f}")

In [None]:
# # Predict on the testset
# switiching_weighted_hybrid_predictions = []
# switiching_weighted_true_ratings = []

# for (user, item, true_r) in testset:
#     pred = switiching_weighted_hybrid_predict(user, item)
#     # Ensure predictions and true ratings are not NaN
#     if not np.isnan(pred) and not np.isnan(true_r):
#         switiching_weighted_hybrid_predictions.append(pred)
#         switiching_weighted_true_ratings.append(true_r)

# # Step 5: Evaluate Hybrid Model
# # Calculate RMSE
# rmse = np.sqrt(mean_squared_error(switiching_weighted_true_ratings, switiching_weighted_hybrid_predictions))
# mae = mean_absolute_error(switiching_weighted_true_ratings, switiching_weighted_hybrid_predictions)

# print(f"Weighted Hybrid Model RMSE: {rmse:.4f}")
# print(f"Weighted Hybrid Model MAE: {mae:.4f}")

In [None]:

# # 4. Train the KNN Model
# knn = KNNBasic(sim_options={'name': 'cosine', 'user_based': True})
# knn.fit(trainset)

In [None]:
# # Predict on the testset
# ensemble_weighted_hybrid_predictions = []
# ensemble_weighted_true_ratings = []

# for (user, item, true_r) in testset:
#     pred = ensemble_weighted_hybrid_predict(user, item)
#     # Ensure predictions and true ratings are not NaN
#     if not np.isnan(pred) and not np.isnan(true_r):
#         ensemble_weighted_hybrid_predictions.append(pred)
#         ensemble_weighted_true_ratings.append(true_r)

# # Step 6: Evaluate Hybrid Model
# # Calculate RMSE
# rmse = np.sqrt(mean_squared_error(ensemble_weighted_true_ratings, ensemble_weighted_hybrid_predictions))
# mae = mean_absolute_error(ensemble_weighted_true_ratings, ensemble_weighted_hybrid_predictions)
# print(f"Knn Hybrid Model RMSE: {rmse:.4f}")
# print(f"Knn Hybrid Model MAE: {mae:.4f}")

In [None]:
# # Train the SVD model
# svdpp = SVDpp()
# svdpp.fit(trainset)

In [None]:
# # Predict on the testset
# hybrid_predictions = []
# true_ratings = []

# for (user, item, true_r) in testset:
#     pred = svdpp_weighted_hybrid_predict(user, item)
#     # Ensure predictions and true ratings are not NaN
#     if not np.isnan(pred) and not np.isnan(true_r):
#         hybrid_predictions.append(pred)
#         true_ratings.append(true_r)

# # Step 6: Evaluate Hybrid Model
# # Calculate RMSE
# rmse = np.sqrt(mean_squared_error(true_ratings, hybrid_predictions))
# mae = mean_absolute_error(true_ratings, hybrid_predictions)
# print(f"SVDpp Hybrid Model RMSE: {rmse:.4f}")
# print(f"SVDpp Hybrid Model MAE: {mae:.4f}")