## Import

In [21]:
# Install the surprise package
!pip install -q -U scikit-surprise
from surprise import Dataset, Reader
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import arff
from surprise import KNNWithMeans
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV
from surprise import KNNBasic
from surprise import SVD
from surprise.model_selection import cross_validate
from surprise import accuracy
import random
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
from surprise import AlgoBase

# Load Data

In [2]:
path = os.path.expanduser('~/Documents/Studium/Master/Web Mining/Project/data_kindle_preprocessed.xlsx')
data_preprocessed = pd.read_excel(path, index_col=[0])

## Data Preparation for Memory-Based and Model-Based

In [3]:
#Reading the dataset
#df_filtered = data.filter(['reviewerID', 'asin', 'rating'], axis=1)
#reader = Reader(rating_scale=(1, 5))
#data_filtered = Dataset.load_from_df(df_filtered,reader)
#Splitting the dataset
#trainset, testset = train_test_split(data_filtered, test_size=0.3,random_state=10)

In [4]:
# Define the reader
reader = Reader(rating_scale=(1, 5))

# Load the dataset from DataFrame
data = Dataset.load_from_df(data_preprocessed[["reviewerID", "asin", "rating"]], reader)

# Get the full training set
trainset = data.build_full_trainset()

# Create test set
testset = trainset.build_testset()

## Data Preparation for Content-Based

In [5]:
data_contentBased = data_preprocessed.copy()

In [6]:
# Replace non-finite values with NaN
data_contentBased['publication_year'] = data_contentBased['publication_year'].replace([np.inf, -np.inf], np.nan)

# Convert NaN to a placeholder value (e.g., -1)
data_contentBased['publication_year'].fillna(-1, inplace=True)

# Convert the column to integers
data_contentBased['publication_year'] = data_contentBased['publication_year'].astype(int).astype(str)

#preprare the textfiled 'book_info' to make vector
data_contentBased["book_info"] =  data_contentBased['category_string'] + '  ' + data_contentBased['brand'] + '  ' + data_contentBased['paid_free']+ ' ' + data_contentBased['print_length_category'] + ' ' + data_contentBased['publication_year'] + '  ' + data_contentBased['language'] 
data_contentBased.drop(['rating', 'brand', 'reviewerID', 'language','print_length_category', 'publication_year', 'category_string', 'paid_free'],axis=1,inplace=True)
data_contentBased.drop_duplicates(subset=['asin', 'title'], inplace=True)
data_contentBased.head()

Unnamed: 0,asin,title,book_info
0,B0015Z7VFQ,Look What Santa Brought (The Perfect Gift) - K...,"Kindle Store, Kindle eBooks, Literature & Fict..."
1,B0017HNV1U,Babylonian Laws- The Oldest Code of Laws in th...,"Kindle Store, Kindle eBooks, History King of ..."
2,B001892EI8,The Billionaire&s Baby (Harlequin Mini # 19) -...,"Kindle Store, Kindle eBooks, Romance Leanne B..."
4,B001892DGG,The Wallflower (Halle Puma Book 1) - Kindle ed...,"Kindle Store, Kindle eBooks, Romance Visit Am..."
5,B001BRD238,Secrets: a PsyCop Novel - Kindle edition,"Kindle Store, Kindle eBooks, Romance Visit Am..."


# Collaborative Filtering Recommender System (Memory-Based)

In [7]:
def item_based_KNNWithMeans_recommender_system_test(trainset, testset, data):

    # Define the parameter grid with an increased range of k and more CV folds
    param_grid = {'k': [3, 5, 10, 15, 20, 30, 35, 40],
                  'sim_options': {'name': ['cosine'],
                                  'user_based': [False]}
                  }
    
    # Use GridSearchCV to find the best model
    gs = GridSearchCV(KNNWithMeans, param_grid, measures=['rmse', 'mae'], cv=5)
    gs.fit(data)
    
    # Get the best model from grid search
    best_algo = gs.best_estimator['rmse']
    print("Best RMSE:", gs.best_score['rmse'])
    print("Best Parameters:", gs.best_params['rmse'])
    
    # Evaluate the best model using cross-validation
    cross_validate(best_algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    
    # Train the best model with the new parameters
    # raus lassen -> algo = KNNWithMeans(k=gs.best_params['rmse']['k'], sim_options={'name': 'cosine', 'user_based': False})
    best_algo.fit(trainset) # best algo statt algo
    
    # Evaluate the trained model on the test set
    test_pred = best_algo.test(testset)
    print("Item-based Model : Test Set")
    accuracy.rmse(test_pred, verbose=True)
    
    return test_pred

# Content Recommender System

### TF-IDF Verctorizer and Cosine Similarity

In [8]:
def tfidf_recommender_system(data_contentBased):
    # Vectorize the text data using TfidfVectorizer
    tfidf_vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(data_contentBased['book_info'])
    
    # Split the data into train, validation, and test sets
    train_data, test_data = train_test_split(data_contentBased, test_size=0.2, random_state=42)
    train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)
    
    # Debugging print
    print("Train Data Shape:", train_data.shape)
    print("Validation Data Shape:", val_data.shape)
    print("Test Data Shape:", test_data.shape)
    
    # Calculate cosine similarity matrices
    tfidf_matrix_train = tfidf_vectorizer.transform(train_data['book_info'])
    tfidf_matrix_val = tfidf_vectorizer.transform(val_data['book_info'])
    tfidf_matrix_test = tfidf_vectorizer.transform(test_data['book_info'])
    
    # Calculate cosine similarity matrices with correct dimensions
    cosine_sim_train = cosine_similarity(tfidf_matrix_train, tfidf_matrix_train)
    cosine_sim_val = cosine_similarity(tfidf_matrix_val, tfidf_matrix_train)
    cosine_sim_test = cosine_similarity(tfidf_matrix_test, tfidf_matrix_train)
    
    # Debugging print
    print("Cosine similarity matrix size (Train):", cosine_sim_train.shape)
    print("Cosine similarity matrix size (Validation):", cosine_sim_val.shape)
    print("Cosine similarity matrix size (Test):", cosine_sim_test.shape)
    
    # Implement function to recommend books
    def recommend(asin, cosine_sim_matrix, train_data):
        indices = train_data[train_data['asin'] == asin].index.tolist()
        if not indices:
            return []
        
        recommended_books = []
        for idx in indices:
            try:
                sim_scores = list(enumerate(cosine_sim_matrix[idx]))
                sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
                sim_scores = sim_scores[1:11]  # Top 10 similar items
                book_indices = [i[0] for i in sim_scores]
                recommended_books.extend(train_data['asin'].iloc[book_indices].tolist())
            except IndexError:
                continue
        
        return recommended_books if recommended_books else []  # Return empty list if no recommendations found
    
    # Pre-calculate recommendations for all books where recommendations are available
    all_asins = train_data['asin'].unique()
    pred_content_based_recommender_system = {asin: recommend(asin, cosine_sim_train, train_data)[:10] for asin in all_asins}
    
    # Evaluate the recommender system
    def evaluate_recommender(test_data, cosine_sim_matrix, train_data):
        y_true = []
        y_pred = []
        for asin in test_data['asin']:
            true_books = set(test_data[test_data['asin'] == asin]['asin'])
            recommended_books = set(pred_content_based_recommender_system.get(asin, []))  # Get precalculated recommendations
            y_true.append(len(true_books))
            y_pred.append(len(recommended_books))
    
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        return mae, rmse
    
    # Evaluate the recommender system
    mae_val, rmse_val = evaluate_recommender(val_data, cosine_sim_val, train_data)
    mae_test, rmse_test = evaluate_recommender(test_data, cosine_sim_test, train_data)
    
    print("Validation MAE:", mae_val)
    print("Validation RMSE:", rmse_val)
    print("Test MAE:", mae_test)
    print("Test RMSE:", rmse_test)

    return pred_content_based_recommender_system


In [9]:
pred_content_based_recommender_system = tfidf_recommender_system(data_contentBased)

Train Data Shape: (8700, 3)
Validation Data Shape: (2900, 3)
Test Data Shape: (2901, 3)
Cosine similarity matrix size (Train): (8700, 8700)
Cosine similarity matrix size (Validation): (2900, 8700)
Cosine similarity matrix size (Test): (2901, 8700)
Validation MAE: 1.0
Validation RMSE: 1.0
Test MAE: 1.0
Test RMSE: 1.0


In [10]:
pred_content_based_recommender_system

{'B00ITYTPYQ': [],
 'B00HMTTAQM': ['B00KO11PTA',
  'B00PNPOQ4S',
  'B00QL14R88',
  'B00C9RRTFQ',
  'B00N2HQW1Y',
  'B00AGGKQ16',
  'B006OIRC5S',
  'B00UDINSN0',
  'B012EBM6XG',
  'B00YSP8WMG'],
 'B00KOZTEXA': ['B016TEMQ9S',
  'B00G60VU2U',
  'B00FPYHJ9Q',
  'B00PO8EHSO',
  'B00JU84UD6',
  'B00LCAV4B6',
  'B011SB1Z8A',
  'B0167ZY4NK',
  'B00LNM8L6E',
  'B01G5SRJGS'],
 'B00P37QLBW': [],
 'B00HDFDVGK': [],
 'B00XWFRCX8': [],
 'B00KUHD3M0': ['B01FOSUOIA',
  'B00KZKZTNI',
  'B01BW83B72',
  'B0184SY5IM',
  'B00S6ROWI0',
  'B014CGOUOY',
  'B00RSRKCGA',
  'B00ND67XBC',
  'B00EW9HWJM',
  'B01DILPIRW'],
 'B01EM1UHO6': [],
 'B00R04OXC0': [],
 'B018FKLJV0': [],
 'B00ASZC4JW': ['B019M34ALK',
  'B01CHHHSL2',
  'B019PJ5LI2',
  'B00K1RB5JW',
  'B01DL6EQC2',
  'B00RC83ATG',
  'B00GW6S0DU',
  'B019MCXVS4',
  'B00KSND3LW',
  'B015TYIW42'],
 'B00WZML6RC': [],
 'B00951DMYQ': ['B00IMSGR04',
  'B00G61DDDI',
  'B00D71EQRW',
  'B00L4RE3O2',
  'B00XIMO26K',
  'B013RZCL6U',
  'B00RD4J9EE',
  'B00BUT8MIW',
  'B00

In [11]:
prediction_item_based_KNNWithMeans_test = item_based_KNNWithMeans_recommender_system_test(trainset, testset, data)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing th

In [12]:
prediction_item_based_KNNWithMeans_test

[Prediction(uid='A3OC8ZG1S3OAVA', iid='B0015Z7VFQ', r_ui=1.0, est=1, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='A3OC8ZG1S3OAVA', iid='B003BEDV08', r_ui=3.0, est=2.6028814935064934, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='A3OC8ZG1S3OAVA', iid='B00405R608', r_ui=3.0, est=2.6028814935064934, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='A3OC8ZG1S3OAVA', iid='B0043GX2HU', r_ui=3.0, est=2.6028814935064934, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='A3OC8ZG1S3OAVA', iid='B004C44556', r_ui=4.0, est=2.1028814935064934, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='A3OC8ZG1S3OAVA', iid='B004EYT9CS', r_ui=5.0, est=4.6028814935064934, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='A3OC8ZG1S3OAVA', iid='B004UC6H7O', r_ui=5.0, est=4.595738636363636, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='A3OC8ZG1S3OAVA', iid='B004W0C520', 

In [13]:
def hybrid_recommender_system(prediction, data_contentBased):
    # Convert prediction to a dictionary with user IDs as keys
    user_item_recs = {}
    for pred in prediction:
        user_id = pred.uid
        item_id = pred.iid
        if user_id not in user_item_recs:
            user_item_recs[user_id] = []
        user_item_recs[user_id].append(item_id)

    # Initialize dictionary for hybrid recommendations
    hybrid_recs = {}

    # For each user, generate recommendations
    for user_id, items in user_item_recs.items():
        # Initialize list to store recommendations for the user
        user_recommendations = items.copy()  # Store original recommendations

        # For each item recommended to the user
        for item in items:
            # Get similar item recommendations from TF-IDF
            similar_items = data_contentBased.get(item)

            # Check if similar_items is not None before iterating
            if similar_items is not None:
                # Extend user_recommendations with similar items (excluding already recommended items)
                user_recommendations.extend([item for item in similar_items if item not in items])

        # Count occurrences of each item ID (ASIN) in user_recommendations
        item_counts = {}
        for item in user_recommendations:
            if item in item_counts:
                item_counts[item] += 1
            else:
                item_counts[item] = 1

        # Sort items by count (descending order) and prioritize original recommendations if counts are equal
        sorted_items = sorted(item_counts.items(), key=lambda x: (-x[1], x[0]))

        # Take the top ten ASINs from sorted_items
        top_ten_asins = [item[0] for item in sorted_items[:10]]

        # Add user recommendations to the hybrid recommendations
        hybrid_recs[user_id] = top_ten_asins

    return hybrid_recs


In [14]:
# Call the hybrid recommender system function
hybrid_recommendations = hybrid_recommender_system(prediction_item_based_KNNWithMeans_test, pred_content_based_recommender_system)

In [15]:
hybrid_recommendations

{'A3OC8ZG1S3OAVA': ['B00GW6S0DU',
  'B00HND20QY',
  'B00K1RB5JW',
  'B00RC83ATG',
  'B00KSND3LW',
  'B00M3GWXHW',
  'B00MAX4ZPQ',
  'B015TYIW42',
  'B019PJ5LI2',
  'B01AKXRKO0'],
 'A2U8YWPP1PYHJM': ['B00ND67XBC',
  'B00A946MNQ',
  'B00L2X37QS',
  'B00N6CY244',
  'B00IHN8U0Y',
  'B00NLKJ4O4',
  'B00O0A6Y74',
  'B00Q10EP3Q',
  'B00UR3GQ1M',
  'B00V5AP3OW'],
 'A3361XGKYF17S3': ['B00M3GWXHW',
  'B00MAX4ZPQ',
  'B00HND20QY',
  'B00QSTMEOW',
  'B00UR3GQ1M',
  'B00YDI785U',
  'B015A9WZOE',
  'B015KXNYRM',
  'B019FZFFK0',
  'B007WNXQS6'],
 'AVGYENZU56KBR': ['B00HND20QY',
  'B00D2XOTN6',
  'B00JD5B6A6',
  'B004UWU9IM',
  'B00F5FBRD4',
  'B00FPTE110',
  'B005WVPKMU',
  'B00D01DLRK',
  'B00E6ZL3D8',
  'B00ENU0NDM'],
 'A13Q7A1UWMNUU6': ['B00D2XOTN6',
  'B00HKTD5RY',
  'B00MZAIT6K',
  'B00W05SHZS',
  'B00WRBQPE0',
  'B016ARHVIU',
  'B019DZKCTQ',
  'B0019BI4XE',
  'B001BRD238',
  'B002HE1GS2'],
 'A310KT1UQC5UNU': ['B00MAX4ZPQ',
  'B00HND20QY',
  'B00M3GWXHW',
  'B01AKXRKO0',
  'B01D3C6IAW',
  'B00M7

In [16]:
def hybrid_recommender_system(prediction, data_contentBased, testset):
    # Convert prediction to a dictionary with user IDs as keys
    user_item_recs = {}
    for pred in prediction:
        user_id = pred.uid
        item_id = pred.iid
        if user_id not in user_item_recs:
            user_item_recs[user_id] = []
        user_item_recs[user_id].append(item_id)

    # Initialize dictionary for hybrid recommendations
    hybrid_recs = {}

    # For each user, generate recommendations
    for user_id, items in user_item_recs.items():
        # Initialize list to store recommendations for the user
        user_recommendations = items.copy()  # Store original recommendations

        # For each item recommended to the user
        for item in items:
            # Get similar item recommendations from TF-IDF
            similar_items = data_contentBased.get(item)

            # Check if similar_items is not None before iterating
            if similar_items is not None:
                # Extend user_recommendations with similar items (excluding already recommended items)
                user_recommendations.extend([item for item in similar_items if item not in items])

        # Count occurrences of each item ID (ASIN) in user_recommendations
        item_counts = {}
        for item in user_recommendations:
            if item in item_counts:
                item_counts[item] += 1
            else:
                item_counts[item] = 1

        # Sort items by count (descending order) and prioritize original recommendations if counts are equal
        sorted_items = sorted(item_counts.items(), key=lambda x: (-x[1], x[0]))

        # Take the top ten ASINs from sorted_items
        top_ten_asins = [item[0] for item in sorted_items[:10]]

        # Add user recommendations to the hybrid recommendations
        hybrid_recs[user_id] = top_ten_asins

    # Calculate MAE and RMSE
    actual_ratings = []
    predicted_ratings = []
    for user_id, items in hybrid_recs.items():
        for item in items:
            actual_rating = [rating for uid, iid, rating in testset if uid == user_id and iid == item]
            if actual_rating:  # Check if actual rating exists
                actual_ratings.append(actual_rating[0])
                predicted_ratings.append(5)  # Assuming all predicted ratings are 5 (can be replaced with actual predictions)

    mae = mean_absolute_error(actual_ratings, predicted_ratings)
    rmse = np.sqrt(mean_squared_error(actual_ratings, predicted_ratings))

    return hybrid_recs, mae, rmse

# Call the hybrid recommender system function
hybrid_recommendations, mae, rmse = hybrid_recommender_system(prediction_item_based_KNNWithMeans_test, pred_content_based_recommender_system, testset)


In [17]:
# Evaluate the hybrid recommender system
hybrid_recommendations, mae, rmse = hybrid_recommender_system(prediction_item_based_KNNWithMeans_test, pred_content_based_recommender_system, testset)

# Print MAE and RMSE
print("MAE:", mae)
print("RMSE:", rmse)


MAE: 0.6
RMSE: 1.02469507659596


In [18]:
hybrid_recommendations

{'A3OC8ZG1S3OAVA': ['B00GW6S0DU',
  'B00HND20QY',
  'B00K1RB5JW',
  'B00RC83ATG',
  'B00KSND3LW',
  'B00M3GWXHW',
  'B00MAX4ZPQ',
  'B015TYIW42',
  'B019PJ5LI2',
  'B01AKXRKO0'],
 'A2U8YWPP1PYHJM': ['B00ND67XBC',
  'B00A946MNQ',
  'B00L2X37QS',
  'B00N6CY244',
  'B00IHN8U0Y',
  'B00NLKJ4O4',
  'B00O0A6Y74',
  'B00Q10EP3Q',
  'B00UR3GQ1M',
  'B00V5AP3OW'],
 'A3361XGKYF17S3': ['B00M3GWXHW',
  'B00MAX4ZPQ',
  'B00HND20QY',
  'B00QSTMEOW',
  'B00UR3GQ1M',
  'B00YDI785U',
  'B015A9WZOE',
  'B015KXNYRM',
  'B019FZFFK0',
  'B007WNXQS6'],
 'AVGYENZU56KBR': ['B00HND20QY',
  'B00D2XOTN6',
  'B00JD5B6A6',
  'B004UWU9IM',
  'B00F5FBRD4',
  'B00FPTE110',
  'B005WVPKMU',
  'B00D01DLRK',
  'B00E6ZL3D8',
  'B00ENU0NDM'],
 'A13Q7A1UWMNUU6': ['B00D2XOTN6',
  'B00HKTD5RY',
  'B00MZAIT6K',
  'B00W05SHZS',
  'B00WRBQPE0',
  'B016ARHVIU',
  'B019DZKCTQ',
  'B0019BI4XE',
  'B001BRD238',
  'B002HE1GS2'],
 'A310KT1UQC5UNU': ['B00MAX4ZPQ',
  'B00HND20QY',
  'B00M3GWXHW',
  'B01AKXRKO0',
  'B01D3C6IAW',
  'B00M7

In [24]:
# Create a dummy algorithm class that does nothing for the DummyAlgorithm class 
# that initializes the trainset attribute with a dummy value. Here's how you can modify the code:

class DummyAlgorithm(AlgoBase):
    def __init__(self):
        pass

    def fit(self, trainset):
        self.trainset = trainset  # Initialize trainset attribute with a dummy value
        pass

    def estimate(self, u, i):
        return 5  # Return a dummy rating of 5 for all predictions


def hybrid_recommender_system(prediction, data_contentBased, data):
    # Convert prediction to a dictionary with user IDs as keys
    user_item_recs = {}
    for pred in prediction:
        user_id = pred.uid
        item_id = pred.iid
        if user_id not in user_item_recs:
            user_item_recs[user_id] = []
        user_item_recs[user_id].append(item_id)

    # Initialize dictionary for hybrid recommendations
    hybrid_recs = {}

    # For each user, generate recommendations
    for user_id, items in user_item_recs.items():
        # Initialize list to store recommendations for the user
        user_recommendations = items.copy()  # Store original recommendations

        # For each item recommended to the user
        for item in items:
            # Get similar item recommendations from TF-IDF
            similar_items = data_contentBased.get(item)

            # Check if similar_items is not None before iterating
            if similar_items is not None:
                # Extend user_recommendations with similar items (excluding already recommended items)
                user_recommendations.extend([item for item in similar_items if item not in items])

        # Count occurrences of each item ID (ASIN) in user_recommendations
        item_counts = {}
        for item in user_recommendations:
            if item in item_counts:
                item_counts[item] += 1
            else:
                item_counts[item] = 1

        # Sort items by count (descending order) and prioritize original recommendations if counts are equal
        sorted_items = sorted(item_counts.items(), key=lambda x: (-x[1], x[0]))

        # Take the top ten ASINs from sorted_items
        top_ten_asins = [item[0] for item in sorted_items[:10]]

        # Add user recommendations to the hybrid recommendations
        hybrid_recs[user_id] = top_ten_asins

    # Cross-validate the hybrid recommender system
    dummy_algo = DummyAlgorithm()
    cross_val_results = cross_validate(dummy_algo, data, measures=['mae', 'rmse'], cv=5, verbose=True)
    avg_mae = np.mean(cross_val_results['test_mae'])
    avg_rmse = np.mean(cross_val_results['test_rmse'])

    # Calculate MAE and RMSE on the test data
    actual_ratings = []
    predicted_ratings = []
    for user_id, items in hybrid_recs.items():
        for item in items:
            actual_rating = [rating for (uid, iid, rating, _) in data.raw_ratings if uid == user_id and iid == item]
            if actual_rating:  # Check if actual rating exists
                actual_ratings.append(actual_rating[0])
                predicted_ratings.append(5)  # Assuming all predicted ratings are 5 (can be replaced with actual predictions)

    mae = mean_absolute_error(actual_ratings, predicted_ratings)
    rmse = np.sqrt(mean_squared_error(actual_ratings, predicted_ratings))

    # Print MAE and RMSE
    print("MAE:", mae)
    print("RMSE:", rmse)

    return hybrid_recs, avg_mae, avg_rmse, mae, rmse

# Call the hybrid recommender system function
hybrid_recommendations, avg_mae, avg_rmse, mae, rmse = hybrid_recommender_system(prediction_item_based_KNNWithMeans_test, pred_content_based_recommender_system, data)


Evaluating MAE, RMSE of algorithm DummyAlgorithm on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.6217  0.6446  0.6548  0.6137  0.6063  0.6282  0.0185  
RMSE (testset)    1.1045  1.1344  1.1545  1.1026  1.0835  1.1159  0.0253  
Fit time          0.00    0.01    0.00    0.00    0.00    0.00    0.00    
Test time         0.01    0.02    0.01    0.01    0.01    0.01    0.00    
MAE: 0.6
RMSE: 1.02469507659596


In [25]:
hybrid_recommendations

{'A3OC8ZG1S3OAVA': ['B00GW6S0DU',
  'B00HND20QY',
  'B00K1RB5JW',
  'B00RC83ATG',
  'B00KSND3LW',
  'B00M3GWXHW',
  'B00MAX4ZPQ',
  'B015TYIW42',
  'B019PJ5LI2',
  'B01AKXRKO0'],
 'A2U8YWPP1PYHJM': ['B00ND67XBC',
  'B00A946MNQ',
  'B00L2X37QS',
  'B00N6CY244',
  'B00IHN8U0Y',
  'B00NLKJ4O4',
  'B00O0A6Y74',
  'B00Q10EP3Q',
  'B00UR3GQ1M',
  'B00V5AP3OW'],
 'A3361XGKYF17S3': ['B00M3GWXHW',
  'B00MAX4ZPQ',
  'B00HND20QY',
  'B00QSTMEOW',
  'B00UR3GQ1M',
  'B00YDI785U',
  'B015A9WZOE',
  'B015KXNYRM',
  'B019FZFFK0',
  'B007WNXQS6'],
 'AVGYENZU56KBR': ['B00HND20QY',
  'B00D2XOTN6',
  'B00JD5B6A6',
  'B004UWU9IM',
  'B00F5FBRD4',
  'B00FPTE110',
  'B005WVPKMU',
  'B00D01DLRK',
  'B00E6ZL3D8',
  'B00ENU0NDM'],
 'A13Q7A1UWMNUU6': ['B00D2XOTN6',
  'B00HKTD5RY',
  'B00MZAIT6K',
  'B00W05SHZS',
  'B00WRBQPE0',
  'B016ARHVIU',
  'B019DZKCTQ',
  'B0019BI4XE',
  'B001BRD238',
  'B002HE1GS2'],
 'A310KT1UQC5UNU': ['B00MAX4ZPQ',
  'B00HND20QY',
  'B00M3GWXHW',
  'B01AKXRKO0',
  'B01D3C6IAW',
  'B00M7