In [3]:
# Install required packages
!pip install pandas numpy scikit-learn --quiet

import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error
from math import sqrt

# -------------------------
# Load dataset (Book ratings)
# -------------------------
df = pd.read_csv(
    'https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv'
)

# For speed, use only first 10k ratings
df = df.head(10000)

# Create user-item rating matrix
rating_matrix = df.pivot_table(index='user_id', columns='book_id', values='rating').fillna(0)

# -------------------------
# Apply Matrix Factorization (SVD)
# -------------------------
svd = TruncatedSVD(n_components=20, random_state=42)
matrix = svd.fit_transform(rating_matrix)

# Reconstruct predicted ratings
pred_ratings = np.dot(matrix, svd.components_)

# -------------------------
# Evaluate with RMSE
# -------------------------
# Convert both to numpy arrays
true_values = rating_matrix.values
mask = true_values > 0

rmse = sqrt(mean_squared_error(true_values[mask], pred_ratings[mask]))
print(f"RMSE: {rmse:.4f}")

# -------------------------
# Recommend top 5 books for a given user
# -------------------------
user_id = 1  # Change as needed
user_index = rating_matrix.index.get_loc(user_id)
sorted_indices = np.argsort(-pred_ratings[user_index])
top_items = rating_matrix.columns[sorted_indices][:5]

print(f"\nTop 5 recommendations for user {user_id}:")
for item in top_items:
    print(f"Book ID: {item}")


RMSE: 2.7058

Top 5 recommendations for user 1:
Book ID: 115
Book ID: 476
Book ID: 570
Book ID: 323
Book ID: 94
