## Project Step 2: Book Recommender System

In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# --- Load the datasets -------------------------------------------------------
# Both CSV files use ';' as the field separator.
ratings = pd.read_csv('Ratings.csv', sep=';')
books = pd.read_csv('Books.csv', sep=';')

# Keep only rows whose rating is zero or positive (negative ratings are dropped).
# .copy() gives an independent frame so the column assignments below do not
# write into a slice of the frame returned by read_csv.
ratings = ratings.loc[ratings['Rating'] >= 0].copy()

# --- Encode ids --------------------------------------------------------------
# Each distinct User-ID / ISBN gets a contiguous integer code. The categorical
# series are kept so the reverse lookups further down use the very same coding.
user_categories = ratings['User-ID'].astype('category')
isbn_categories = ratings['ISBN'].astype('category')
ratings['user_idx'] = user_categories.cat.codes
ratings['book_idx'] = isbn_categories.cat.codes

# --- User-item interaction matrix ---------------------------------------------
# Rows are user codes, columns are book codes, values are the ratings.
rating_values = ratings['Rating'].to_numpy()
row_idx = ratings['user_idx'].to_numpy()
col_idx = ratings['book_idx'].to_numpy()
user_book_matrix = csr_matrix((rating_values, (row_idx, col_idx)))

# --- Lookup dictionaries -------------------------------------------------------
# integer code -> original id, and ISBN -> title taken from the books file
idx_to_userid = {
    code: user_id for code, user_id in enumerate(user_categories.cat.categories)
}
idx_to_isbn = {
    code: isbn for code, isbn in enumerate(isbn_categories.cat.categories)
}
title_by_isbn = books.set_index('ISBN')['Title']
isbn_to_title = title_by_isbn.to_dict()

# --- User-user cosine similarity ------------------------------------------------
# dense_output=False keeps the result sparse.
print("Computing user similarity matrix...")
user_similarity = cosine_similarity(user_book_matrix, dense_output=False)

# --- Parameters -------------------------------------------------------------------
k_neighbors = 10          # neighbours consulted per user
top_recommendations = 5   # books kept per user
final_recommendations = []  # filled with one dict per recommended book

# Generate recommendations (user-based collaborative filtering).
#
# For every user:
#   1. pick up to `k_neighbors` other users with the highest cosine similarity,
#      considering only strictly positive similarities;
#   2. collect the books those neighbours rated (non-zero) that the user has not;
#   3. score each such book as sum(similarity * rating) / sum(similarity) over
#      the selected neighbours (a neighbour who did not rate the book counts as 0);
#   4. keep the `top_recommendations` best-scoring books.
for user in tqdm(range(user_book_matrix.shape[0]), desc="Generating recommendations"):
    similarities = user_similarity[user].toarray().ravel()
    similarities[user] = 0  # a user must never be their own neighbour

    # Restrict to users with positive similarity. Taking the last k entries of a
    # plain argsort would pad the neighbourhood with arbitrary zero-similarity
    # users, whose books would then surface as 0.0-score "recommendations".
    similar_users = np.flatnonzero(similarities > 0)
    if similar_users.size == 0:
        continue  # no overlap with anyone: nothing to base a recommendation on
    strongest = np.argsort(similarities[similar_users])[-k_neighbors:]
    top_users = similar_users[strongest]
    weights = similarities[top_users]

    neighbor_ratings = user_book_matrix[top_users]

    # Candidate books: rated (non-zero) by at least one neighbour, not by the user.
    # setdiff1d returns the candidates sorted and without duplicates.
    read_books = user_book_matrix[user].nonzero()[1]
    candidate_books = np.setdiff1d(neighbor_ratings.nonzero()[1], read_books)
    if candidate_books.size == 0:
        continue

    # One sparse matrix-vector product yields the similarity-weighted rating sum
    # for every book at once, replacing a per-book column slice of the CSR matrix.
    weighted_sums = np.asarray(neighbor_ratings.T.dot(weights)).ravel()
    candidate_scores = weighted_sums[candidate_books] / weights.sum()

    # Stable sort on the negated scores: best first, ties broken by lower book index.
    ranking = np.argsort(-candidate_scores, kind='stable')[:top_recommendations]
    for book, score in zip(candidate_books[ranking], candidate_scores[ranking]):
        isbn = idx_to_isbn[int(book)]
        final_recommendations.append({
            'User_ID': idx_to_userid[user],
            'Book_ID': isbn,
            'Book_Title': isbn_to_title.get(isbn, "Unknown Title"),
            'Recommendation_Score': round(float(score), 2)
        })

# Save recommendations to CSV.
# The columns are listed explicitly (same names and order as the keys of the
# dicts in final_recommendations) so the file always carries a header row,
# even when the list is empty and pandas could not infer any columns.
output_path = 'Top5_Book_Recommendations.csv'
recommendation_columns = ['User_ID', 'Book_ID', 'Book_Title', 'Recommendation_Score']
pd.DataFrame(final_recommendations, columns=recommendation_columns).to_csv(output_path, index=False)
print(f"Recommendations saved to {output_path}")

Computing user similarity matrix...


Generating recommendations: 100%|██████████| 105283/105283 [47:54<00:00, 36.63it/s] 


Recommendations saved to Top5_Book_Recommendations.csv
