# 📚 Interactive Book Recommender (User-Based Filtering)
This notebook generates the `.pkl` files used in your Flask app.

In [None]:
# ✅ Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [None]:
# ✅ Step 2: Read the three CSV exports (books, ratings, users) from the working directory
books, ratings, users = (
    pd.read_csv(csv_path)
    for csv_path in ('books.csv', 'ratings.csv', 'users.csv')
)

In [None]:
# ✅ Step 3: Give the columns used by later steps shorter names
books = books.rename(
    columns={
        'Book-Title': 'Title',
        'Book-Author': 'Author',
        'Image-URL-M': 'Image',
    }
)
ratings = ratings.rename(columns={'Book-Rating': 'Rating'})

In [None]:
# ✅ Step 4: Keep only non-zero ratings, then only users with more than 50 of them
ratings = ratings.loc[ratings['Rating'] > 0]
# Boolean Series indexed by User-ID: True where the user has more than 50 ratings.
active_users = ratings['User-ID'].value_counts().gt(50)
kept_user_ids = active_users.index[active_users.to_numpy()]
ratings = ratings.loc[ratings['User-ID'].isin(kept_user_ids)]

In [None]:
# ✅ Step 5: Attach book details to each rating (inner join on ISBN)
merged_data = pd.merge(ratings, books, how='inner', on='ISBN')

In [None]:
# ✅ Step 6: Build the user × title rating matrix
# Repeated (user, title) pairs are averaged; cells with no rating become 0.
user_book_matrix = pd.pivot_table(
    merged_data,
    values='Rating',
    index='User-ID',
    columns='Title',
    aggfunc='mean',
)
user_book_matrix = user_book_matrix.fillna(0)

In [None]:
# ✅ Step 7: Pairwise cosine similarity between the users' rating vectors
user_similarity = cosine_similarity(user_book_matrix)
# Overwrite every user's similarity to themselves (the diagonal) with 0.
user_similarity[np.diag_indices_from(user_similarity)] = 0

In [None]:
# ✅ Step 8: The 50 most-rated titles, with author and cover image, for the homepage
top_books = merged_data.groupby('Title')['Rating'].agg(['count', 'mean'])
top_books = top_books.rename(columns={'count': 'RatingCount', 'mean': 'AverageRating'})
top_books = top_books.sort_values(by='RatingCount', ascending=False).iloc[:50]
# One row of display details per title, joined onto the ranking by 'Title'.
top_books = top_books.merge(
    books[['Title', 'Author', 'Image']].drop_duplicates(subset='Title'),
    on='Title',
    how='left',
)

In [None]:
# ✅ Step 9: Save everything for Flask app
# Each artifact is written inside a `with` block so its file handle is flushed
# and closed as soon as the dump finishes, rather than being left open until
# the interpreter happens to collect it.
artifacts = (
    ('user_book_matrix.pkl', user_book_matrix),
    ('user_similarity.pkl', user_similarity),
    ('top_books.pkl', top_books),
    ('book_details.pkl', books),
)
for pkl_path, artifact in artifacts:
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(artifact, pkl_file)

print("✅ All files generated! You can now run app.py")

In [None]:
# ✅ Step 10: Evaluate with Precision@5
# We'll take the first few users in the matrix and check how many recommended books they actually rated highly

def precision_at_k(user_id, k=5, *, n_neighbors=3, like_threshold=8,
                   matrix=None, similarity=None):
    """Return Precision@k for one user's neighbour-based recommendations.

    Up to ``k`` books are collected from the highest-rated books of the
    user's most similar neighbours, and the result is the fraction of ``k``
    that the user has also rated above ``like_threshold``.

    Args:
        user_id: Label of the user in ``matrix.index``.
        k: Maximum number of recommended books; also the denominator of the
            returned ratio. Must be a positive integer.
        n_neighbors: Number of most-similar users to draw books from.
        like_threshold: A rating strictly greater than this counts as "liked".
        matrix: User × book rating DataFrame. Defaults to the notebook-level
            ``user_book_matrix``.
        similarity: Square user × user similarity array whose rows follow
            ``matrix.index`` order. Defaults to the notebook-level
            ``user_similarity``.

    Returns:
        ``hits / k`` as a float, or ``None`` when the user is not in the
        matrix or has no book rated above ``like_threshold``.
    """
    if matrix is None:
        matrix = user_book_matrix
    if similarity is None:
        similarity = user_similarity

    # Explicit lookup instead of a bare ``except`` that would hide real errors.
    positions = np.flatnonzero(matrix.index == user_id)
    if positions.size == 0:
        return None
    index = positions[0]

    user_ratings = matrix.iloc[index]
    liked_books = set(user_ratings[user_ratings > like_threshold].index)
    if not liked_books:
        return None

    # Rank all users by similarity, best first. The user's own position is
    # removed explicitly rather than by skipping the first entry: when the
    # similarity diagonal has been zeroed, the first entry is the best
    # neighbour, not the user themself.
    ranked = np.argsort(-np.asarray(similarity[index]), kind="stable")
    neighbours = [pos for pos in ranked if pos != index][:n_neighbors]

    recommended_books = set()
    for pos in neighbours:
        neighbour_ratings = matrix.iloc[pos]
        favourites = (
            neighbour_ratings[neighbour_ratings > like_threshold]
            .sort_values(ascending=False, kind="stable")
            .index
        )
        for book in favourites:
            recommended_books.add(book)
            if len(recommended_books) >= k:
                break
        if len(recommended_books) >= k:
            break

    hits = len(recommended_books & liked_books)
    return hits / k

# Run evaluation for a few users
sample_users = user_book_matrix.index[:10]
# Score each user exactly once; users that cannot be evaluated (None) are skipped.
precisions = [
    score
    for score in map(precision_at_k, sample_users)
    if score is not None
]

if precisions:
    print(f"✅ Average Precision@5 for 10 sample users: {round(np.mean(precisions), 2)}")
else:
    print("Not enough data to evaluate precision.")
