In [12]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [13]:
r = pd.read_csv('goodbooks-10k-dataset/ratings.csv')
tr = pd.read_csv('goodbooks-10k-dataset/to_read.csv')
b = pd.read_csv('goodbooks-10k-dataset/books.csv')

t = pd.read_csv('goodbooks-10k-dataset/tags.csv')
bt = pd.read_csv('goodbooks-10k-dataset/book_tags.csv')
bg = pd.read_csv('goodbooks-10k-dataset/book_genre.csv')

In [14]:
books_df = bg.groupby('goodreads_book_id')['genre'].apply(lambda x: ' '.join(set(x))).reset_index()

# Combine book features
b = b.merge(books_df, how='left').fillna('')
b['content'] = b['title'] + ' ' + b['authors'] + ' ' + b['original_title'] + ' ' + b['genre']

In [15]:
from sklearn.model_selection import train_test_split

# Prepare the books data
books_df = b[['goodreads_book_id', 'title', 'authors', 'original_title', 'genre']].copy()
books_df = books_df.rename(columns={'goodreads_book_id': 'book_id'})
books_df['content'] = books_df['title'] + ' ' + books_df['authors'] + ' ' + books_df['original_title'] + ' ' + books_df['genre']

# Prepare ratings data
ratings = r[['user_id', 'book_id', 'rating']].copy()

# Split into train and test (80/20)
train_ratings, test_ratings = train_test_split(ratings, test_size=0.2, random_state=42)

# Further split test into validation and test if needed
# val_ratings, test_ratings = train_test_split(test_ratings, test_size=0.5, random_state=42)

In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(books_df['content'])

In [23]:
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from scipy.sparse import csr_matrix
from scipy.stats import pearsonr

def item_content_based_recommendation(books_df, ratings_df, user_id, n_recommendations=5):
    """
    Item content-based filtering using TF-IDF on genres/tags.

    Args:
        books_df: DataFrame (book_id, content)
        r: Ratings DataFrame (user_id, book_id, rating)
        user_id: Target user
        n_recommendations: Number of results

    Returns:
        List of recommended book IDs
    """

    book_ids = books_df['book_id'].values
    sim_matrix = cosine_similarity(tfidf_matrix)
    sim_df = pd.DataFrame(sim_matrix, index=book_ids, columns=book_ids)

    user_ratings = ratings_df[ratings_df['user_id'] == user_id]
    books_rated = user_ratings['book_id'].values
    ratings_given = user_ratings.set_index('book_id')['rating']

    scores = {}
    for book in books_rated:
        if book not in sim_df.index:
            continue
        similar_books = sim_df[book]
        for other_book, sim_score in similar_books.items():
            if other_book in books_rated:
                continue
            scores.setdefault(other_book, 0)
            scores[other_book] += sim_score * ratings_given.get(book, 0)

    ranked = pd.Series(scores).sort_values(ascending=False)
    return ranked.head(n_recommendations).index.tolist()

In [24]:
from sklearn.metrics import ndcg_score

def evaluate_recommendations(test_ratings, recommendations, k=5):
    """
    Evaluate recommendations against test set
    
    Args:
        test_ratings: DataFrame with true user-item ratings
        recommendations: Dict {user_id: list of recommended book_ids}
        k: Top-k recommendations to evaluate
    
    Returns:
        Dictionary of evaluation metrics
    """
    # Create a test set of user-item pairs with ratings >= threshold (e.g., 4)
    test_positives = test_ratings[test_ratings['rating'] >= 4]
    test_dict = test_positives.groupby('user_id')['book_id'].apply(list).to_dict()
    
    precision_scores = []
    recall_scores = []
    ndcg_scores = []
    
    for user_id, recs in recommendations.items():
        if user_id not in test_dict:
            continue
            
        true_positives = test_dict[user_id]
        if not true_positives:
            continue
            
        # Get top-k recommendations
        top_k_recs = recs[:k]
        
        # Calculate hits
        hits = len(set(top_k_recs) & set(true_positives))
        
        # Precision@k
        precision = hits / k
        precision_scores.append(precision)
        
        # Recall@k
        recall = hits / len(true_positives)
        recall_scores.append(recall)
        
        # NDCG@k (we'll use binary relevance here)
        relevance = [1 if book in true_positives else 0 for book in top_k_recs]
        ideal_relevance = sorted(relevance, reverse=True)
        if sum(ideal_relevance) > 0:
            ndcg = ndcg_score([ideal_relevance], [relevance], k=k)
            ndcg_scores.append(ndcg)
    
    return {
        'precision@k': np.mean(precision_scores) if precision_scores else 0,
        'recall@k': np.mean(recall_scores) if recall_scores else 0,
        'ndcg@k': np.mean(ndcg_scores) if ndcg_scores else 0,
        'coverage': len(recommendations) / test_ratings['user_id'].nunique(),
        'num_users_evaluated': len(precision_scores)
    }

In [25]:
import random
from tqdm import tqdm

# Randomly sample 5000 users from test set (excluding users with no train ratings)
test_users = test_ratings['user_id'].unique()
train_users = set(train_ratings['user_id'].unique())

# Only evaluate users that appear in both train and test
valid_users = list(set(test_users) & train_users)
sample_users = random.sample(valid_users, min(5000, len(valid_users)))

print(f"Evaluating on {len(sample_users)} users with both train and test ratings")

# Generate recommendations with progress bar
recommendations = {}
for user_id in tqdm(sample_users, desc="Generating recommendations"):
    try:
        recs = item_content_based_recommendation(books_df, train_ratings, user_id, n_recommendations=10)
        recommendations[user_id] = recs
    except Exception as e:
        # Uncomment below for debugging
        # print(f"Error for user {user_id}: {str(e)}")
        continue

metrics = evaluate_recommendations(test_ratings, recommendations, k=10)
print("Evaluation Metrics:")
print(f"Precision: {metrics['precision@k']:.4f}")
print(f"Recall: {metrics['recall@k']:.4f}")
print(f"NDCG: {metrics['ndcg@k']:.4f}")
print(f"Coverage: {metrics['coverage']:.2%}")
print(f"Users evaluated: {metrics['num_users_evaluated']}")

Generating recommendations:   0%|                      | 0/5000 [00:00<?, ?it/s]

Evaluating on 5000 users with both train and test ratings


Generating recommendations: 100%|█████████| 5000/5000 [5:13:28<00:00,  3.76s/it]


Evaluation Metrics:
Precision@5: 0.0246
Recall@5: 0.0163
NDCG@5: 0.4451
Coverage: 9.36%
Users evaluated: 4996
