In [2]:

import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

# Load the dataset 
books = pd.read_csv('/kaggle/input/goodbooks-10k/books.csv')
ratings = pd.read_csv('/kaggle/input/goodbooks-10k/ratings.csv')
book_tags = pd.read_csv('/kaggle/input/goodbooks-10k/book_tags.csv')
tags = pd.read_csv('/kaggle/input/goodbooks-10k/tags.csv')

# Create train and test datasets
train_ratings, test_ratings = train_test_split(ratings, test_size=0.2, random_state=42)
train_ratings.to_csv('/kaggle/working/train.csv', index=False)
test_ratings.to_csv('/kaggle/working/test.csv', index=False)

# Print confirmation
print("Datasets created: train.csv and test.csv")

# Collaborative Filtering setup using Truncated SVD
def collaborative_recommender(user_id, n_recommendations=10):
    # Remove duplicates by averaging ratings for each user-book pair
    ratings_cleaned = ratings.groupby(['user_id', 'book_id']).agg({'rating': 'mean'}).reset_index()

    # Pivot the cleaned ratings into a user-book matrix
    user_book_matrix = ratings_cleaned.pivot(index='user_id', columns='book_id', values='rating').fillna(0)
    
    # Apply TruncatedSVD for dimensionality reduction
    svd = TruncatedSVD(n_components=20)
    latent_matrix = svd.fit_transform(user_book_matrix)
    
    # Compute the cosine similarity between users
    user_similarity = cosine_similarity(latent_matrix)
    
    # Get the similarity scores for the specific user
    similar_users = list(enumerate(user_similarity[user_id - 1]))
    
    # Sort the users by similarity scores in descending order
    similar_users = sorted(similar_users, key=lambda x: x[1], reverse=True)
    
    # Find top similar users
    top_users = similar_users[1:10] 
    
    # Aggregate ratings from similar users for recommendation
    top_user_ids = [user[0] + 1 for user in top_users]  # Adjust index to user_id
    similar_user_ratings = ratings_cleaned[ratings_cleaned['user_id'].isin(top_user_ids)]
    
    # Recommend books that the user hasn't rated yet
    user_rated_books = ratings_cleaned[ratings_cleaned['user_id'] == user_id]['book_id'].tolist()
    recommended_books = similar_user_ratings[~similar_user_ratings['book_id'].isin(user_rated_books)]
    top_recommended_books = recommended_books['book_id'].value_counts().head(n_recommendations).index.tolist()

    # Return the book titles of the top recommendations
    return books[books['book_id'].isin(top_recommended_books)][['original_title', 'authors']]

# Content-based filtering (based on genre, author, or recent book)
def content_based_recommender(book_title=None):
    # Recommend books similar to a given book based on title
    if book_title is not None:
        book = books[books['original_title'].str.contains(book_title, case=False, na=False)].iloc[0]
        
        # Filter similar books by tags or authors
        tag_based_books = book_tags[book_tags['goodreads_book_id'] == book['book_id']]['tag_id']
        similar_books = book_tags[book_tags['tag_id'].isin(tag_based_books)]['goodreads_book_id'].unique()
        
        return books[books['book_id'].isin(similar_books)][['original_title', 'authors']].head(10)
    
    return pd.DataFrame(columns=['original_title', 'authors'])  # Return empty DataFrame if no title provided

#  Hybrid Recommender System combining content-based and collaborative filtering
def hybrid_recommender(user_id, favorite_genre=None, favorite_author=None, recent_book=None, n_recommendations=10):
    # Content-based filtering: Initialize an empty DataFrame for content-based recommendations
    content_recs = pd.DataFrame()

    # If the user inputs a recent book they liked
    if recent_book:
        content_recs = content_based_recommender(recent_book)
    
    # If the user inputs a favorite genre
    if favorite_genre:
        # Get the tag ID for the input genre
        genre_tag_id = tags[tags['tag_name'].str.contains(favorite_genre, case=False, na=False)].iloc[0]['tag_id']
        genre_books = book_tags[book_tags['tag_id'] == genre_tag_id]['goodreads_book_id'].unique()
        genre_books_df = books[books['book_id'].isin(genre_books)]
        content_recs = pd.concat([content_recs, genre_books_df[['original_title', 'authors']]])
    
    # If the user inputs a favorite author
    if favorite_author:
        author_books = books[books['authors'].str.contains(favorite_author, case=False, na=False)]
        content_recs = pd.concat([content_recs, author_books[['original_title', 'authors']]])
    
    # Drop duplicate recommendations and limit the result
    content_recs = content_recs.drop_duplicates().head(n_recommendations)

    # Collaborative filtering: If the user has past ratings, use collaborative filtering
    if user_id in ratings['user_id'].unique():
        collaborative_recs = collaborative_recommender(user_id, n_recommendations)
        # Combine content-based and collaborative recommendations
        final_recs = pd.concat([content_recs, collaborative_recs]).drop_duplicates().head(n_recommendations)
    else:
        # If the user has no past ratings, return only content-based recommendations
        final_recs = content_recs

    return final_recs

# Running the Recommender System with sample preferences
user_id = 1  # Example user_id, adjust as necessary for your dataset
favorite_genre = 'Fantasy'  # Example genre
favorite_author = 'J.K. Rowling'  # Example author
recent_book = 'Harry Potter'  # Example recent book

# Run the hybrid recommender based on the user input
recommendations = hybrid_recommender(
    user_id=user_id,
    favorite_genre=favorite_genre,
    favorite_author=favorite_author,
    recent_book=recent_book,
    n_recommendations=10
)

# Display the recommendations
print("\nHere are your book recommendations:")
print(recommendations)


Datasets created: train.csv and test.csv

Here are your book recommendations:
                             original_title                      authors
0                          The Hunger Games              Suzanne Collins
1  Harry Potter and the Philosopher's Stone  J.K. Rowling, Mary GrandPré
2                                  Twilight              Stephenie Meyer
3                     To Kill a Mockingbird                   Harper Lee
4                          The Great Gatsby          F. Scott Fitzgerald
5                    The Fault in Our Stars                   John Green
6        The Hobbit or There and Back Again               J.R.R. Tolkien
7                    The Catcher in the Rye                J.D. Salinger
8                          Angels & Demons                     Dan Brown
9                       Pride and Prejudice                  Jane Austen
