In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def recommend_books(book_title, df, vectorizer, cosine_sim):
    """
    Recommend the books most similar to a given title.

    Similarity is read from a pre-computed pairwise matrix; row/column ``i``
    of ``cosine_sim`` must correspond to the ``i``-th row (by position) of
    ``df``.

    Args:
        book_title (str): Exact title of the book to get recommendations for.
            If the title occurs more than once, the first occurrence is used.
        df (pd.DataFrame): Book data with a ``'title'`` column. Its index may
            be arbitrary; rows are matched to ``cosine_sim`` by position.
        vectorizer: Unused. Kept so existing callers that pass the fitted
            vectorizer keep working.
        cosine_sim: Square similarity matrix (e.g. an ``np.ndarray``) where
            ``cosine_sim[i][j]`` is the similarity between rows ``i`` and ``j``.

    Returns:
        pd.Series | None: Up to five titles ordered from most to least
        similar, never including the queried row itself. ``None`` (after
        printing a message) when ``book_title`` is not present in ``df``.
    """
    # Resolve the title to a row *position*, not an index label: cosine_sim
    # is positional, and df.index is not guaranteed to be 0..n-1.
    matches = (df['title'] == book_title).to_numpy().nonzero()[0]
    if matches.size == 0:
        print(f"Book '{book_title}' not found in the dataset. Please check the title.")
        return None
    book_pos = int(matches[0])

    scores = cosine_sim[book_pos]

    # Drop the queried row explicitly instead of assuming it sorts first;
    # another row with identical text ties at 1.0 and may be ranked ahead.
    # sorted() is stable, so equally-scored books keep their DataFrame order.
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != book_pos),
        key=lambda pos: scores[pos],
        reverse=True,
    )

    return df['title'].iloc[ranked[:5]]


# Demo catalogue as (title, description) pairs; expanded below into the
# list-of-dicts shape that the DataFrame constructor consumes.
_CATALOGUE = (
    ('The Lord of the Rings', 'A fantasy epic about a quest to destroy a powerful ring.'),
    ('The Hobbit', 'A fantasy adventure about a hobbit who joins a quest to reclaim a stolen treasure.'),
    ('The Hitchhikers Guide to the Galaxy', 'A comedic science fiction series about an Englishman who is a survivor of the Earth’s destruction.'),
    ('Dune', 'A science fiction masterpiece set on a desert planet at the center of a galactic conflict.'),
    ('Game of Thrones', 'A medieval fantasy series following the struggle for power among noble houses.'),
    ('1984', 'A dystopian novel about a totalitarian regime and a man\'s rebellion against it.'),
    ('Brave New World', 'A dystopian science fiction novel about a society controlled by technology and social conditioning.'),
    ('The Chronicles of Narnia', 'A high fantasy series about children who discover a magical world through a wardrobe.'),
    ('Ender\'s Game', 'A military science fiction novel about a brilliant child trained to lead humanity against an alien race.'),
    ('Foundation', 'A science fiction series about a mathematician who predicts the fall of a galactic empire.'),
)
books_data = [
    {'title': name, 'description': blurb}
    for name, blurb in _CATALOGUE
]

df = pd.DataFrame(books_data)

# Title and description are joined into one text field so that both
# contribute terms to the TF-IDF representation.
df['combined_features'] = df['title'].str.cat(df['description'], sep=' ')

# Fit TF-IDF (English stop words removed) on the combined text.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'])

# With a single argument, cosine_similarity compares the matrix with itself,
# giving the full book-by-book similarity matrix.
cosine_sim = cosine_similarity(tfidf_matrix)

# Example query.
book_to_recommend_for = 'The Lord of the Rings'
recommendations = recommend_books(
    book_to_recommend_for,
    df,
    tfidf_vectorizer,
    cosine_sim,
)

# recommend_books returns None when the title is unknown; it has already
# printed its own message in that case.
if recommendations is not None:
    print(f"Top 5 recommendations for '{book_to_recommend_for}':")
    print(recommendations)

Top 5 recommendations for 'The Lord of the Rings':
1                             The Hobbit
4                        Game of Thrones
7               The Chronicles of Narnia
2    The Hitchhikers Guide to the Galaxy
3                                   Dune
Name: title, dtype: object
