In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [3]:
# Read the book catalogue from CSV; later cells use its 'title' and 'description' columns
df = pd.read_csv(filepath_or_buffer="books_test.csv")

In [4]:
# Replace missing descriptions with "" before they are vectorized
df['description'] = df['description'].fillna(value="")

In [5]:
# Turn every description into a row of TF-IDF weights, ignoring English stop words
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(raw_documents=df["description"])

In [None]:
# Function to recommend the most similar books (top 3 by default)
def recommend_books(input_title, tfidf_matrix=tfidf_matrix, top_n=3):
    """Print the books whose descriptions are most similar to ``input_title``.

    Similarity is the linear kernel (dot product) between the TF-IDF row of
    the looked-up book and every row of ``tfidf_matrix``.

    Parameters
    ----------
    input_title : str
        Title to look up; matched by exact equality against ``df['title']``.
        When several rows share the title, the first one is used.
    tfidf_matrix : matrix, optional
        Matrix of TF-IDF rows to compare against. Defaults to the
        ``tfidf_matrix`` object that exists when this function is defined.
        Assumes row ``i`` corresponds to the ``i``-th row of ``df`` -- verify
        when passing a custom matrix.
    top_n : int, optional
        Maximum number of recommendations to print (default 3).

    Returns
    -------
    None
        The recommendations (or a "not found" message) are printed.

    Raises
    ------
    ValueError
        If ``top_n`` is less than 1.
    """
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n}")

    # Look the title up by row *position*, not index label: both the matrix
    # row access and the ``iloc`` access below are positional, so a label
    # would be wrong whenever ``df`` does not have a default 0..n-1 index.
    match_positions = (df['title'] == input_title).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        print(f"'{input_title}' not found in the dataset.")
        return

    idx = int(match_positions[0])

    # Similarity of the input book against every book (including itself)
    cosine_sim = linear_kernel(tfidf_matrix[idx], tfidf_matrix).flatten()

    # Rank from most to least similar, then drop the input book explicitly.
    # It is not guaranteed to rank first: an empty description gives it a
    # self-similarity of 0, and a book with an identical description ties it.
    ranked = cosine_sim.argsort()[::-1]
    similar_indices = ranked[ranked != idx][:top_n]

    # Print recommendations
    print(f"\nTop {top_n} recommendations for '{input_title}':")
    titles = df['title']
    for i in similar_indices:
        print(f"- {titles.iloc[i]}")
