In [1]:
import os

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [3]:
# Load the dataset
# The CSV location can be overridden with the BOOKS_CSV environment variable so
# the notebook is runnable on other machines; the default is the original path.
BOOKS_CSV = os.environ.get(
    'BOOKS_CSV',
    'C:/Users/Mystic/Desktop/Books_Recommendation/data/Books.csv',
)
# low_memory=False parses the file in a single pass, avoiding mixed-dtype
# inference across chunks.
books = pd.read_csv(BOOKS_CSV, low_memory=False)

In [5]:
# Inspect which columns the raw dataset provides
print(list(books.columns))

['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher', 'Image-URL-S', 'Image-URL-M', 'Image-URL-L']


In [7]:
books = (
    books
    # Short, code-friendly names for the two columns used downstream
    .rename(columns={'Book-Title': 'title', 'Book-Author': 'authors'})
    # No real description is available, so the publisher name (missing values
    # replaced by an empty string) stands in as a weak substitute
    .assign(description=lambda frame: frame['Publisher'].fillna(''))
)

In [9]:
# Keep only the first row for each title, comparing titles case-insensitively.
# The lower-cased helper column exists only for the duration of the chain.
books = (
    books
    .assign(title_lower=lambda frame: frame['title'].str.lower())
    .drop_duplicates(subset='title_lower')
    .drop(columns='title_lower')
)

In [11]:
# Build one space-separated text field per book: "<title> <authors> <description>".
# Missing values are replaced by empty strings before concatenation.
books['combined'] = (
    books['title'].fillna('')
    .add(' ')
    .add(books['authors'].fillna(''))
    .add(' ')
    .add(books['description'].fillna(''))
)

In [13]:
# Vectorize the combined text using TF-IDF
# English stop words are dropped by the vectorizer. fit_transform is given the
# 'combined' column, so tfidf_matrix holds one row per row of books as it
# exists at this point, in the same order.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(books['combined'])

In [15]:
# Fit a nearest-neighbour model on the TF-IDF rows. Neighbours are ranked by
# cosine distance, and algorithm='brute' selects scikit-learn's brute-force search.
nn_model = NearestNeighbors(metric='cosine', algorithm='brute')
nn_model.fit(tfidf_matrix)

In [17]:
# Renumber the rows 0..n-1 so that books.index equals the row position used by
# tfidf_matrix. drop=True discards the old index instead of inserting it as a
# stray 'index' column, which also keeps this cell safe to re-run.
books = books.reset_index(drop=True)

# Guard against hidden state: the positional lookup below is only valid if
# books still has exactly the rows that were vectorized.
assert len(books) == tfidf_matrix.shape[0], (
    "books and tfidf_matrix are out of sync; re-run the vectorization cell"
)

# Lookup table: lower-cased title -> row position in books / tfidf_matrix
indices = pd.Series(books.index, index=books['title'].str.lower())

In [19]:
def recommend_books(title, n_recommendations=5):
    """Return the titles of the books most similar to ``title``.

    The title is looked up case-insensitively in the module-level ``indices``
    Series (falling back to a whitespace-stripped match), its TF-IDF row is
    passed to ``nn_model.kneighbors`` and the neighbour positions are mapped
    back to ``books['title']``.

    Parameters
    ----------
    title : str
        Title of the book to find neighbours for.
    n_recommendations : int, default 5
        Maximum number of titles to return. Fewer are returned when the
        fitted matrix does not contain enough other books.

    Returns
    -------
    list of str
        Recommended titles, nearest first. ``["❌ Book not found."]`` when the
        title is unknown; an empty list when ``n_recommendations`` is below 1.
    """
    key = title.lower()
    if key not in indices:
        # Tolerate accidental leading/trailing whitespace in the query.
        key = key.strip()
        if key not in indices:
            return ["❌ Book not found."]

    if n_recommendations < 1:
        return []

    position = indices[key]
    # A non-unique title index yields a Series; use the first matching row.
    if isinstance(position, pd.Series):
        position = position.iloc[0]
    position = int(position)

    # Ask for one extra neighbour (the query book itself is normally among the
    # results) but never more than the number of fitted rows, which would make
    # kneighbors raise.
    n_neighbors = min(n_recommendations + 1, tfidf_matrix.shape[0])
    _, neighbor_positions = nn_model.kneighbors(
        tfidf_matrix[position], n_neighbors=n_neighbors
    )

    # Exclude the query by position rather than assuming it is the first hit:
    # with zero-distance ties another book can be ranked ahead of it.
    rec_indices = [
        int(pos) for pos in neighbor_positions.flatten() if int(pos) != position
    ][:n_recommendations]
    return books['title'].iloc[rec_indices].tolist()

In [21]:
# Example query using the default n_recommendations=5; the returned list is the cell output
recommend_books("The Hobbit")

['Lo Hobbit / The Hobbit',
 'Hobbit',
 'The Annotated Hobbit: The Hobbit, Or, There and Back Again',
 'The Hobbit: Or, There and Back Again',
 'The Hobbit: or There and Back Again']