In [1]:
#Search based Recommendation System
#Load Datasets
import pandas as pd
books = pd.read_csv("/content/drive/MyDrive/Books/Books.csv")
ratings = pd.read_csv('/content/drive/MyDrive/Books/Ratings.csv')

  books = pd.read_csv("/content/drive/MyDrive/Books/Books.csv")


In [2]:
# Merging both dataset
df = pd.merge(books, ratings, on='ISBN', how='left')
df.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,User-ID,Book-Rating
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,2.0,0.0
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,8.0,5.0
2,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,11400.0,0.0
3,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,11676.0,8.0
4,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,41385.0,0.0


In [3]:
#Data Cleaning
df.isnull().sum()

Unnamed: 0,0
ISBN,0
Book-Title,0
Book-Author,2
Year-Of-Publication,0
Publisher,2
Image-URL-S,0
Image-URL-M,0
Image-URL-L,4
User-ID,1209
Book-Rating,1209


In [5]:
df.dropna(inplace=True)
df = df.sample(n=20000, random_state=42).reset_index(drop=True)


In [8]:
df.to_csv('/content/drive/MyDrive/Books/final_df.csv')

## ***Search-Based Recommendation System***

In [10]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

df = pd.read_csv("/content/drive/MyDrive/Books/final_df.csv")

# Drop duplicates based on 'Book-Title'
df = df.drop_duplicates(subset=['Book-Title']).reset_index(drop=True)

tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['Book-Title'])  # Recompute TF-IDF after dropping duplicates

In [11]:
from sklearn.metrics.pairwise import cosine_similarity

def search_based_recommendation(book_title, df,tfidf, tfidf_matrix, top_n=10):
    """
    Recommend books based on similarity of titles using TF-IDF and cosine similarity.

    Parameters:
        book_title (str): The title of the book to search for recommendations.
        df (pd.DataFrame): The dataset containing book information.
        tfidf_matrix (sparse matrix): Precomputed TF-IDF matrix for book titles.
        top_n (int): Number of recommendations to return.

    Returns:
        pd.DataFrame: Recommended books (title, rating, image URL).
    """


    # Transform the input title to match TF-IDF dimensions
    query_vector = tfidf.transform([book_title])

    # Compute cosine similarity between the query and all book titles
    cosine_sim = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Get indices of the top N most similar books (excluding the input book itself)
    sim_scores = list(enumerate(cosine_sim))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)  # Sort by similarity
    sim_scores = sim_scores[1:top_n + 1]  # Exclude the input book itself

    # Get the indices of the recommended books
    book_indices = [i[0] for i in sim_scores]

    # Return the relevant columns for the recommended books
    return df[['Book-Title', 'Book-Rating', 'Image-URL-M']].iloc[book_indices].reset_index(drop=True)

In [12]:
# Example usage
book_to_search = "The Fountainhead"  # Replace with the title you want to search
recommended_books = search_based_recommendation(book_to_search, df,tfidf, tfidf_matrix)

recommended_books

Unnamed: 0,Book-Title,Book-Rating,Image-URL-M
0,The Book of the Crime,7.0,http://images.amazon.com/images/P/0553238116.0...
1,The Novel,8.0,http://images.amazon.com/images/P/0449221431.0...
2,"The Return of the King (The Lord of the Rings,...",0.0,http://images.amazon.com/images/P/039548930X.0...
3,The Return of the Shadow (The History of The L...,7.0,http://images.amazon.com/images/P/0261102249.0...
4,The Book of the Sun,10.0,http://images.amazon.com/images/P/0762401400.0...
5,The War of the Ring (The History of The Lord o...,8.0,http://images.amazon.com/images/P/0618083596.0...
6,"The Lion, the Witch, and the Wardrobe (The Chr...",8.0,http://images.amazon.com/images/P/0064471047.0...
7,"The Lion, the Witch, and the Wardrobe",9.0,http://images.amazon.com/images/P/0590254766.0...
8,The Fellowship of the Ring (The Lord of the Ri...,9.0,http://images.amazon.com/images/P/0345339703.0...
9,The Hobbit and The Lord of the Rings,10.0,http://images.amazon.com/images/P/0345340426.0...


In [13]:
# Example usage
book_to_search = "Pinocchio"  # Replace with the title you want to search
recommended_books = search_based_recommendation(book_to_search, df,tfidf, tfidf_matrix)

recommended_books

Unnamed: 0,Book-Title,Book-Rating,Image-URL-M
0,The Adventures of Pinocchio: The Story of a Pu...,0.0,http://images.amazon.com/images/P/0968876803.0...
1,Walt Disney's Pinocchio and His Puppet Show Ad...,0.0,http://images.amazon.com/images/P/0394826264.0...
2,Tender Is the Night: Essays in Criticism,0.0,http://images.amazon.com/images/P/0835792463.0...
3,The Hot Dog Companion: A Connoiseur's Guide to...,0.0,http://images.amazon.com/images/P/0867307617.0...
4,Flesh and Blood,0.0,http://images.amazon.com/images/P/0881845337.0...
5,Athenaise (Phoenix 60p Paperbacks),7.0,http://images.amazon.com/images/P/1857997409.0...
6,Lessons in Truth,0.0,http://images.amazon.com/images/P/0871591081.0...
7,"Lifelines: Patterns of Work, Love, and Learnin...",0.0,http://images.amazon.com/images/P/1555423647.0...
8,The Corpse Had a Familiar Face : Covering Miam...,0.0,http://images.amazon.com/images/P/0743493648.0...
9,She's Come Undone (Oprah's Book Club (Paperback)),5.0,http://images.amazon.com/images/P/0671003755.0...


In [14]:
# save files for search base recommendation
import pickle

pickle.dump(tfidf,open("/content/drive/MyDrive/Books/tfidf.pkl",'wb'))
pickle.dump(tfidf_matrix, open("/content/drive/MyDrive/Books/tfidf_matrix.pkl",'wb'))