In [6]:
import pandas as pd

# Read the CSV file into a DataFrame
df = pd.read_csv(r"C:\Users\DELL\Downloads\data.csv (1)\data.csv")

# Display the first 5 rows
print(df.head().to_markdown(index=False, numalign="left", stralign="left"))

# Print the column names and their data types
print(df.info())

| isbn13        | isbn10     | title          | subtitle   | authors                         | categories                    | thumbnail                                                                                             | description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      

In [11]:
# Select only the `description` and `title` columns
df_text = df[['description', 'title']]

# Fill missing values in `description` and `title` columns with an empty string
df_text['description'] = df_text['description'].fillna('')
df_text['title'] = df_text['title'].fillna('')

# Combine the `description` and `title` columns into a new column called `combined_features`
df_text['combined_features'] = df_text['description'] + ' ' + df_text['title']

# Save the updated DataFrame to a new CSV file
df_text.to_csv('preprocessed_data.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_text['description'] = df_text['description'].fillna('')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_text['title'] = df_text['title'].fillna('')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_text['combined_features'] = df_text['description'] + ' ' + df_text['title']


In [12]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the data - assuming preprocessed_data.csv is available
df_text = pd.read_csv('preprocessed_data.csv')

In [13]:
# Initialize the TF-IDF Vectorizer
# stop_words='english' removes common English words that don't add much meaning
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Fit and transform the 'combined_features' column
# This converts the text data into a matrix of TF-IDF features
tfidf_matrix = tfidf_vectorizer.fit_transform(df_text['combined_features'])

# Compute the cosine similarity matrix
# Cosine similarity measures the similarity between two non-zero vectors
# It is a commonly used metric for text similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [14]:
def get_recommendations(title, cosine_sim_matrix=cosine_sim, df=df_text):
    """
    Generates book recommendations based on a given book title using cosine similarity.

    Args:
        title (str): The title of the book for which to find recommendations.
        cosine_sim_matrix (np.array): The pre-computed cosine similarity matrix.
        df (pd.DataFrame): The DataFrame containing book titles and combined features.

    Returns:
        list: A list of recommended book titles, or a string message if the book is not found.
    """
    # Get the index of the book that matches the title (case-insensitive)
    # .str.contains is used for partial matching, na=False handles potential NaN values
    idx = df[df['title'].str.contains(title, case=False, na=False)].index

    # Check if any book matches the input title
    if len(idx) == 0:
        return "Book not found. Please try another title."
    else:
        # If multiple matches, take the first one (can be improved for user selection)
        idx = idx[0]

    # Get the pairwise similarity scores of all books with the selected book
    # enumerate adds an index to each similarity score
    sim_scores = list(enumerate(cosine_sim_matrix[idx]))

    # Sort the books based on the similarity scores in descending order
    # lambda x: x[1] specifies sorting by the similarity score (the second element of the tuple)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 5 most similar books.
    # sim_scores[1:6] excludes the book itself (which would have a similarity of 1 with itself)
    # and takes the next 5 most similar books.
    sim_scores = sim_scores[1:6]

    # Get the book indices from the sorted similarity scores
    book_indices = [i[0] for i in sim_scores]

    # Return the titles of the top 5 most similar books
    return df['title'].iloc[book_indices].tolist()

In [None]:
# 5. Chatbot Interaction
print("Welcome to the Book Recommendation Chatbot!")
print("Enter a book title to get recommendations, or type 'exit' to quit.")

while True:
    user_input = input("\nEnter a book title: ")
    if user_input.lower() == 'exit':
        print("Goodbye!")
        break
    
    recommendations = get_recommendations(user_input)
    
    if isinstance(recommendations, list):
        print("\nRecommended books:")
        for book in recommendations:
            print(f"- {book}")
    else:
        print(recommendations)


Welcome to the Book Recommendation Chatbot!
Enter a book title to get recommendations, or type 'exit' to quit.



Enter a book title:  history



Recommended books:
- History -
- A history of God
- A history of philosophy
- The Philosophy of History
- The Fate of Reason
