In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from difflib import get_close_matches

# Read the ratings dataset from disk
df = pd.read_csv('final_model_data.csv')

# Number of non-null Book-Rating entries recorded per User-ID
x = df.groupby('User-ID')['Book-Rating'].count()

# Users that rated strictly more than 120 times
top_users = x.loc[x.gt(120)].index

# Rows of the dataset that belong to those heavy raters only
filtered_rating = df.loc[df['User-ID'].isin(top_users)]

# Number of non-null ratings each Book-Title received from the heavy raters
y = filtered_rating.groupby("Book-Title")['Book-Rating'].count()

# A title counts as "famous" when it has strictly more ratings than this
famous_books_threshold = 10  # Adjust this as needed
famous_books = y.loc[y.gt(famous_books_threshold)].index

# Ratings restricted to heavy raters AND famous titles
final_rating = filtered_rating.loc[filtered_rating['Book-Title'].isin(famous_books)]

# Title x user rating matrix; a (title, user) pair without a rating becomes 0
pt = final_rating.pivot_table(
    index="Book-Title",
    columns="User-ID",
    values="Book-Rating",
).fillna(0)

# Pairwise cosine similarity between the title rows of the matrix
similarity_scores = cosine_similarity(pt)

def recommend(book_name):
    """Return up to four book recommendations for *book_name*.

    Relies on the module-level objects built earlier in the file:
    ``pt`` (title x user rating matrix), ``similarity_scores`` (square
    similarity matrix aligned with ``pt.index``) and ``df`` (the raw
    dataset holding ``Book-Title``, ``Book-Author`` and ``Image-URL-M``).

    Lookup strategy:
      1. If *book_name* is a title in ``pt.index``, use it directly.
      2. Otherwise take the closest title found by
         ``difflib.get_close_matches`` (cutoff 0.5), if any.
      3. If nothing matches, fall back to the first titles of ``pt.index``
         (index order, not a popularity ranking).

    Args:
        book_name: Title to look up.

    Returns:
        A list with at most four entries, each a list of the form
        ``[title, author, image_url]`` taken from the first row of ``df``
        carrying that title.
    """
    max_results = 4

    # Resolve the requested name to a title that exists in the matrix.
    if book_name in pt.index:
        matched_title = book_name
    else:
        close = get_close_matches(book_name, pt.index, n=1, cutoff=0.5)
        matched_title = close[0] if close else None

    if matched_title is None:
        # No usable match: first titles in index order, clamped so a matrix
        # with fewer than `max_results` rows does not raise IndexError.
        chosen = list(range(min(max_results, len(pt.index))))
    else:
        own_position = int(np.where(pt.index == matched_title)[0][0])
        ranked = sorted(
            enumerate(similarity_scores[own_position]),
            key=lambda pair: pair[1],
            reverse=True,
        )
        # Drop the matched book by position rather than assuming it sorts
        # first: with tied (or rounded) scores another title can precede it,
        # and slicing [1:5] would then recommend the queried book itself.
        chosen = [pos for pos, _ in ranked if pos != own_position][:max_results]

    data = []
    for pos in chosen:
        # One representative row per title (the first occurrence in df).
        first_row = df[df['Book-Title'] == pt.index[pos]].drop_duplicates('Book-Title')
        item = []
        item.extend(list(first_row['Book-Title'].values))
        item.extend(list(first_row['Book-Author'].values))
        item.extend(list(first_row['Image-URL-M'].values))
        data.append(item)

    return data

# Smoke test: print the recommendations produced for a sample title
print(recommend(book_name='War and Peace'))



[['Two for the Dough', 'Janet Evanovich', 'http://images.amazon.com/images/P/0671001795.01.MZZZZZZZ.jpg'], ['Snow Falling on Cedars', 'David Guterson', 'http://images.amazon.com/images/P/067976402X.01.MZZZZZZZ.jpg'], ['The Testament', 'John Grisham', 'http://images.amazon.com/images/P/0440234743.01.MZZZZZZZ.jpg'], ['The Color Purple', 'Alice Walker', 'http://images.amazon.com/images/P/0671617028.01.MZZZZZZZ.jpg']]
