  books = pd.read_csv('Books.csv')


Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [86]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Load the data
# NOTE(review): the notebook output echoes the books read_csv line, which looks
# like a pandas warning pointing at it (possibly a mixed-dtype warning) —
# confirm, and consider passing explicit `dtype=` if so.
books = pd.read_csv('books.csv')
users = pd.read_csv('users.csv')
ratings = pd.read_csv('ratings.csv')

# Check for any null values.
# The counts are printed explicitly: a bare expression in the middle of a
# script or notebook cell is evaluated and then silently discarded.
print(books.isnull().sum())
print(users.isnull().sum())
print(ratings.isnull().sum())

# Thresholds used below, named so they can be tuned in one place.
POPULAR_MIN_RATINGS = 250      # a title needs at least this many ratings to be "popular"
POPULAR_TOP_N = 50             # number of popular titles kept
ACTIVE_USER_MIN_RATINGS = 200  # a user must have MORE than this many ratings
FAMOUS_BOOK_MIN_RATINGS = 50   # a title needs AT LEAST this many ratings from active users

# Merge ratings with book details (inner join: ratings whose ISBN is not in
# `books` are dropped).
ratings_with_name = ratings.merge(books, on='ISBN')

# Count number of ratings per book.
# Selecting the column before aggregating avoids counting every other column.
num_rating_df = (
    ratings_with_name.groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index()
    .rename(columns={'Book-Rating': 'num_ratings'})
)

# Calculate average rating per book (only 'Book-Rating' column is considered)
avg_rating_df = (
    ratings_with_name.groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index()
    .rename(columns={'Book-Rating': 'avg_rating'})
)

# Merge number of ratings and average ratings, keep the best-rated titles
# among those with enough ratings, then attach author and cover image.
popular_df = num_rating_df.merge(avg_rating_df, on='Book-Title')
popular_df = (
    popular_df[popular_df['num_ratings'] >= POPULAR_MIN_RATINGS]
    .sort_values('avg_rating', ascending=False)
    .head(POPULAR_TOP_N)
)
# `books` can hold several rows (editions) per title; keep the first match only.
popular_df = popular_df.merge(books, on='Book-Title').drop_duplicates('Book-Title')[
    ['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_rating']
]

# Collaborative Filtering Based Recommender System
# Filter users with more than 200 ratings
x = ratings_with_name.groupby('User-ID')['Book-Rating'].count() > ACTIVE_USER_MIN_RATINGS
authentic_users = x[x].index
filtered_rating = ratings_with_name[ratings_with_name['User-ID'].isin(authentic_users)]

# Filter books with at least 50 ratings (from the users kept above)
y = filtered_rating.groupby('Book-Title')['Book-Rating'].count() >= FAMOUS_BOOK_MIN_RATINGS
famous_books = y[y].index
final_ratings = filtered_rating[filtered_rating['Book-Title'].isin(famous_books)]

# Create a pivot table: one row per title, one column per user.
# pivot_table aggregates duplicate (title, user) pairs with its default
# aggregation; missing pairs become 0 so every row is a dense rating vector.
pt = final_ratings.pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating')
pt = pt.fillna(0)

# Calculate similarity scores: square matrix, row/column i corresponds to pt.index[i]
similarity_scores = cosine_similarity(pt)

# Recommendation function
def recommend(book_name, top_n=4):
    """Return the titles most similar to ``book_name``.

    Relies on the module-level ``pt`` (title x user rating matrix),
    ``similarity_scores`` (square matrix aligned with ``pt.index``) and
    ``books`` (frame with 'Book-Title', 'Book-Author', 'Image-URL-M').

    Args:
        book_name: Title to look up; must match an entry of ``pt.index`` exactly.
        top_n: Maximum number of recommendations to return (default 4, the
            original fixed count).

    Returns:
        A list with one ``[title, author, image_url]`` entry per recommended
        book, ordered from most to least similar. If ``book_name`` is not in
        ``pt.index``, a human-readable "not found" message string is returned
        instead.
    """
    # Fetch index of the book
    if book_name not in pt.index:
        return f"Book '{book_name}' not found in the dataset."
    index = np.where(pt.index == book_name)[0][0]

    # Rank every *other* title by similarity, best first. The queried row is
    # excluded by position rather than by assuming it sorts first: a tied
    # score (identical rating vector) or an all-zero row would otherwise let
    # the book recommend itself and push a real neighbour out of the result.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_scores[index])
        if position != index
    ]
    # sorted() is stable, so ties keep their pt.index order.
    similar_items = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

    # Compile recommended books data
    data = []
    for position, _score in similar_items:
        # `books` may list several editions per title; keep the first one only.
        match = books[books['Book-Title'] == pt.index[position]].drop_duplicates('Book-Title')
        item = []
        item.extend(list(match['Book-Title'].values))
        item.extend(list(match['Book-Author'].values))
        item.extend(list(match['Image-URL-M'].values))
        data.append(item)

    return data

# Test the recommendation function
print(recommend('1984'))

# Save the necessary data.
# Each file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes, instead of whenever the object is collected.
# NOTE(review): these are absolute, machine-specific paths, while the reload
# cell further down opens 'popular.pkl' etc. relative to the working
# directory — confirm both refer to the same folder.
with open(r'C:\book-recommender-system\pythonProject1\popular.pkl', 'wb') as pkl_file:
    pickle.dump(popular_df, pkl_file)

with open(r'C:\book-recommender-system\pythonProject1\pt.pkl', 'wb') as pkl_file:
    pickle.dump(pt, pkl_file)

# Only one row per title is stored; recommend() keeps the first edition too.
with open(r'C:\book-recommender-system\pythonProject1\books.pkl', 'wb') as pkl_file:
    pickle.dump(books.drop_duplicates('Book-Title'), pkl_file)

with open(r'C:\book-recommender-system\pythonProject1\similarity_scores.pkl', 'wb') as pkl_file:
    pickle.dump(similarity_scores, pkl_file)

# Read the pivot table back as a quick sanity check of the written file.
with open(r'C:\book-recommender-system\pythonProject1\pt.pkl', 'rb') as pkl_file:
    data = pickle.load(pkl_file)
print(data.head())

  books = pd.read_csv('books.csv')


[['Animal Farm', 'George Orwell', 'http://images.amazon.com/images/P/0451526341.01.MZZZZZZZ.jpg'], ["The Handmaid's Tale", 'Margaret Atwood', 'http://images.amazon.com/images/P/0449212602.01.MZZZZZZZ.jpg'], ['Brave New World', 'Aldous Huxley', 'http://images.amazon.com/images/P/0060809833.01.MZZZZZZZ.jpg'], ['The Vampire Lestat (Vampire Chronicles, Book II)', 'ANNE RICE', 'http://images.amazon.com/images/P/0345313860.01.MZZZZZZZ.jpg']]
User-ID              254     2276    2766    2977    3363    4017    4385    \
Book-Title                                                                    
1984                    9.0     0.0     0.0     0.0     0.0     0.0     0.0   
1st to Die: A Novel     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2nd Chance              0.0    10.0     0.0     0.0     0.0     0.0     0.0   
4 Blondes               0.0     0.0     0.0     0.0     0.0     0.0     0.0   
A Bend in the Road      0.0     0.0     7.0     0.0     0.0     0.0     0.0   

User-I

# 

In [48]:
import pickle

def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards.

    Only use this on files produced by this project: unpickling data from an
    untrusted source can execute arbitrary code.
    """
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


# Each artifact is loaded independently so one missing or corrupt file is
# reported without preventing the others from loading. On failure the
# corresponding name is simply left unassigned.
try:
    popular_df = _load_pickle('popular.pkl')
    print("Popular DF loaded successfully")
except Exception as e:
    print("Error loading popular.pkl:", e)

try:
    pt = _load_pickle('pt.pkl')
    print("Pivot table (pt) loaded successfully")
except Exception as e:
    print("Error loading pt.pkl:", e)

try:
    books = _load_pickle('books.pkl')
    print("Books DF loaded successfully")
except Exception as e:
    print("Error loading books.pkl:", e)

try:
    similarity_scores = _load_pickle('similarity_scores.pkl')
    print("Similarity scores loaded successfully")
except Exception as e:
    print("Error loading similarity_scores.pkl:", e)

Popular DF loaded successfully
Pivot table (pt) loaded successfully
Books DF loaded successfully
Similarity scores loaded successfully
