# Book Recommendation System

## Collaborative Filtering Based

In [247]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

**Dataset Loading**

In [248]:
# Location of the preprocessed ratings CSV inside the Kaggle input directory.
csv_path = "/kaggle/input/bookcrossing-dataset/Books Data with Category Language and Summary/Preprocessed_data.csv"
# Load the whole file with pandas' default parsing options.
prepro_data = pd.read_csv(csv_path)

In [249]:
# Preview the first three rows of the loaded data.
prepro_data.head(3)

Unnamed: 0.1,Unnamed: 0,user_id,location,age,isbn,rating,book_title,book_author,year_of_publication,publisher,img_s,img_m,img_l,Summary,Language,Category,city,state,country
0,0,2,"stockton, california, usa",18.0,195153448,0,Classical Mythology,Mark P. O. Morford,2002.0,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,Provides an introduction to classical myths pl...,en,['Social Science'],stockton,california,usa
1,1,8,"timmins, ontario, canada",34.7439,2005018,5,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],timmins,ontario,canada
2,2,11400,"ottawa, ontario, canada",49.0,2005018,0,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],ottawa,ontario,canada


In [250]:
# Summarise the columns: names, non-null counts and dtypes.
prepro_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1031175 entries, 0 to 1031174
Data columns (total 19 columns):
 #   Column               Non-Null Count    Dtype  
---  ------               --------------    -----  
 0   Unnamed: 0           1031175 non-null  int64  
 1   user_id              1031175 non-null  int64  
 2   location             1031175 non-null  object 
 3   age                  1031175 non-null  float64
 4   isbn                 1031175 non-null  object 
 5   rating               1031175 non-null  int64  
 6   book_title           1031175 non-null  object 
 7   book_author          1031174 non-null  object 
 8   year_of_publication  1031175 non-null  float64
 9   publisher            1031175 non-null  object 
 10  img_s                1031175 non-null  object 
 11  img_m                1031175 non-null  object 
 12  img_l                1031175 non-null  object 
 13  Summary              1031175 non-null  object 
 14  Language             1031175 non-null  object 
 15

In [251]:
# Print the (rows, columns) shape, then show the number of missing values per column.
print(prepro_data.shape)
prepro_data.isnull().sum()

(1031175, 19)


Unnamed: 0                 0
user_id                    0
location                   0
age                        0
isbn                       0
rating                     0
book_title                 0
book_author                1
year_of_publication        0
publisher                  0
img_s                      0
img_m                      0
img_l                      0
Summary                    0
Language                   0
Category                   0
city                   14103
state                  22798
country                35374
dtype: int64

In [252]:
# Count the rows that are exact duplicates of an earlier row.
prepro_data.duplicated().sum()

0

In [253]:
# Columns that are not used further on; every other column is kept.
unused_cols = [
    'Unnamed: 0', 'location', 'age', 'year_of_publication', 'publisher',
    'img_s', 'img_l', 'Summary', 'Language', 'city', 'state', 'country',
]
new_book = prepro_data.drop(unused_cols, axis="columns")

In [254]:
# Check the trimmed frame: first three rows with the remaining columns.
new_book.head(3)

Unnamed: 0,user_id,isbn,rating,book_title,book_author,img_m,Category
0,2,195153448,0,Classical Mythology,Mark P. O. Morford,http://images.amazon.com/images/P/0195153448.0...,['Social Science']
1,8,2005018,5,Clara Callan,Richard Bruce Wright,http://images.amazon.com/images/P/0002005018.0...,['Actresses']
2,11400,2005018,0,Clara Callan,Richard Bruce Wright,http://images.amazon.com/images/P/0002005018.0...,['Actresses']


# Collaborative Filtering Based

In [255]:
# Same three-row preview of new_book, shown again as the starting point for this section.
new_book.head(3)

Unnamed: 0,user_id,isbn,rating,book_title,book_author,img_m,Category
0,2,195153448,0,Classical Mythology,Mark P. O. Morford,http://images.amazon.com/images/P/0195153448.0...,['Social Science']
1,8,2005018,5,Clara Callan,Richard Bruce Wright,http://images.amazon.com/images/P/0002005018.0...,['Actresses']
2,11400,2005018,0,Clara Callan,Richard Bruce Wright,http://images.amazon.com/images/P/0002005018.0...,['Actresses']


In [256]:
# (rows, columns) of the trimmed frame.
new_book.shape

(1031175, 7)

## Filtering 

**Here we filter the data by user, keeping only the active raters**

In [257]:
# We only consider active raters: users with more than MIN_RATINGS_PER_USER
# rating rows. Rows whose rating is 0 count towards the total as well.
MIN_RATINGS_PER_USER = 200

# Number of rating rows per user. Selecting 'rating' before aggregating avoids
# counting every other column only to throw the result away.
people_who_rated = new_book.groupby('user_id')['rating'].count()

# user_id values of the active raters, as a pandas Index.
active_rater = people_who_rated[people_who_rated > MIN_RATINGS_PER_USER].index
active_rater

Index([   254,   2276,   2766,   2977,   3363,   4017,   4385,   6251,   6323,
         6543,
       ...
       271705, 273979, 274004, 274061, 274301, 274308, 275970, 277427, 277639,
       278418],
      dtype='int64', name='user_id', length=811)

In [258]:
# Boolean mask: True for rating rows whose user_id is one of the active raters.
is_active_row = new_book['user_id'].isin(active_rater)
# Keep only those rows.
active_rater_df = new_book[is_active_row]

In [259]:
# Display the rating rows that belong to active raters.
active_rater_df

Unnamed: 0,user_id,isbn,rating,book_title,book_author,img_m,Category
3,11676,0002005018,8,Clara Callan,Richard Bruce Wright,http://images.amazon.com/images/P/0002005018.0...,['Actresses']
6,85526,0002005018,0,Clara Callan,Richard Bruce Wright,http://images.amazon.com/images/P/0002005018.0...,['Actresses']
7,96054,0002005018,0,Clara Callan,Richard Bruce Wright,http://images.amazon.com/images/P/0002005018.0...,['Actresses']
10,177458,0002005018,0,Clara Callan,Richard Bruce Wright,http://images.amazon.com/images/P/0002005018.0...,['Actresses']
21,110912,0374157065,10,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,http://images.amazon.com/images/P/0374157065.0...,['Medical']
...,...,...,...,...,...,...,...
1031104,278418,1882419944,0,McSe Rapid Review for Tcp/Ip Microsoft Windows...,Michael A. Pastore,http://images.amazon.com/images/P/1882419944.0...,['Microsoft Windows NT.']
1031105,278418,1882419952,0,Windows Nt Server 4.0 in the Enterprise (Rapid...,Michael A. Pastore,http://images.amazon.com/images/P/1882419952.0...,9
1031106,278418,1882419960,0,McSe Systems Management Server 1.2: Rapid Revi...,Michael A. Pastore,http://images.amazon.com/images/P/1882419960.0...,9
1031107,278418,1892083043,0,Gotcha! Strategy Games for Math and Logic (Hom...,Denise Gaskins,http://images.amazon.com/images/P/1892083043.0...,9


## Filter on books

In [260]:
"""Now we will filter books, we will not consider all the books for our recommendation. Let's set the threshold"""
# Work on the active-rater frame: count the rating rows each title received.
ratings_per_title = active_rater_df.groupby('book_title')['rating'].count()

# Boolean Series indexed by title: True when the title has more than 50 rating rows.
filtered_df = ratings_per_title > 50
# Titles that pass the threshold, as an Index.
popular_books = filtered_df.loc[filtered_df].index

In [261]:
# Boolean mask: True for rows whose title is one of the popular books.
is_popular_row = active_rater_df['book_title'].isin(popular_books)
# Active-rater rows restricted to popular titles.
final_book_df = active_rater_df[is_popular_row]

In [262]:
# Display the rows left after both the user filter and the book filter.
final_book_df

Unnamed: 0,user_id,isbn,rating,book_title,book_author,img_m,Category
31,11676,0399135782,9,The Kitchen God's Wife,Amy Tan,http://images.amazon.com/images/P/0399135782.0...,['Fiction']
33,36836,0399135782,0,The Kitchen God's Wife,Amy Tan,http://images.amazon.com/images/P/0399135782.0...,['Fiction']
34,46398,0399135782,9,The Kitchen God's Wife,Amy Tan,http://images.amazon.com/images/P/0399135782.0...,['Fiction']
38,113270,0399135782,0,The Kitchen God's Wife,Amy Tan,http://images.amazon.com/images/P/0399135782.0...,['Fiction']
39,113519,0399135782,0,The Kitchen God's Wife,Amy Tan,http://images.amazon.com/images/P/0399135782.0...,['Fiction']
...,...,...,...,...,...,...,...
1026324,269566,0670809381,0,Echoes,Maeve Binchy,http://images.amazon.com/images/P/0670809381.0...,9
1027192,271284,0440910927,0,The Rainmaker,John Grisham,http://images.amazon.com/images/P/0440910927.0...,9
1027489,271705,B0001PIOX4,0,Fahrenheit 451,Ray Bradbury,http://images.amazon.com/images/P/B0001PIOX4.0...,9
1029323,275970,1586210661,9,Me Talk Pretty One Day,David Sedaris,http://images.amazon.com/images/P/1586210661.0...,9


In [263]:
"""let's create our table using the books and the active raters to find out the relationship"""
# Book-by-user table: one row per title, one column per user_id, each cell
# aggregated from the 'rating' values with pivot_table's default aggregation.
final_df = final_book_df.pivot_table(
    values='rating',
    index='book_title',
    columns='user_id',
)

In [264]:
# Fill the empty cells (no rating row for that title/user pair) with 0.
# The name is rebound to the filled frame instead of mutating it with inplace=True.
final_df = final_df.fillna(0)

In [265]:
# Display the filled book-by-user table.
final_df

user_id,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
book_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [269]:
""" we are going to use cosine similarity"""
# cosine_similarity was imported at the top of the notebook.
# Called with a single matrix it compares the rows with each other; final_df has
# one row per book title, so the result is a square book-by-book array.
similarity_score = cosine_similarity(final_df)
similarity_score

array([[1.        , 0.10255025, 0.01220856, ..., 0.12110367, 0.07347567,
        0.04316046],
       [0.10255025, 1.        , 0.2364573 , ..., 0.07446129, 0.16773875,
        0.14263397],
       [0.01220856, 0.2364573 , 1.        , ..., 0.04558758, 0.04938579,
        0.10796119],
       ...,
       [0.12110367, 0.07446129, 0.04558758, ..., 1.        , 0.07085128,
        0.0196177 ],
       [0.07347567, 0.16773875, 0.04938579, ..., 0.07085128, 1.        ,
        0.10602962],
       [0.04316046, 0.14263397, 0.10796119, ..., 0.0196177 , 0.10602962,
        1.        ]])

In [270]:
"""everything is done and now we are ready for our recommendation funtion"""

def recommend(book_name, top_n=10, ratings_matrix=None, similarity=None):
    """Print the titles most similar to ``book_name``.

    Parameters
    ----------
    book_name : str
        Title to look up; it must match a label of the matrix index exactly.
    top_n : int, default 10
        Maximum number of titles to print.
    ratings_matrix : pandas.DataFrame, optional
        Frame whose index holds the book titles. Defaults to the
        notebook-level ``final_df``.
    similarity : array-like, optional
        Square matrix whose row ``i`` holds the similarity of title ``i`` to
        every title, in index order. Defaults to the notebook-level
        ``similarity_score``.

    Returns
    -------
    None
        The titles are printed one per line, most similar first.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in the index.
    """
    if ratings_matrix is None:
        ratings_matrix = final_df
    if similarity is None:
        similarity = similarity_score

    titles = ratings_matrix.index

    # fetch book index from book_name
    matches = np.where(titles == book_name)[0]
    if matches.size == 0:
        # Same exception type as an out-of-bounds lookup, but with a usable message.
        raise IndexError(
            f"{book_name!r} is not one of the {len(titles)} known book titles"
        )
    index = matches[0]

    scores = np.asarray(similarity[index])
    # Descending by score; the stable sort keeps tied titles in index order.
    ranked = np.argsort(-scores, kind="stable")
    # Remove the queried book by position instead of assuming it sorts first:
    # a tie can place another title ahead of it.
    ranked = ranked[ranked != index][:top_n]

    for i in ranked:
        print(titles[i])
 

In [271]:
# Ask for a title. The text is used exactly as typed when it is a known title;
# otherwise surrounding whitespace is removed before looking it up again.
m = input("Enter Book Name to get recommendation: ")
if m not in final_df.index:
    m = m.strip()

if m in final_df.index:
    # Keep a separator between the title and the rest of the header.
    print(f" Based on {m} - Top 10 Books for You: ")
    print('\n')
    recommend(m)
else:
    # recommend() raises IndexError for a title that is not in the matrix,
    # so report the problem here instead of showing a traceback.
    print(f"'{m}' is not one of the books available for recommendation.")

Enter Book Name to get recommendation:  Year of Wonders


 Based on Year of WondersTop 10 Books for You: 


The Eight
A Heartbreaking Work of Staggering Genius
Bridget Jones: The Edge of Reason
Drowning Ruth (Oprah's Book Club)
Midwives: A Novel
The Mists of Avalon
The Sweet Potato Queens' Book of Love
The Pilot's Wife : A Novel
Into the Wild
The Temple of My Familiar


# The End