# Book Recommendation System

### Importing Libraries

In [None]:
import pandas as pd
from math import sqrt
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the Book-Crossing ratings file (semicolon-separated).
BR = pd.read_csv(
    "../input/bookcrossing-dataset/Book reviews/BX-Book-Ratings.csv",
    sep=';',
    encoding='unicode_escape',
    low_memory=False,
)

In [None]:
# Preview the first five rows of the ratings table.
BR.head(5)

In [None]:
# Load the book metadata file (semicolon-separated).
# Malformed rows are dropped with a warning: `on_bad_lines='warn'` is the
# supported replacement for `error_bad_lines=False`, which newer pandas
# releases no longer accept (requires pandas >= 1.3).
B = pd.read_csv(
    "../input/bookcrossing-dataset/Book reviews/BX_Books.csv",
    sep=';',
    encoding='unicode_escape',
    low_memory=False,
    on_bad_lines='warn',
)

In [None]:
# Preview the first five rows of the book metadata table.
B.head(5)

In [None]:
# Load the users file (semicolon-separated).
# Malformed rows are dropped with a warning: `on_bad_lines='warn'` is the
# supported replacement for `error_bad_lines=False`, which newer pandas
# releases no longer accept (requires pandas >= 1.3).
U = pd.read_csv(
    "../input/bookcrossing-dataset/Book reviews/BX-Users.csv",
    sep=';',
    encoding='unicode_escape',
    low_memory=False,
    on_bad_lines='warn',
)

In [None]:
# Preview the first five rows of the users table.
U.head(5)

## Collaborative Filtering

Collaborative filtering (here, user-user filtering) is a technique that uses other users' ratings to recommend items to the input user. It finds users whose preferences are similar to the input user's and then recommends items that those users liked. Here, the Pearson correlation coefficient is used to measure how similar two users are.

### Books Read by the User

In [None]:
# The input user's reading history as (title, rating) pairs.
_titles_and_ratings = [
    ('The Satanic Verses', 3.5),
    ('Don Quixote', 5),
    ('To Kill a Mockingbird', 4.5),
    ('A Passage to India', 2),
    ('Beloved', 5),
]
# One record per book, using the same column names as the dataset.
userInput = [
    {'Book-Title': title, 'Book-Rating': rating}
    for title, rating in _titles_and_ratings
]
inputBooks = pd.DataFrame(userInput)
inputBooks

In [None]:
# Keep only the catalogue rows whose title matches one of the user's books.
inputId = B[B['Book-Title'].isin(inputBooks['Book-Title'].tolist())]
# Attach the user's rating to every matching catalogue row so we get its ISBN.
# The join key is spelled out instead of being inferred from shared column names.
inputBooks = pd.merge(inputId, inputBooks, on='Book-Title')
# Drop the metadata columns that are not needed for the similarity step.
# `columns=` is used because passing the axis positionally (`drop(label, 1)`)
# is rejected by pandas 2.0 and later.
inputBooks = inputBooks.drop(
    columns=[
        'Year-Of-Publication',
        'Image-URL-S',
        'Image-URL-M',
        'Image-URL-L',
        'Publisher',
        'Book-Author',
    ]
)
inputBooks.head()

### Users Who Have Read the Same Books

In [None]:
# Every rating, by any user, for an ISBN that matched one of the input titles.
input_isbns = inputBooks['ISBN'].tolist()
NewReaderSubset = BR.loc[BR['ISBN'].isin(input_isbns)]
NewReaderSubset

In [None]:
# Group the matching ratings by user.
# The key is passed as a plain label, not a one-element list: with a list,
# pandas 2.0+ yields 1-tuples such as (user_id,) as group names when the
# groups are iterated, and those tuples would end up as the User-ID values.
NewReaderSubsetGroup = NewReaderSubset.groupby('User-ID')

In [None]:
# Materialise the (User-ID, ratings) pairs and order them so that users
# sharing the most books with the input come first.
NewReaderSubsetGroup = list(NewReaderSubsetGroup)
NewReaderSubsetGroup.sort(
    key=lambda user_and_ratings: len(user_and_ratings[1]),
    reverse=True,
)

In [None]:
# Show the five users with the largest overlap.
NewReaderSubsetGroup[:5]

### Using Pearson Correlation

In [None]:
# Pearson correlation between the input user and each candidate user,
# stored as {group key (User-ID): correlation}.
pearsonCorrelationDict = {}
# Sorting does not depend on the candidate, so do it once instead of on
# every iteration. inputBooks is left ISBN-sorted afterwards.
inputBooks = inputBooks.sort_values(by='ISBN')
inputRatings = inputBooks[['ISBN', 'Book-Rating']]
for name, group in NewReaderSubsetGroup:
    # Join on ISBN so every (x, y) pair is guaranteed to be the two users'
    # ratings of the same book. Pairing two independently sorted lists by
    # position breaks as soon as either side has a duplicate or missing ISBN.
    paired = inputRatings.merge(
        group[['ISBN', 'Book-Rating']],
        on='ISBN',
        suffixes=('_input', '_user'),
    )
    x = paired['Book-Rating_input'].tolist()
    y = paired['Book-Rating_user'].tolist()
    # Number of rating pairs actually compared.
    nRatings = len(paired)
    if nRatings == 0:
        # Nothing in common: treat as uncorrelated rather than dividing by zero.
        pearsonCorrelationDict[name] = 0
        continue
    sum_x = sum(x)
    sum_y = sum(y)
    # Sums of squares / cross-products about the mean (computational form).
    Sxx = sum(i * i for i in x) - sum_x ** 2 / float(nRatings)
    Syy = sum(j * j for j in y) - sum_y ** 2 / float(nRatings)
    Sxy = sum(i * j for i, j in zip(x, y)) - sum_x * sum_y / float(nRatings)
    # Correlation is undefined when either user has zero variance; use 0.
    # `> 0` (not `!= 0`) also keeps a tiny negative rounding error from
    # reaching sqrt().
    if Sxx > 0 and Syy > 0:
        pearsonCorrelationDict[name] = Sxy / sqrt(Sxx * Syy)
    else:
        pearsonCorrelationDict[name] = 0


In [None]:
# Inspect the computed (User-ID, correlation) pairs.
pearsonCorrelationDict.items()

In [None]:
# Tabulate the similarity scores: one row per candidate user, indexed 0..n-1.
# Building the frame from explicit columns avoids the non-canonical
# `orient='Index'` spelling and also works when no candidate users were
# found (renaming the columns of an empty from_dict() result would fail).
pearsonDF = pd.DataFrame({
    'Similarity Index': list(pearsonCorrelationDict.values()),
    'User-ID': list(pearsonCorrelationDict.keys()),
})
pearsonDF.head()

### Most Similar Users

In [None]:
# Rank users from most to least similar and keep the first 50 rows.
ranked_users = pearsonDF.sort_values(by='Similarity Index', ascending=False)
topUsers = ranked_users.iloc[:50]
topUsers.head()

### Ratings of Selected Users for All Books

In [None]:
# Pull in every rating made by the selected users (inner join on User-ID).
topUsersRating = pd.merge(topUsers, BR, how='inner', on='User-ID')
topUsersRating.head()

In [None]:
# Weight each rating by how similar its author is to the input user.
similarity = topUsersRating['Similarity Index']
topUsersRating['Weighted Rating'] = similarity.mul(topUsersRating['Book-Rating'])
topUsersRating.head()

In [None]:
# Per book (ISBN), total the similarity scores and the weighted ratings.
# The two needed columns are selected before aggregating, so pandas does not
# sum every other column only for the result to be thrown away (and never
# attempts to sum a non-numeric column).
tempTopUsersRating = (
    topUsersRating
    .groupby('ISBN')[['Similarity Index', 'Weighted Rating']]
    .sum()
)
tempTopUsersRating.columns = ['sum_similarityIndex', 'sum_weightedRating']
tempTopUsersRating.head()

In [None]:
# Similarity-weighted average rating for each book.
# NOTE(review): a book whose similarity scores sum to zero yields inf/NaN here
# - confirm whether such rows should be filtered out.
weighted_average = tempTopUsersRating['sum_weightedRating'].div(
    tempTopUsersRating['sum_similarityIndex']
)
# Start from an empty frame and add the score, then the ISBN as a regular column.
recommendation_df = pd.DataFrame()
recommendation_df['Weighted Average Recommendation Score'] = weighted_average
recommendation_df['ISBN'] = tempTopUsersRating.index
recommendation_df.head()

In [None]:
# Highest-scoring books first.
recommendation_df = recommendation_df.sort_values(
    'Weighted Average Recommendation Score',
    ascending=False,
)
recommendation_df.head()

### Recommended Books for New User

In [None]:
# Look up catalogue details for the ten best-scoring ISBNs
# (rows come back in catalogue order, not score order).
top_isbns = recommendation_df['ISBN'].head(10).tolist()
B[B['ISBN'].isin(top_isbns)]