### Books Recommendation System

### Installing Libraries

In [331]:
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy import int64
import warnings
warnings.filterwarnings("ignore")

import requests
import IPython.display as Disp
import plotly.express as px

import sklearn
from IPython.core.display import display,HTML
from sklearn.decomposition import TruncatedSVD

### Data Reading 

In [332]:
# Books data reading
book=pd.read_csv("C:Books.txt") 

In [333]:
#Rating data reading 
rating=pd.read_csv("C:Ratings.txt")

In [334]:
book.shape,rating.shape

((10000, 23), (162604, 3))

In [335]:
pd.set_option('display.max_columns',100)
# book.head(3)

In [336]:
# rating.head(3)

### Data Merging

In [337]:
# Data merging done on common column i.e. book id
df=rating.merge(book,on='book_id')

In [338]:
# df.head(2)

In [339]:
# df.describe()

In [340]:
# df.info()

### Data Cleaning 

In [341]:
# df.isna().sum()

In [342]:
#Filling null values of original_title with Na and 0
df['original_title']=df['original_title'].fillna('Na') 
df['original_publication_year']=df['original_publication_year'].fillna(0)

In [343]:
#Since, all columns of data are not required. So, extracting the columns that are only necessary
dfuse=df[['user_id','book_id','rating','books_count','original_publication_year','average_rating','ratings_count','title','original_title','authors','image_url']]

In [344]:
# dfuse.head(3)

### Visualisation

### Q1 Maximum Count of Ratings are of ?

In [345]:
# px.bar(dfuse.groupby('rating')['book_id'].count(), title='Plot of Rating with Books Count',color_discrete_sequence =['green']*3, height=500, width=700)

As shown here, maximum rating of 4 are given 58.821K times where rating 5 are given 46.31K. Least rating given is 1 to only 3633. This shows given in data are good books since rating 3 are 4 times given than rating 2.

### Q2 Top 5 Books which are Rated in High Count ?

In [346]:
# px.bar(dfuse.groupby('book_id')['rating'].count().sort_values(ascending=False).nlargest(5), title='Plot of Top 5 Highly Rated in Counts',color_discrete_sequence =['light blue']*3, height=500, width=700 )

As shown maximum rating count is of book id 26 with 1010 count followed by count of 922 of book id of 2 and 4.

### Q3 Which are Top Highly Rated Books Based on Average Rating ?

In [347]:
# px.bar(dfuse.groupby(str('title'))['average_rating'].mean().sort_values(ascending=False).nlargest(5), title='Plot of Top 3 Highly Rated Books on Basis of Average Ratings',color_discrete_sequence =['red']*3,height=500, width=700 )

As shown above, highest rating average of 4.82 is given to book "The Complete Calvin and Hobbies" followed by rating of 4.77 to Harry Potter(1-5)

### Q4 Which User Rated Maximum ?

In [348]:
# px.bar(dfuse.groupby(str('user_id'))['rating'].count().sort_values(ascending=False).nlargest(10), title='Plot of User Id with Ratings Count',color_discrete_sequence =['orange']*3 , height=500, width=700)

As shown above, user id of 2276 rated maximum books of 185 in count followed by value of 179 by user id 4147 then value of 172 with user id 1794

### Q5 Which Original Title has Maximum Rating ?

In [349]:
df_1=pd.DataFrame(dfuse.groupby('original_title')['rating'].count().sort_values(ascending=False).nlargest(6))

In [350]:
df_1['Image']=" "
def path_to_image_html(path):
    return '<img src="'+ path + '" width="60" >'

In [351]:
for i in range(1,len(df_1.index)):
    url=dfuse[dfuse['original_title']==df_1.index[i]]['image_url'].unique()
    df_1['Image'][i]=url[0]

In [352]:
image_cols = ['Image']

# Create the dictionariy to be passed as formatters
format_dict = {}
for image_col in image_cols:
    format_dict[image_col] = path_to_image_html

# display(HTML(df_1[1:].to_html(escape=False ,formatters=format_dict)))

As given in above dataframe, "The Da Vinci Code" has the maximum rating count of 1010 haing book id 26 as mentioned in Q2 

### Recommendation System 

In [353]:
# create pivot table to study the user and book relation
df_pivot=dfuse.pivot_table(values='rating',index='user_id',columns='original_title',fill_value=0)

In [354]:
# df_pivot.head(5)

In [355]:
# Transposing the pivot table to easy access
x=df_pivot.values.T  
# x.shape 

#### Dimentionality Reduction Using SVD

In [356]:
SVD  = TruncatedSVD(n_components=20, random_state=17)
result_matrix = SVD.fit_transform(x)
# result_matrix.shape

#### Creating the Pearson's Correlation 

In [357]:
corr_mat = np.corrcoef(result_matrix)
# corr_mat.shape

#### Recommendation of The Entered Book

In [358]:
book_names = df_pivot.columns
book_list = list(book_names)
book_index = book_list.index('Drowning Ruth') 

In [359]:
corr_book = corr_mat[book_index] 
# corr_book.shape

In [360]:
print("Recommendation are")
# list(book_names[(corr_book<1.0) & (corr_book>0.8)][1:])

Recommendation are


### Function for Recommendation System Gives Top 5 Books

In [361]:
def path_to_image_html(path):
    return '<img src="'+ path + '" width="60" >'

def df_recommend(recommend):
    recommend=recommend[:5]
    year=[]
    image_url=[]
    for i in recommend:
        for j in dfuse.index:
            if dfuse['original_title'][j]==i:
                year.append(dfuse['original_publication_year'][j])
                image_url.append(dfuse['image_url'][j])  
                break
    recommend_df=pd.DataFrame([recommend,year,image_url]).T
    recommend_df.columns=['Recommend Books','Year of Publication','Image']
    
    image_cols = ['Image']


    format_dict = {}
    for image_col in image_cols:
        format_dict[image_col] = path_to_image_html

    display(HTML(recommend_df[0:10].to_html(escape=False ,formatters=format_dict)))

In [362]:
def recommend_book(df_pivot, corr_mat):
    name=input("Enter the Name of the Book : ")
    book_names = df_pivot.columns
    book_list = list(book_names)
    try:
        if name in book_list:
            book_index = book_list.index(name) 
            corr_book = corr_mat[book_index] 
            print("Recommendation are")
            recommend=list(book_names[(corr_book<1.0) & (corr_book>0.8)][1:])
            df_recommend(recommend)

        else:
            name=" "+name
            book_index = book_list.index(name) 
            corr_book = corr_mat[book_index] 
            print("Recommendation are")
            recommend=list(book_names[(corr_book<1.0) & (corr_book>0.8)][1:])
            df_recommend(recommend)
    except:
        print("Enter the Book Name Again")
        recommend_book(df_pivot,corr_mat)    
    

In [363]:
recommend_book(df_pivot,corr_mat)   

Enter the Book Name Again
Enter the Book Name Again
Recommendation are


Unnamed: 0,Recommend Books,Year of Publication,Image
0,"A Kiss of Shadows (Merry Gentry, #1)",2000.0,
1,Battleaxe (The Axis Trilogy #1),1995.0,
2,Bloody Bones,1996.0,
3,"Burnt Offerings (Anita Blake, Vampire Hunter, #7)",1998.0,
4,"Chainfire (Sword of Truth, #9)",2004.0,
