# BOOK RECOMMENDER using HYBRID FILTERING

In [30]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [49]:
# Folder that holds the three input CSV files. Keeping it in one constant means
# the notebook can be pointed at another copy of the data by editing a single line.
DATA_DIR = 'D:/Internship/Hybrid_Based_Filtering'

df_average_ratings = pd.read_csv(f'{DATA_DIR}/AverageRatings.csv')  # Average Ratings file csv
df_rating_counts = pd.read_csv(f'{DATA_DIR}/RatingsCount.csv')  # Count of Ratings file csv
df_book_data = pd.read_csv(f'{DATA_DIR}/FinalData.csv')  # Overall dataset of books file csv

In [50]:
# Join the vote counts with the average ratings on book_id.
# Both inputs carry a column with the same name, so the merge suffixes them
# with _x (left frame: counts) and _y (right frame: averages); give them
# descriptive names straight away.
df_temp = (
    df_rating_counts
    .merge(df_average_ratings, on='book_id')
    .rename(columns={'rating_x': 'no. of votes', 'rating_y': 'average rating'})
)

In [51]:
# Attach the per-book data (title, authors, genres, ...) to the rating
# statistics, matching rows on book_id
df_book = df_temp.merge(df_book_data, on='book_id')

In [52]:
# Replacing unwanted symbols: every comma, semicolon and full stop inside a
# text cell becomes a single space, then the frame is cast to object dtype.
# A raw string with one character class is used because '\,', '\;' and '\.'
# are invalid escape sequences in a normal string literal (newer Python
# versions warn about them), and a single pass yields the same text as three
# consecutive passes.
# NOTE(review): the replacement is applied to every column, so punctuation in
# 'title' is rewritten as well and lookups by title must use the rewritten
# form - confirm this is intended.
df_book = df_book.replace(r'[,;.]', ' ', regex=True).astype(object)

In [53]:
# Concatenating authors and genres for every book into one keyword string.
# Missing values are treated as empty text: string + NaN evaluates to NaN in
# pandas, and a NaN document makes TfidfVectorizer raise when it is fitted.
df_book['keywords'] = df_book['authors'].fillna('') + " " + df_book['Genres'].fillna('')

In [54]:
# 'authors' and 'Genres' have been folded into 'keywords', so discard both
# columns in one step and preview the resulting frame
df_book = df_book.drop(columns=['authors', 'Genres'])
df_book.head()

Unnamed: 0,book_id,no. of votes,average rating,title,keywords
0,1,22806,4.279707,The Hunger Games (The Hunger Games #1),Suzanne Collins SciFi Drama
1,2,21850,4.35135,Harry Potter and the Sorcerer's Stone (Harry P...,J K Rowling Mary GrandPré Fantasy Young-Age
2,3,16931,3.214341,Twilight (Twilight #1),Stephenie Meyer Fantasy
3,4,19088,4.329369,To Kill a Mockingbird,Harper Lee Self-Help Drama
4,5,16604,3.772224,The Great Gatsby,F Scott Fitzgerald Drama


In [55]:
# Convert the keywords column into a TF-IDF matrix (one row per book, one
# column per unigram/bigram) so that similarity between books can be computed.
# min_df=1 keeps every term that occurs in at least one book, which is the
# vocabulary min_df=0 used to produce; an integer min_df must be >= 1, and
# scikit-learn releases that validate constructor parameters reject 0.
tfidf = TfidfVectorizer(ngram_range=(1, 2), min_df=1)
tfidf_matrix = tfidf.fit_transform(df_book['keywords'])
print(tfidf_matrix.shape)

(999, 3376)


In [56]:
# Pairwise cosine similarity between all book vectors; with the second
# argument omitted the matrix is compared against itself
cosine_sim = cosine_similarity(tfidf_matrix)

# Reverse lookup: book title -> row label of that book in df_book
indices = pd.Series(data=df_book.index, index=df_book['title'])

In [90]:
def get_recommendation(title, top_n=10):
    """Recommend the books whose keywords are most similar to a given book.

    Relies on the notebook-level objects ``indices`` (title -> row position),
    ``cosine_sim`` (pairwise similarity matrix) and ``df_book``.

    Parameters
    ----------
    title : str
        Title of the query book, spelled exactly as stored in ``df_book['title']``.
    top_n : int, default 10
        Number of most-similar books to return.

    Returns
    -------
    pandas.DataFrame
        Columns 'Recommended Books', 'Average Rating', 'No. of Ratings' and
        'Similarity Score', indexed by the row labels of ``df_book``. The
        ``top_n`` most similar books are selected first and then ordered by
        average rating, ties broken by similarity score (both descending).

    Raises
    ------
    KeyError
        If ``title`` is not present in the catalogue.
    """
    if title not in indices.index:
        raise KeyError(f"Title not found in the book catalogue: {title!r}")

    # get index of given title; a title that occurs more than once yields a
    # Series of positions, in which case the first occurrence is used
    ind = indices[title]
    if isinstance(ind, pd.Series):
        ind = ind.iloc[0]
    ind = int(ind)

    # get similarity scores along with indices, leaving out the query book
    # itself. Dropping it by position instead of slicing off the first sorted
    # entry matters when another book has identical keywords (similarity 1.0):
    # the query book is then not guaranteed to be sorted first.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[ind])
        if position != ind
    ]

    # sort the similarity scores (stable, highest first) and keep the top ones
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[:top_n]

    # get book indices and their scores
    book_indices = [position for position, _ in sim_scores]
    book_scores = [score for _, score in sim_scores]

    # look the selected rows up once and pull the columns to report
    selected = df_book.iloc[book_indices]

    # creating a data frame with similarity index
    books = pd.DataFrame({
        'Recommended Books': selected['title'],
        'Average Rating': selected['average rating'],
        'No. of Ratings': selected['no. of votes'],
        'Similarity Score': book_scores,
    })

    # sorting the books based on average rating and similarity score
    return books.sort_values(by=["Average Rating", "Similarity Score"], ascending=False)

In [91]:
# Read a book title typed by the user and look up its recommendations.
# The call is the last expression of the cell, so the returned DataFrame is
# rendered as the cell output. The title has to match an entry of
# df_book['title'] exactly, because get_recommendation looks it up by label.
n = input()
get_recommendation(n)

The Great Gatsby


Unnamed: 0,Recommended Books,Average Rating,No. of Ratings,Similarity Score
69,Ender's Game (Ender's Saga #1),4.258221,8849,0.131387
745,The Lies of Locke Lamora (Gentleman Bastard #1),4.216792,1596,0.151437
759,Ender's Shadow (Ender's Shadow #1),4.203125,1792,0.131387
965,Presumed Innocent,4.060852,1479,0.127521
491,Speaker for the Dead (Ender's Saga #2),3.89916,2618,0.13198
385,Island of the Blue Dolphins (Island of the Blu...,3.795248,3409,0.147588
518,Pretties (Uglies #2),3.736866,2208,0.182905
186,Uglies (Uglies #1),3.733889,3600,0.157228
922,The Alchemyst (The Secrets of the Immortal Nic...,3.727924,1257,0.136935
837,Xenocide (Ender's Saga #3),3.661883,1742,0.131387


In [92]:
# Second interactive query: read another title from the user and display the
# DataFrame returned by get_recommendation as the cell output.
# NOTE(review): input() makes the result depend on what is typed at run time,
# so a plain "Run All" does not reproduce the output shown below on its own.
n = input()
get_recommendation(n)

Frankenstein


Unnamed: 0,Recommended Books,Average Rating,No. of Ratings,Similarity Score
101,Where the Wild Things Are,4.264177,6401,0.093624
401,The Tell-Tale Heart and Other Writings,4.094926,2897,0.082376
793,Doctor Sleep (The Shining #2),4.030845,1621,0.131535
798,Watchers,3.999347,1531,0.098223
238,World War Z: An Oral History of the Zombie War,3.93871,3720,0.095914
430,Red Dragon (Hannibal Lecter #1),3.920258,2483,0.104472
348,Salem's Lot,3.859463,3387,0.131535
456,The Historian,3.667986,2780,0.096112
577,Christine,3.527211,2352,0.080092
608,Cell,3.446663,1903,0.131535
