<a href="https://colab.research.google.com/github/mariaxclarisse/Book-Recommendation-System/blob/main/Book_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Books Recommender System

In this project, we will implement a few recommendation algorithms (Basic Recommender, Content-based, Collaborative Filtering, and Hybrid).

# Importing Libraries and Loading Our Data <a id="1"></a> <br>

In [None]:
# importing libraries and packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the four Goodreads CSV files: book metadata, user ratings,
# book-to-tag links, and the tag id -> tag name table.
books, ratings, book_tags, tags = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/books.csv',
        '/content/ratings.csv',
        '/content/book_tags.csv',
        '/content/tags.csv',
    )
)

# Data Pre-Processing <a id="2"></a> <br>

In [None]:
# Missing publication years become the sentinel -1; all years are stored as integers.
books['original_publication_year'] = books['original_publication_year'].fillna(-1).astype('int64')

In [None]:
# Remove exact duplicate rows before counting how many ratings each user gave.
ratings_rmv_duplicates = ratings.drop_duplicates()

# Users with fewer than 3 ratings are discarded.
ratings_per_user = ratings_rmv_duplicates.groupby('user_id')['user_id'].count()
unwanted_users = ratings_per_user[ratings_per_user < 3]

# Drop every rating made by one of those users.
from_unwanted_user = ratings_rmv_duplicates['user_id'].isin(unwanted_users.index)
unwanted_ratings = ratings_rmv_duplicates[from_unwanted_user]
new_ratings = ratings_rmv_duplicates.drop(index=unwanted_ratings.index)


In [None]:
# A rating's `book_id` refers to the `id` column of `books`; use it to attach the title.
title_by_id = books.set_index('id')['title']
new_ratings['title'] = title_by_id.loc[new_ratings['book_id']].values

In [None]:
new_ratings.head(5)

Unnamed: 0,book_id,user_id,rating,title
0,1,314,5,"The Hunger Games (The Hunger Games, #1)"
1,1,439,3,"The Hunger Games (The Hunger Games, #1)"
2,1,588,5,"The Hunger Games (The Hunger Games, #1)"
3,1,1169,4,"The Hunger Games (The Hunger Games, #1)"
4,1,1185,4,"The Hunger Games (The Hunger Games, #1)"


# Simple Recommender <a id="3"></a> <br>


In [None]:
# IMDB-style weighted rating:  W = (R*v + C*m) / (v + m)
#   R - average rating of the book
#   v - number of ratings the book received
#   C - mean of the average ratings across the whole dataset
#   m - ratings-count threshold, here the 95th percentile of ratings_count

R = books['average_rating']
v = books['ratings_count']
C = R.mean()
m = v.quantile(0.95)
W = (R*v + C*m) / (v + m)

In [None]:
books['weighted_rating'] = W

In [None]:
qualified  = books.sort_values('weighted_rating', ascending=False).head(250)

## Top Books <a id="4"></a> <br>

**SIMPLE RECOMMENDER 1.1**
Getting top books based on ratings only (general)

In [None]:
#we can get a table of recommendations based on overall ratings in the dataset, and set how many rows to show
qualified[['title', 'authors', 'average_rating', 'weighted_rating']].head(20)

Unnamed: 0,title,authors,average_rating,weighted_rating
24,Harry Potter and the Deathly Hallows (Harry Po...,"J.K. Rowling, Mary GrandPré",4.61,4.555956
26,Harry Potter and the Half-Blood Prince (Harry ...,"J.K. Rowling, Mary GrandPré",4.54,4.490428
17,Harry Potter and the Prisoner of Azkaban (Harr...,"J.K. Rowling, Mary GrandPré, Rufus Beck",4.53,4.48509
23,Harry Potter and the Goblet of Fire (Harry Pot...,"J.K. Rowling, Mary GrandPré",4.53,4.483227
1,Harry Potter and the Sorcerer's Stone (Harry P...,"J.K. Rowling, Mary GrandPré",4.44,4.424365
20,Harry Potter and the Order of the Phoenix (Har...,"J.K. Rowling, Mary GrandPré",4.46,4.419054
30,The Help,Kathryn Stockett,4.45,4.405158
38,"A Game of Thrones (A Song of Ice and Fire, #1)",George R.R. Martin,4.45,4.398759
134,"A Storm of Swords (A Song of Ice and Fire, #3)",George R.R. Martin,4.54,4.396645
421,"Harry Potter Boxset (Harry Potter, #1-7)",J.K. Rowling,4.74,4.391147


## Top "Genres" Books <a id="5"></a> <br>

In [None]:
genres = ["Art", "Biography", "Business", "Chick Lit", "Children's", "Christian", "Classics",
          "Comics", "Contemporary", "Cookbooks", "Crime", "Ebooks", "Fantasy", "Fiction",
          "Gay and Lesbian", "Graphic Novels", "Historical Fiction", "History", "Horror",
          "Humor and Comedy", "Manga", "Memoir", "Music", "Mystery", "Nonfiction", "Paranormal",
          "Philosophy", "Poetry", "Psychology", "Religion", "Romance", "Science", "Science Fiction",
          "Self Help", "Suspense", "Spirituality", "Sports", "Thriller", "Travel", "Young Adult"]

In [None]:
genres = list(map(str.lower, genres))
genres[:4]

['art', 'biography', 'business', 'chick lit']

In [None]:
available_genres = tags.loc[tags.tag_name.str.lower().isin(genres)]

In [None]:
available_genres_books = book_tags[book_tags.tag_id.isin(available_genres.tag_id)]

In [None]:
print('There are {} books that are tagged with above genres'.format(available_genres_books.shape[0]))

There are 60573 books that are tagged with above genres


In [None]:
# Attach the tag name of every row as a `genre` column.
# The lookup is keyed on the `tag_id` column instead of on the index labels of
# `tags`, and `assign` builds a new frame so the column is not written into a
# slice of `book_tags`.
genre_by_tag_id = available_genres.set_index('tag_id')['tag_name']
available_genres_books = available_genres_books.assign(
    genre=available_genres_books['tag_id'].map(genre_by_tag_id)
)

In [None]:
def build_chart(genre, percentile=0.85):
    """Rank the books tagged with `genre` by an IMDB-style weighted rating.

    Parameters
    ----------
    genre : str
        Genre name; compared case-insensitively with the `genre` column of
        `available_genres_books`.
    percentile : float, default 0.85
        Quantile of `ratings_count` (within the genre) used as the weight `m`
        in the weighted-rating formula.

    Returns
    -------
    pandas.DataFrame
        Rows of `books` for the genre, indexed by `book_id`, with an added
        `weighted_rating` column and sorted by it in descending order.
        Empty when no book carries the genre.
    """
    wanted = genre.lower()
    # Lower-case both sides: the tag names were matched case-insensitively when
    # the genre table was built, so the comparison here must be too.
    tagged = available_genres_books[available_genres_books['genre'].str.lower() == wanted]

    # `isin` lists each book once even if it is tagged several times and skips
    # tagged books that are absent from `books` instead of raising KeyError.
    in_genre = books['book_id'].isin(tagged['goodreads_book_id'])
    qualified = books[in_genre].set_index('book_id')

    v = qualified['ratings_count']
    m = qualified['ratings_count'].quantile(percentile)
    R = qualified['average_rating']
    C = qualified['average_rating'].mean()
    qualified['weighted_rating'] = (R*v + C*m) / (v + m)

    return qualified.sort_values('weighted_rating', ascending=False)

In [None]:
cols = ['title','authors','original_publication_year','average_rating','ratings_count','weighted_rating']


**SIMPLE RECOMMENDER 1.2**
Getting top books based on genres only

In [None]:
# get recommendations based on input genre, we can change the number of rows
genre = 'Manga'
build_chart(genre)[cols].head(10)

Unnamed: 0_level_0,title,authors,original_publication_year,average_rating,ratings_count,weighted_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
43070,The Essential Calvin and Hobbes: A Calvin and ...,Bill Watterson,1988,4.65,93001,4.513955
77727,Calvin and Hobbes,"Bill Watterson, G.B. Trudeau",1987,4.61,117788,4.505829
15195,"The Complete Maus (Maus, #1-2)",Art Spiegelman,1986,4.53,76785,4.421728
870,"Fullmetal Alchemist, Vol. 1 (Fullmetal Alchemi...","Hiromu Arakawa, Akira Watanabe",2002,4.49,93990,4.408278
24818,The Days Are Just Packed: A Calvin and Hobbes ...,Bill Watterson,1993,4.68,19143,4.370816
4634266,NARUTO -ナルト- 巻ノ四十三,Masashi Kishimoto,2008,4.57,26364,4.361724
13154150,"Attack on Titan, Vol. 1 (Attack on Titan, #1)","Hajime Isayama, Sheldon Drzka",2010,4.42,82565,4.356482
24816,Homicidal Psycho Jungle Cat: A Calvin and Hobb...,Bill Watterson,1994,4.71,14113,4.352309
1237398,"One Piece, Volume 01: Romance Dawn (One Piece,...","Eiichirō Oda, Andy Nakatani",1997,4.42,67047,4.347842
6465707,"The Walking Dead, Compendium 1",Robert Kirkman,2009,4.43,57764,4.346963


In [None]:
genre = 'Art'
build_chart(genre)[cols].head(5)

Unnamed: 0_level_0,title,authors,original_publication_year,average_rating,ratings_count,weighted_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
77727,Calvin and Hobbes,"[billwatterson, g.b.trudeau]",1987,4.61,117788,4.451946
43070,The Essential Calvin and Hobbes: A Calvin and Hobbes Treasury,[billwatterson],1988,4.65,93001,4.450523
15195,"The Complete Maus (Maus, #1-2)",[artspiegelman],1986,4.53,76785,4.349103
24813,The Calvin and Hobbes Tenth Anniversary Book,[billwatterson],1995,4.63,48280,4.345617
24812,The Complete Calvin and Hobbes,[billwatterson],2005,4.82,28900,4.345345


In [None]:
genre = 'Science'
build_chart(genre)[cols].head(5)

Unnamed: 0_level_0,title,authors,original_publication_year,average_rating,ratings_count,weighted_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
18007564,The Martian,[andyweir],2012,4.39,423344,4.342548
13,The Ultimate Hitchhiker's Guide to the Galaxy,[douglasadams],1996,4.37,222842,4.29297
375802,"Ender's Game (Ender's Saga, #1)",[orsonscottcard],1985,4.3,813439,4.279849
7784,The Lorax,[dr.seuss],1971,4.35,226564,4.278104
4069,Man's Search for Meaning,[viktore.frankl],1946,4.33,171281,4.245962


In [None]:
genre = 'Psychology'
build_chart(genre)[cols].head(5)

Unnamed: 0_level_0,title,authors,original_publication_year,average_rating,ratings_count,weighted_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4069,Man's Search for Meaning,[viktore.frankl],1946,4.33,171281,4.234974
2153405,Still Alice,[lisagenova],2007,4.3,223432,4.228842
7445,The Glass Castle,[jeannettewalls],2005,4.24,621099,4.215703
4934,The Brothers Karamazov,"[fyodordostoyevsky, richardpevear, larissavolokhonsky]",1880,4.3,155838,4.20671
25899336,When Breath Becomes Air,"[paulkalanithi, abrahamverghese]",2016,4.32,116771,4.199367


# Content Based Recommender <a id="6"></a> <br>



In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [None]:
def _normalize_authors(raw_authors):
    """Return author names as a list of lower-case strings without spaces.

    Accepts the raw comma-separated string from the CSV as well as an
    already-converted list, so this cell can be re-run safely. Missing values
    become an empty list.
    """
    if isinstance(raw_authors, str):
        raw_authors = raw_authors.split(', ')
    elif not isinstance(raw_authors, (list, tuple)):
        return []
    return [name.replace(" ", "").lower() for name in raw_authors]

books['authors'] = books['authors'].apply(_normalize_authors)

In [None]:
def get_genres(x):
    """Return the normalised tag names attached to one book.

    Parameters
    ----------
    x : int
        Value matched against `book_tags.goodreads_book_id`.

    Returns
    -------
    list of str
        Tag names in the order they appear in `book_tags`, lower-cased and with
        spaces removed. Tag ids without a name in `tags` are skipped.
    """
    tag_ids = book_tags.loc[book_tags.goodreads_book_id == x, 'tag_id']
    # Look names up through the `tag_id` column rather than the index labels of
    # `tags`, so the result does not depend on how `tags` happens to be indexed.
    names = tag_ids.map(tags.set_index('tag_id')['tag_name']).dropna()
    return [name.lower().replace(" ", "") for name in names]

In [None]:
books['genres'] = books.book_id.apply(get_genres)

In [None]:
books['soup'] = books.apply(lambda x: ' '.join([x['title']] + x['authors'] + x['genres']), axis=1)

In [None]:
# Unigram and bigram counts over the `soup` text, with English stop words removed.
# min_df=1 keeps every term; an integer min_df of 0 is rejected by the parameter
# validation in scikit-learn.
count = CountVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
count_matrix = count.fit_transform(books['soup'])

## Cosine Similarity <a id="7"></a> <br>


In [None]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [None]:
# Map each title to its index label in `books`; the recommenders use that value
# as a row position in `cosine_sim` (assumes `books` has the default RangeIndex).
# Only the first occurrence of a repeated title is kept, so `indices[title]`
# always yields a single value rather than a Series.
indices = pd.Series(books.index, index=books['title'])
indices = indices[~indices.index.duplicated(keep='first')]
titles = books['title']

In [None]:
pd.set_option('display.max_colwidth', 250)

def get_recommendations(title, n=10):
    """Return the `n` books whose content is most similar to `title`.

    Parameters
    ----------
    title : str
        Exact book title; must be a key of `indices` (otherwise KeyError).
    n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        One column, 'Recommended Books', ordered from most to least similar.
    """
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Several books share this title; use the first one.
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx])
    # Stable descending sort: books with equal similarity keep their row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the queried book by identity, not by assuming it ranks first, then
    # honour `n` (it is no longer capped at 10).
    ranked = ranked[ranked != idx][:n]
    return pd.DataFrame(titles.iloc[ranked].values, columns=['Recommended Books'])

**CONTENT BASED RECOMMENDER 2.1**
Getting recommendations based on title (exact title) similarity

In [None]:
get_recommendations("The Alchemist")

Unnamed: 0,Recommended Books
0,A Thousand Splendid Suns
1,The Hour I First Believed
2,The Kite Runner
3,Ham on Rye
4,The Uncommon Reader
5,Housekeeping
6,Little Children
7,The Thirteenth Tale
8,Jonathan Livingston Seagull
9,Then We Came to the End


In [None]:
get_recommendations("The Little Prince")

Unnamed: 0,Recommended Books
0,Peter Pan
1,Alice's Adventures in Wonderland
2,Esio Trot
3,Matilda
4,The Phantom Tollbooth
5,Alice in Wonderland
6,Fantastic Mr. Fox
7,The Best Christmas Pageant Ever (The Herdmans #1)
8,The Little Mermaid
9,The Giraffe and the Pelly and Me


In [None]:
get_recommendations("Paper Towns")

Unnamed: 0,Recommended Books
0,Me and Earl and the Dying Girl
1,The DUFF: Designated Ugly Fat Friend
2,Fangirl
3,An Abundance of Katherines
4,The Spectacular Now
5,Let It Snow: Three Holiday Romances
6,Eleanor & Park
7,The Beginning of Everything
8,It's Kind of a Funny Story
9,Looking for Alaska


In [None]:
get_recommendations("A Time to Kill")

Unnamed: 0,Recommended Books
0,The Pelican Brief
1,"The Firm (Penguin Readers, Level 5)"
2,The Runaway Jury
3,The Client
4,The Partner
5,The Chamber
6,The Rainmaker
7,The Summons
8,The Firm
9,The Street Lawyer


**CONTENT BASED RECOMMENDER 2.2**
Getting recommendations based on 'partial' title similarity.
This means if we forgot the exact title, we could still get recommendations based on keywords

In [None]:
def get_name_from_partial(title, catalog=None):
    """Return every book title that contains `title` as a substring.

    The match is case-insensitive and literal: characters such as '+' or '('
    in the query are not interpreted as a regular expression.

    Parameters
    ----------
    title : str
        Keyword or partial title to search for.
    catalog : pandas.DataFrame, optional
        Frame with a `title` column to search; defaults to the global `books`.

    Returns
    -------
    list of str
        Matching titles in catalogue order; empty when nothing matches.
        Rows with a missing title never match.
    """
    source = books if catalog is None else catalog
    lowered_titles = source.title.str.lower()
    # Lower-case the query too, otherwise any capital letter in it matches nothing.
    matches = lowered_titles.str.contains(title.lower(), regex=False, na=False)
    return list(source.title[matches].values)

In [None]:
pd.set_option('display.max_colwidth', 250)

title = "potter"

recommendations = get_name_from_partial(title)
recommendations_df = pd.DataFrame(recommendations, columns=['Title'])
display(recommendations_df.head(10))

Unnamed: 0,Title
0,"Harry Potter and the Sorcerer's Stone (Harry Potter, #1)"
1,"Harry Potter and the Prisoner of Azkaban (Harry Potter, #3)"
2,"Harry Potter and the Order of the Phoenix (Harry Potter, #5)"
3,"Harry Potter and the Chamber of Secrets (Harry Potter, #2)"
4,"Harry Potter and the Goblet of Fire (Harry Potter, #4)"
5,"Harry Potter and the Deathly Hallows (Harry Potter, #7)"
6,"Harry Potter and the Half-Blood Prince (Harry Potter, #6)"
7,"Harry Potter and the Cursed Child - Parts One and Two (Harry Potter, #8)"
8,"Harry Potter Boxset (Harry Potter, #1-7)"
9,Harry Potter: Film Wizardry


In [None]:
title = "prince"

recommendations = get_name_from_partial(title)
recommendations_df = pd.DataFrame(recommendations, columns=['Title'])
display(recommendations_df.head(5))

Unnamed: 0,Title
0,"Harry Potter and the Half-Blood Prince (Harry Potter, #6)"
1,The Little Prince
2,The Princess Bride
3,"Prince Caspian (Chronicles of Narnia, #2)"
4,"Clockwork Prince (The Infernal Devices, #2)"


In [None]:
title = "paper"

recommendations = get_name_from_partial(title)
recommendations_df = pd.DataFrame(recommendations, columns=['Title'])
display(recommendations_df.head(5))

Unnamed: 0,Title
0,Paper Towns
1,The Yellow Wallpaper and Other Stories
2,Kindle Paperwhite User's Guide
3,The Yellow Wall-Paper
4,"The Paper Magician (The Paper Magician Trilogy, #1)"


## Popularity and Ratings <a id="8"></a> <br>

In [None]:
def improved_recommendations(title, n=10):
    """Recommend books similar to `title`, re-ranked by weighted rating.

    The 30 most similar books are taken from `cosine_sim`; those whose
    `ratings_count` reaches the 60th percentile of that group are kept and
    ordered by an IMDB-style weighted rating computed within the group.

    Parameters
    ----------
    title : str
        Exact book title; must be a key of `indices` (otherwise KeyError).
    n : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns: title, ratings_count, average_rating, weighted_rating.
    """
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Several books share this title; use the first one.
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx])
    ranked = np.argsort(-scores, kind='stable')
    # Exclude the queried book by identity rather than assuming it ranks first.
    book_indices = ranked[ranked != idx][:30]

    # `weighted_rating` is computed below, so it is not read from `books`;
    # the copy keeps the new column out of a slice of `books`.
    df = books.iloc[book_indices][['title', 'ratings_count', 'average_rating']].copy()

    v = df['ratings_count']
    m = df['ratings_count'].quantile(0.60)
    R = df['average_rating']
    C = df['average_rating'].mean()
    df['weighted_rating'] = (R*v + C*m) / (v + m)

    qualified = df[df['ratings_count'] >= m]
    return qualified.sort_values('weighted_rating', ascending=False).head(n)

**CONTENT BASED RECOMMENDER 2.3**
Getting recommendations based on title similarity (exact title), and ratings.

In [None]:
improved_recommendations("The Alchemist")

Unnamed: 0,title,ratings_count,average_rating,weighted_rating
66,A Thousand Splendid Suns,818742,4.34,4.316455
10,The Kite Runner,1813044,4.26,4.251152
1255,City of Thieves,68063,4.28,4.131438
1443,Ham on Rye,54140,4.16,4.048827
404,Breakfast of Champions,178154,4.08,4.046783
1299,Those Who Save Us,68480,4.12,4.039125
358,And the Mountains Echoed,199326,4.03,4.009563
1317,Post Office,59295,4.01,3.972371
1956,I Capture the Castle,56444,4.0,3.966033
248,Extremely Loud and Incredibly Close,294726,3.97,3.96386


In [None]:
improved_recommendations("The Little Prince")

Unnamed: 0,title,ratings_count,average_rating,weighted_rating
183,Matilda,440743,4.29,4.262311
372,The BFG,245855,4.22,4.189927
560,The Phantom Tollbooth,178432,4.22,4.181648
867,Pippi Longstocking,123065,4.11,4.099013
517,Peter Pan,169306,4.1,4.093987
228,Alice's Adventures in Wonderland & Through the Looking-Glass,340920,4.06,4.063045
1252,"Mary Poppins (Mary Poppins, #1)",82706,4.05,4.062771
256,Alice in Wonderland,343252,4.03,4.037893
825,The Wind in the Willows,124612,3.98,4.014316
1326,The Jungle Book,67107,3.94,4.008993


In [None]:
improved_recommendations("Paper Towns")

Unnamed: 0,title,ratings_count,average_rating,weighted_rating
163,Eleanor & Park,514312,4.11,4.075137
323,Fangirl,340379,4.12,4.069332
73,Looking for Alaska,783470,4.09,4.068163
920,All the Bright Places,132087,4.19,4.064101
563,It's Kind of a Funny Story,161001,4.14,4.048287
362,"Anna and the French Kiss (Anna and the French Kiss, #1)",259479,4.08,4.030963
474,"Where She Went (If I Stay, #2)",193141,4.06,4.008082
146,Thirteen Reasons Why,463783,4.02,4.000345
148,"If I Stay (If I Stay, #1)",503527,3.96,3.952982
829,The DUFF: Designated Ugly Fat Friend,136736,3.86,3.889164


In [None]:
improved_recommendations("A Time to Kill")

Unnamed: 0,title,ratings_count,average_rating,weighted_rating
208,"The Silence of the Lambs (Hannibal Lecter, #2)",351107,4.14,4.094907
1175,Mystic River,86103,4.19,4.049644
965,Presumed Innocent,91774,4.1,4.006887
981,"The Alienist (Dr. Laszlo Kreizler, #1)",96981,4.05,3.98261
122,"The Firm (Penguin Readers, Level 5)",488269,3.99,3.977484
226,The Client,320083,3.97,3.956369
342,The Runaway Jury,222712,3.96,3.944734
280,The Pelican Brief,288376,3.95,3.939665
595,The Rainmaker,137412,3.91,3.907619
962,The Partner,86870,3.89,3.896669


# Collaborative Filtering <a id="9"></a> <br>


There are two classes of Collaborative Filtering:
- **User-based**, which measures the similarity between target users and other users.
- **Item-based**, which measures the similarity between the items that target users rate or interact with and other items.

## - User Based <a id="10"></a> <br>

In [None]:
! pip install surprise

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3095444 sha256=215e1aed84150c2a18182c4932a272039ee62175233f60a2934c8b7bf1b6f736
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.

In [None]:
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

In [None]:
reader = Reader()
data = Dataset.load_from_df(new_ratings[['user_id', 'book_id', 'rating']], reader)

In [None]:
# Fix the seed used to initialise the SVD factors so that the fitted model and
# its predictions are reproducible from run to run.
# NOTE(review): the fold split made by cross_validate is still randomised.
svd = SVD(random_state=42)
cross_validate(svd, data, measures=['RMSE', 'MAE'])

{'test_rmse': array([0.84140049, 0.84119386, 0.84193854, 0.84355606, 0.84307013]),
 'test_mae': array([0.65840929, 0.65725224, 0.65804825, 0.65962532, 0.65917975]),
 'fit_time': (29.72565770149231,
  19.85924983024597,
  20.603286266326904,
  20.73286247253418,
  19.6564679145813),
 'test_time': (2.741057872772217,
  2.3161046504974365,
  2.3721680641174316,
  2.742370367050171,
  2.363618850708008)}

We get a mean **Root Mean Square Error** of about 0.842 across the five folds, which is more than good enough for our case. We can now train on our full dataset and arrive at predictions.

In [None]:
trainset = data.build_full_trainset()
svd.fit(trainset);

**COLLABORATIVE FILTERING RECOMMENDER (USER BASED)**
Getting recommendations based on the previous interest of the user.

In [None]:
# get recommendations based on previous interest of user number 10
new_ratings[new_ratings['user_id'] == 10]

Unnamed: 0,book_id,user_id,rating,title
150478,1506,10,4,The Zahir
282986,2833,10,4,"The Prisoner of Heaven (The Cemetery of Forgotten Books, #3)"
340448,3409,10,5,The Winner Stands Alone
393966,3946,10,5,Matterhorn
452158,4531,10,4,The Joke
506878,5084,10,2,The Sheltering Sky
588312,5907,10,4,Our Mutual Friend
590191,5926,10,2,The Night Watch
610487,6131,10,2,The Longest Day
696035,7002,10,5,A Mercy


In [None]:
new_ratings[new_ratings['user_id'] == 167]

Unnamed: 0,book_id,user_id,rating,title
331566,3320,167,5,The Philosophy of Andy Warhol (From A to B and Back Again)
446606,4475,167,5,Zorba the Greek
522733,5244,167,3,The Baron in the Trees
685392,6892,167,4,CivilWarLand in Bad Decline
738490,7436,167,3,"The Seventh Scroll (Ancient Egypt, #2)"
744828,7501,167,3,"Post Captain (Aubrey/Maturin, #2)"
770830,7769,167,3,Scoop
858885,8687,167,4,H.M.S. Surprise
981466,9998,167,3,The Mauritius Command


In [None]:
svd.predict(10, 1506)

Prediction(uid=10, iid=1506, r_ui=None, est=3.045378358275658, details={'was_impossible': False})

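For user 10 and the book with ID 1506 (*The Zahir*, which this user actually rated 4), we get an estimated rating of about **3.05**. The exact value depends on the random initialisation of SVD.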

## - Item Based <a id="11"></a> <br>

In [None]:
# User x title matrix of ratings. pivot_table aggregates with the mean by default,
# so several ratings by one user for the same title are averaged.
bookmat = new_ratings.pivot_table(index='user_id', columns='title', values='rating')

In [None]:
def get_similar(title, mat):
    """Correlate every column of `mat` with the column named `title`.

    Parameters
    ----------
    title : str
        Column of `mat` to compare the others against.
    mat : pandas.DataFrame
        Rating matrix with one column per item.

    Returns
    -------
    pandas.DataFrame
        Single 'correlation' column indexed by the columns of `mat`, without
        undefined (NaN) correlations, sorted from highest to lowest.
    """
    correlations = mat.corrwith(mat[title])
    return (
        correlations.to_frame('correlation')
        .dropna()
        .sort_values('correlation', ascending=False)
    )

**COLLABORATIVE FILTERING RECOMMENDER (ITEM BASED)**
Getting recommendations based on the previous ratings of the user and similarity on other items

In [None]:
title = "The Alchemist"
smlr = get_similar(title, bookmat)
smlr = smlr.join(books.set_index('title')['ratings_count'])
smlr[smlr.ratings_count > 5e5].sort_values('correlation', ascending=False).head(10)

Unnamed: 0_level_0,correlation,ratings_count
title,Unnamed: 1_level_1,Unnamed: 2_level_1
The Alchemist,1.0,1299566
"Matched (Matched, #1)",0.879883,511815
Looking for Alaska,0.808746,783470
Into the Wild,0.788526,647684
"City of Bones (The Mortal Instruments, #1)",0.690263,1154031
"Outlander (Outlander, #1)",0.679342,515547
The Girl on the Train,0.673469,1008778
"Divergent (Divergent, #1)",0.672543,1903563
Eleanor & Park,0.644604,514312
Fahrenheit 451,0.583424,570498


In [None]:
title = "The Little Prince"
smlr = get_similar(title, bookmat)
smlr = smlr.join(books.set_index('title')['ratings_count'])
smlr[smlr.ratings_count > 5e5].sort_values('correlation', ascending=False).head(10)

Unnamed: 0_level_0,correlation,ratings_count
title,Unnamed: 1_level_1,Unnamed: 2_level_1
The Little Prince,1.0,738757
"Outlander (Outlander, #1)",0.779194,515547
"If I Stay (If I Stay, #1)",0.645497,503527
"Insurgent (Divergent, #2)",0.642364,836362
Slaughterhouse-Five,0.63071,846488
Into the Wild,0.584924,647684
"A Clash of Kings (A Song of Ice and Fire, #2)",0.580444,523303
"The Lion, the Witch, and the Wardrobe (Chronicles of Narnia, #1)",0.544374,1531800
Steve Jobs,0.5,560715
The Princess Bride,0.488504,628637


# Hybrid Recommender <a id="12"></a> <br>


In [None]:
def improved_hybrid(user_id, title, n=3):
    """Blend content similarity, popularity and the SVD estimate for one user.

    The 50 books most similar to `title` are scored with the mean of
    (a) the rating `svd` predicts for `user_id` and (b) an IMDB-style weighted
    rating computed within those 50 books.

    Parameters
    ----------
    user_id : int
        User id passed to `svd.predict`.
    title : str
        Exact book title; must be a key of `indices` (otherwise KeyError).
    n : int, default 3
        Number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns: book_id, title, original_publication_year, ratings_count,
        average_rating, score; sorted by score in descending order.
    """
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Several books share this title; use the first one.
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx])
    ranked = np.argsort(-scores, kind='stable')
    # Exclude the queried book by identity rather than assuming it ranks first.
    book_indices = ranked[ranked != idx][:50]

    df = books.iloc[book_indices][
        ['id', 'book_id', 'title', 'ratings_count', 'average_rating', 'original_publication_year']
    ].copy()
    v = df['ratings_count']
    m = df['ratings_count'].quantile(0.60)
    R = df['average_rating']
    C = df['average_rating'].mean()
    df['weighted_rating'] = (R*v + C*m) / (v + m)

    # The ratings the model was trained on identify books by `books['id']`;
    # `books['book_id']` is a different identifier (the one `book_tags` uses),
    # so the prediction must be requested with `id`.
    df['est'] = df['id'].apply(lambda rated_id: svd.predict(user_id, rated_id).est)

    df['score'] = (df['est'] + df['weighted_rating']) / 2
    df = df.sort_values('score', ascending=False)
    return df[['book_id', 'title', 'original_publication_year', 'ratings_count', 'average_rating', 'score']].head(n)

In [None]:
improved_hybrid(5, 'Romeo and Juliet')

Unnamed: 0,book_id,title,original_publication_year,ratings_count,average_rating,score
153,8852,Macbeth,1606,496018,3.88,4.204
124,1420,Hamlet,1600,515820,4.0,4.065959
9,1885,Pride and Prejudice,1813,2035490,4.24,4.054099


In [None]:
improved_hybrid(43, 'Romeo and Juliet')

Unnamed: 0,book_id,title,original_publication_year,ratings_count,average_rating,score
9,1885,Pride and Prejudice,1813,2035490,4.24,4.056052
153,8852,Macbeth,1606,496018,3.88,4.015051
13,7613,Animal Farm,1945,1881700,3.87,4.005343


In [None]:
improved_hybrid(50000, 'Romeo and Juliet')

Unnamed: 0,book_id,title,original_publication_year,ratings_count,average_rating,score
9,1885,Pride and Prejudice,1813,2035490,4.24,4.279916
13,7613,Animal Farm,1945,1881700,3.87,4.23587
124,1420,Hamlet,1600,515820,4.0,4.229491


In [None]:
improved_hybrid(5, 'Money')

Unnamed: 0,book_id,title,original_publication_year,ratings_count,average_rating,score
4982,3692,The Heart of the Matter,1948,19149,3.98,4.21064
475,7069,The World According to Garp,1978,167106,4.07,4.161214
3320,7805,Pale Fire,1962,26377,4.19,4.150973


In [None]:
improved_hybrid(43, 'Money')

Unnamed: 0,book_id,title,original_publication_year,ratings_count,average_rating,score
4982,3692,The Heart of the Matter,1948,19149,3.98,4.113175
3320,7805,Pale Fire,1962,26377,4.19,4.101428
475,7069,The World According to Garp,1978,167106,4.07,4.087398


In [None]:
improved_hybrid(50000, 'Money')

Unnamed: 0,book_id,title,original_publication_year,ratings_count,average_rating,score
3320,7805,Pale Fire,1962,26377,4.19,4.349282
475,7069,The World According to Garp,1978,167106,4.07,4.331412
4982,3692,The Heart of the Matter,1948,19149,3.98,4.307955
