# Recommender System: Collaborative Filtering with Goodreads Dataset From Kaggle

Collaborative filtering is a recommender-system method that aims at learning predictive models of user preferences, interests, or behavior from community data, that is, a database of available user preferences.

Source: Latent semantic models for collaborative filtering

In [1]:
import pandas as pd
import numpy as np

In [2]:
#Load the book metadata first, then the user ratings, from the CSV files
df_books, df_ratings = (
    pd.read_csv(csv_name) for csv_name in ("books.csv", "ratings.csv")
)

#Lift the row cap so pandas shows every row of a displayed object
pd.options.display.max_rows = None

In [3]:
#Keep only the identifier and the title; no other book column is used below
df_books = df_books.loc[:, ['book_id', 'title']]

In [4]:
#Count the distinct 'book_id' values in the books frame
df_books['book_id'].nunique()

10000

In [5]:
#Count the distinct 'book_id' values in the ratings frame
df_ratings['book_id'].nunique()

10000

In [6]:
#Merge two dataframes (inner merge on the shared 'book_id' key)
# The key is named explicitly so the join cannot silently change if either
# frame gains another column with a matching name. validate= makes pandas
# raise if a book_id occurs more than once on the books side.
# NOTE(review): the pivoted frame further down lists only 812 titles although
# both frames report 10000 distinct book_id values -- confirm that the two
# 'book_id' columns really carry the same identifier.
df_bookratings = pd.merge(
    df_books,
    df_ratings,
    how='inner',
    on='book_id',
    validate='one_to_many',
)

In [7]:
#Summarise the books frame: row count, column dtypes and non-null counts
df_books.info()

print('========================')

#Missing-value check on the ratings frame (currently disabled)
# df_ratings.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   book_id  10000 non-null  int64 
 1   title    10000 non-null  object
dtypes: int64(1), object(1)
memory usage: 156.4+ KB


In [8]:
#Number of missing values in each column of the merged frame
df_bookratings.isna().sum()

book_id    0
title      0
user_id    0
rating     0
dtype: int64

In [9]:
#Reshape to a user x title matrix of ratings: one row per user, one column per title
df_bookratings = pd.pivot_table(
    df_bookratings,
    values='rating',
    index=['user_id'],
    columns=['title'],
)

In [10]:
#Inspect the size of the pivoted user x title matrix (rows, columns, memory)
df_bookratings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 28906 entries, 2 to 53424
Columns: 812 entries, 'Salem's Lot to number9dream
dtypes: float64(812)
memory usage: 179.3 MB


In [11]:
#Keep only the book columns that hold at least 15 non-missing ratings
df_bookratings = df_bookratings.dropna(axis='columns', thresh=15)

In [12]:
#Replace every missing rating with 0; the correlation computed afterwards
#therefore treats an unrated book as a rating of zero
df_bookratings = df_bookratings.fillna(value=0)

In [13]:
#Book-to-book similarity: Pearson correlation between every pair of title columns
books_similarity = df_bookratings.corr(method="pearson")
books_similarity.head(5)

title,'Salem's Lot,"'Tis (Frank McCourt, #2)",1421: The Year China Discovered America,1776,1984,A Bend in the River,A Bend in the Road,A Brief History of Time,A Briefer History of Time,A Case of Need,...,"Women in Love (Brangwen Family, #2)",World War Z: An Oral History of the Zombie War,"World Without End (The Kingsbridge Series, #2)",Wuthering Heights,"Xenocide (Ender's Saga, #3)",Year of Wonders,You Shall Know Our Velocity!,Zen and the Art of Motorcycle Maintenance: An Inquiry Into Values,Zodiac,number9dream
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Salem's Lot,1.0,-0.002777,-0.002759,-0.00279,-0.002741,-0.0027,-0.002803,-0.002744,-0.002784,-0.002641,...,-0.002794,-0.002707,-0.002655,-0.002809,-0.002712,-0.002764,-0.002751,-0.002741,-0.002759,-0.002747
"'Tis (Frank McCourt, #2)",-0.002777,1.0,0.017751,0.00598,-0.003238,-0.003189,-0.003311,-0.003241,-0.003289,-0.003119,...,-0.0033,-0.003198,-0.003136,0.038246,0.001339,-0.003265,-0.00325,-0.003237,-0.003258,-0.003245
1421: The Year China Discovered America,-0.002759,0.017751,1.0,0.051997,-0.003217,-0.003168,-0.00329,-0.00322,0.026612,-0.003099,...,-0.003278,-0.003177,0.004341,0.039667,-0.003182,-0.003244,-0.003229,-0.003216,-0.003237,0.00647
1776,-0.00279,0.00598,0.051997,1.0,-0.003253,-0.003204,-0.003327,-0.003257,0.481489,-0.003134,...,-0.003315,-0.003213,-0.003151,0.03237,-0.003218,0.035862,-0.003265,0.012921,0.030697,-0.003261
1984,-0.002741,-0.003238,-0.003217,-0.003253,1.0,-0.003148,-0.003269,-0.0032,-0.003247,-0.003079,...,-0.003258,-0.003157,-0.003096,0.005423,-0.003162,-9.6e-05,-0.003208,0.008296,-0.003217,-0.003204


In [14]:
#Function to Find the Similarity Values
def sim_books(title, rating, similarity=None, neutral_rating=4):
    """Score every book by its similarity to one rated book.

    The similarity column of ``title`` is scaled by ``rating - neutral_rating``:
    a rating above the neutral point keeps the sign of each similarity, a
    rating below it flips the sign, and a rating equal to it yields all zeros.

    Parameters
    ----------
    title : str
        Title of the rated book; must be a column of the similarity matrix.
    rating : int or float
        Rating the user gave to ``title``.
    similarity : pandas.DataFrame, optional
        Title-by-title similarity matrix. Defaults to the notebook-level
        ``books_similarity`` frame when omitted.
    neutral_rating : int or float, optional
        Rating treated as neither positive nor negative (default 4).

    Returns
    -------
    pandas.Series
        Scaled similarity scores indexed by title, highest score first.

    Raises
    ------
    KeyError
        If ``title`` is not a column of the similarity matrix.
    """
    if similarity is None:
        # Fall back to the matrix built earlier in the notebook.
        similarity = books_similarity

    if title not in similarity.columns:
        raise KeyError(
            f"Unknown book title {title!r}: it is not a column of the similarity matrix"
        )

    sim_score = similarity[title] * (rating - neutral_rating)
    return sim_score.sort_values(ascending=False)

In [16]:
#Testing the Function
# print(sim_books('1776',5))

# Book's Title

In [18]:
#List every book title left in the ratings matrix, one per line
for book_title in df_bookratings.columns:
    print(book_title)

'Salem's Lot
'Tis (Frank McCourt, #2)
1421: The Year China Discovered America
1776
1984
A Bend in the River
A Bend in the Road
A Brief History of Time
A Briefer History of Time
A Case of Need
A Christmas Carol
A Christmas Carol and Other Christmas Writings
A Fine Balance
A Great and Terrible Beauty (Gemma Doyle, #1)
A Heartbreaking Work of Staggering Genius
A History of God: The 4,000-Year Quest of Judaism, Christianity, and Islam
A History of the World in 6 Glasses
A Home at the End of the World
A House for Mr Biswas
A Lesson Before Dying
A Little Princess
A Living Nightmare (Cirque Du Freak, #1)
A Man Without a Country
A Map of the World
A Midsummer Night's Dream
A Million Little Pieces
A Modest Proposal and Other Satirical Works
A Moveable Feast
A Painted House
A People's History of the United States
A Portrait of the Artist as a Young Man
A Prayer for Owen Meany
A Raisin in the Sun
A Room with a View
A Separate Peace
A Short History of Nearly Everything
A Son of the Circus
A Spot o

# The Recommendation

In [39]:
# Ask the user which books to rate and collect one Series of similarity
# scores per rated book.
numbooks = int(input("How many books do you want to rate?: "))
rated_scores = []
for _ in range(numbooks):
    booktitle = input("What book's title: ")
    bookrating = int(input("How many score do you give (0-5): "))
    rated_scores.append(sim_books(booktitle, bookrating))

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call. Build the frame once instead: each collected Series becomes a
# row, and the index is reset to 0..n-1 as ignore_index=True used to do.
similarbooks = pd.DataFrame(rated_scores).reset_index(drop=True)

How many books do you want to rate?: 1
What book's title: Bleach, Volume 01
How many score do you give (0-5): 5


Bleach, Volume 01                                                                                                         1.000000
A Walk to Remember                                                                                                        0.037570
Wild Swans: Three Daughters of China                                                                                      0.035313
Little Women (Little Women, #1)                                                                                           0.033327
Jack: Straight from the Gut                                                                                               0.031489
The Catcher in the Rye                                                                                                    0.029071
Point of Origin (Kay Scarpetta, #9)                                                                                       0.028657
The Fortress of Solitude                                                           

In [45]:
print('Here Some Books Recommendation For You')
# Sum the similarity scores of all rated books column by column, then rank
# the titles from the highest to the lowest total. The frame filled by the
# rating cell is named `similarbooks`.
recommendation = similarbooks.sum().sort_values(ascending=False)
recommendation.head(10)

Here Some Books Recommendation For You


Bleach, Volume 01                       1.000000
A Walk to Remember                      0.037570
Wild Swans: Three Daughters of China    0.035313
Little Women (Little Women, #1)         0.033327
Jack: Straight from the Gut             0.031489
The Catcher in the Rye                  0.029071
Point of Origin (Kay Scarpetta, #9)     0.028657
The Fortress of Solitude                0.024781
Bridge to Terabithia                    0.024500
Fight Club                              0.024294
dtype: float64