## IMPORT LIBRARIES AND DATASET

In [2]:
import numpy as np
import pandas as pd

In [3]:
books = pd.read_csv('books.csv')
books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10000 non-null  int64  
 1   book_id                    10000 non-null  int64  
 2   best_book_id               10000 non-null  int64  
 3   work_id                    10000 non-null  int64  
 4   books_count                10000 non-null  int64  
 5   isbn                       9300 non-null   object 
 6   isbn13                     9415 non-null   float64
 7   authors                    10000 non-null  object 
 8   original_publication_year  9979 non-null   float64
 9   original_title             9415 non-null   object 
 10  title                      10000 non-null  object 
 11  language_code              8916 non-null   object 
 12  average_rating             10000 non-null  float64
 13  ratings_count              10000 non-null  int6

In [4]:
book = books[['book_id','authors','title']]
book.head()

Unnamed: 0,book_id,authors,title
0,2767052,Suzanne Collins,"The Hunger Games (The Hunger Games, #1)"
1,3,"J.K. Rowling, Mary GrandPré",Harry Potter and the Sorcerer's Stone (Harry P...
2,41865,Stephenie Meyer,"Twilight (Twilight, #1)"
3,2657,Harper Lee,To Kill a Mockingbird
4,4671,F. Scott Fitzgerald,The Great Gatsby


In [5]:
book.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   book_id  10000 non-null  int64 
 1   authors  10000 non-null  object
 2   title    10000 non-null  object
dtypes: int64(1), object(2)
memory usage: 234.5+ KB


In [6]:
ratings = pd.read_csv('ratings.csv')

In [7]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 981756 entries, 0 to 981755
Data columns (total 3 columns):
 #   Column   Non-Null Count   Dtype
---  ------   --------------   -----
 0   book_id  981756 non-null  int64
 1   user_id  981756 non-null  int64
 2   rating   981756 non-null  int64
dtypes: int64(3)
memory usage: 22.5 MB


In [8]:
ratings['rating'].unique()

array([5, 3, 4, 1, 2])

In [9]:
books_data = pd.merge(book, ratings, on='book_id')

## USER BASED COLLABORATIVE FILTER

In [15]:
from surprise import Reader, Dataset, SVD,accuracy
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

reader = Reader(rating_scale=(1,5))


data = Dataset.load_from_df(ratings[['book_id', 'user_id', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=.25)



algo = SVD()
algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions, verbose=True)


RMSE: 0.8458


0.8457835435285036

### RMSE ACCURACY OBTAINED - 84.58%

In [12]:
def recommendation(user_id):
    user = book.copy()
    already_read = books_data[books_data['user_id'] == user_id]['book_id'].unique()
    user = user.reset_index()
    user = user[~user['book_id'].isin(already_read)]
    user['Estimate_Score']=user['book_id'].apply(lambda x: algo.predict(user_id, x).est)
    user = user.drop('book_id', axis = 1)
    user = user.sort_values('Estimate_Score', ascending=False)
    print(user.head(10))

In [13]:
recommendation(2)

      index                               authors  \
3060   3060                            C.S. Lewis   
3251   3251                            Mark Twain   
2323   2323  Laura Ingalls Wilder, Garth Williams   
156     156                   Dr. Seuss, לאה נאור   
9286   9286                             Max Barry   
157     157             Roald Dahl, Quentin Blake   
6062   6062                           Jack London   
5857   5857                       Agatha Christie   
7916   7916                      Simon Winchester   
8037   8037                         David Eddings   

                                                  title  Estimate_Score  
3060                                The Problem of Pain             5.0  
3251  The Adventures of Tom Sawyer & Adventures of H...             5.0  
2323   By the Shores of Silver Lake  (Little House, #5)             5.0  
156                                  Green Eggs and Ham             5.0  
9286                                Jennifer G

## Enter UserID to get recommendation 

In [14]:
x = 33 #USERID
recommendation(x)

      index                      authors  \
5857   5857              Agatha Christie   
1985   1985                 John Grisham   
711     711               Diana Gabaldon   
155     155                 Emily Giffin   
2314   2314  E.B. White, Fred Marcellino   
1446   1446             Robert McCloskey   
2670   2670                 Maeve Binchy   
2432   2432                 Jodi Picoult   
7916   7916             Simon Winchester   
6062   6062                  Jack London   

                                                  title  Estimate_Score  
5857           Cards on the Table (Hercule Poirot, #15)        5.000000  
1985  The Innocent Man: Murder and Injustice in a Sm...        5.000000  
711                     Drums of Autumn (Outlander, #4)        5.000000  
155             Something Borrowed (Darcy & Rachel, #1)        4.964136  
2314                            The Trumpet of the Swan        4.959543  
1446                             Make Way for Ducklings        4.949584