# <u>Practice Day 18</u>
Item-Based Collaborative Filtering(IB-CF)
***
Date: 18th November 2019<br>
Author: Samuel Natamihardja<br>
Company: Home Credit Indonesia

#### Importing Library

In [88]:
import pandas as pd #data wrangling
import numpy as np #calculation
import matplotlib.pyplot as plt #visualization
import seaborn as sns #visualization


from scipy.sparse import csr_matrix #prepare matrix

#Model
import surprise
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import Reader


#Model Evaluation
from sklearn import metrics
from sklearn.metrics import auc, roc_curve
from sklearn.metrics import accuracy_score

## Importing Dataset

In [89]:
#importing dataset
books = pd.read_csv('new_dataset/new_books.csv')
ratings = pd.read_csv('new_dataset/ratings.csv')

#### Copy Dataset

In [90]:
df_books = books[['book_id','original_publication_year','title','authors','tag_name','image_url']]

#### Handling Missing Value

In [91]:
df_books.isnull().sum()

book_id                       0
original_publication_year    21
title                         0
authors                       0
tag_name                      0
image_url                     0
dtype: int64

In [92]:
df_books.dropna(inplace = True)
df_books.isnull().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


book_id                      0
original_publication_year    0
title                        0
authors                      0
tag_name                     0
image_url                    0
dtype: int64

In [93]:
df_ratings.isnull().sum()

book_id    0
user_id    0
rating     0
dtype: int64

In [94]:
df_ratings = df_ratings.astype(int)

In [95]:
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 981756 entries, 0 to 981755
Data columns (total 3 columns):
book_id    981756 non-null int32
user_id    981756 non-null int32
rating     981756 non-null int32
dtypes: int32(3)
memory usage: 11.2 MB


### Final Dataset

Dataset books

In [96]:
df_books = df_books.astype({"original_publication_year": int})
df_books.head()

Unnamed: 0,book_id,original_publication_year,title,authors,tag_name,image_url
0,2767052,2008,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,young adult,https://images.gr-assets.com/books/1447303603m...
1,3,1997,Harry Potter and the Sorcerer's Stone (Harry P...,"J.K. Rowling, Mary GrandPré",fantasy,https://images.gr-assets.com/books/1474154022m...
2,41865,2005,"Twilight (Twilight, #1)",Stephenie Meyer,young adult,https://images.gr-assets.com/books/1361039443m...
3,2657,1960,To Kill a Mockingbird,Harper Lee,classics,https://images.gr-assets.com/books/1361975680m...
4,4671,1925,The Great Gatsby,F. Scott Fitzgerald,classics,https://images.gr-assets.com/books/1490528560m...


Dataset books rating

In [97]:
df_ratings = ratings.copy()
df_ratings.head()

Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4


## Item-Based Collaborative Filtering(IB-CF)
With this method, item to item filtering tried to find items similarity.
Example question: "Users who liked this item also liked ..."

In [185]:
#reader
reader = Reader(rating_scale=(1, 5))

In [186]:
#prepare data that going tobe used by model
data = Dataset.load_from_df(df_ratings[["user_id", "book_id", "rating"]].head(5000), reader)

In [187]:
#modelling with KNN with means
model_knn = KNNWithMeans(sim_options={"name":"msd","user_base":False},k=5)

In [188]:
#prepare training data
data_train = data.build_full_trainset()

In [189]:
#fit training data to model
model_knn.fit(data_train)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x26f953c8>

### Prediction Test

recommend books to our top reviewer user.

In [190]:
#show top 5 user based on their reviews
df_ratings['user_id'].value_counts().head(5)

12874    200
30944    200
52036    199
28158    199
12381    199
Name: user_id, dtype: int64

In [191]:
#temporary dataset for prediction result
pred = pd.DataFrame()

#looping book recommendation for user: 12874
for i in range(1,len(df_books)):
    pred.set_value(i,'book_id',df_books.book_id.iloc[i])
    pred.set_value(i,'predRating',model_knn.predict(12874, df_books.book_id.iloc[i]).est)

  
  import sys


In [192]:
#display recommendation result for user: 12874
final_pred = pd.merge(left=pred,right=df_books, left_on='book_id', right_on='book_id')
display(final_pred[~final_pred.book_id.isin(df_ratings[['book_id']][df_ratings.user_id == 12874])].sort_values('predRating',ascending=False).head(10))

Unnamed: 0,book_id,predRating,original_publication_year,title,authors,tag_name,image_url
1907,25.0,4.708366,1998,I'm a Stranger Here Myself: Notes on Returning...,Bill Bryson,nonfiction,https://s.gr-assets.com/assets/nophoto/book/11...
2245,26.0,4.65339,1989,The Lost Continent: Travels in Small Town America,Bill Bryson,travel,https://images.gr-assets.com/books/1404042682m...
25,1.0,4.613233,2005,Harry Potter and the Half-Blood Prince (Harry ...,"J.K. Rowling, Mary GrandPré",fantasy,https://images.gr-assets.com/books/1361039191m...
16,5.0,4.406787,1999,Harry Potter and the Prisoner of Azkaban (Harr...,"J.K. Rowling, Mary GrandPré, Rufus Beck",fantasy,https://images.gr-assets.com/books/1499277281m...
320,13.0,4.326805,1996,The Ultimate Hitchhiker's Guide to the Galaxy,Douglas Adams,science fiction,https://images.gr-assets.com/books/1404613595m...
3647,10.0,4.17325,2005,"Harry Potter Collection (Harry Potter, #1-6)",J.K. Rowling,fantasy,https://images.gr-assets.com/books/1328867351m...
19,2.0,4.162607,2003,Harry Potter and the Order of the Phoenix (Har...,"J.K. Rowling, Mary GrandPré",fantasy,https://images.gr-assets.com/books/1387141547m...
370,50.0,4.123626,1986,"Hatchet (Brian's Saga, #1)",Gary Paulsen,young adult,https://s.gr-assets.com/assets/nophoto/book/11...
356,21.0,4.097652,2003,A Short History of Nearly Everything,Bill Bryson,history,https://s.gr-assets.com/assets/nophoto/book/11...
177,33.0,4.036739,1955,"The Lord of the Rings (The Lord of the Rings, ...",J.R.R. Tolkien,fantasy,https://images.gr-assets.com/books/1411114164m...
