# <u>Practice Day 18</u>
Item-Based Collaborative Filtering(IB-CF)
***
Date: 18th November 2019<br>
Author: Samuel Natamihardja<br>
Company: Home Credit Indonesia

#### Importing Library

In [88]:
import pandas as pd #data wrangling
import numpy as np #calculation
import matplotlib.pyplot as plt #visualization
import seaborn as sns #visualization


from scipy.sparse import csr_matrix #prepare matrix

#Model
import surprise
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import Reader


#Model Evaluation
from sklearn import metrics
from sklearn.metrics import auc, roc_curve
from sklearn.metrics import accuracy_score

## Importing Dataset

In [89]:
#importing dataset
books = pd.read_csv('new_dataset/new_books.csv')
ratings = pd.read_csv('new_dataset/ratings.csv')

#### Copy Dataset

In [90]:
df_books = books[['book_id','original_publication_year','title','authors','tag_name','image_url']]

#### Handling Missing Value

In [91]:
df_books.isnull().sum()

book_id                       0
original_publication_year    21
title                         0
authors                       0
tag_name                      0
image_url                     0
dtype: int64

In [92]:
df_books.dropna(inplace = True)
df_books.isnull().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


book_id                      0
original_publication_year    0
title                        0
authors                      0
tag_name                     0
image_url                    0
dtype: int64

In [93]:
df_ratings.isnull().sum()

book_id    0
user_id    0
rating     0
dtype: int64

In [94]:
df_ratings = df_ratings.astype(int)

In [95]:
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 981756 entries, 0 to 981755
Data columns (total 3 columns):
book_id    981756 non-null int32
user_id    981756 non-null int32
rating     981756 non-null int32
dtypes: int32(3)
memory usage: 11.2 MB


### Final Dataset

In [96]:
df_books = df_books.astype({"original_publication_year": int})
df_books.head()

Unnamed: 0,book_id,original_publication_year,title,authors,tag_name,image_url
0,2767052,2008,"The Hunger Games (The Hunger Games, #1)",Suzanne Collins,young adult,https://images.gr-assets.com/books/1447303603m...
1,3,1997,Harry Potter and the Sorcerer's Stone (Harry P...,"J.K. Rowling, Mary GrandPré",fantasy,https://images.gr-assets.com/books/1474154022m...
2,41865,2005,"Twilight (Twilight, #1)",Stephenie Meyer,young adult,https://images.gr-assets.com/books/1361039443m...
3,2657,1960,To Kill a Mockingbird,Harper Lee,classics,https://images.gr-assets.com/books/1361975680m...
4,4671,1925,The Great Gatsby,F. Scott Fitzgerald,classics,https://images.gr-assets.com/books/1490528560m...


In [97]:
df_ratings = ratings.copy()
df_ratings.head()

Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4


## Item-Based Collaborative Filtering(IB-CF)
With this method, item to item filtering tried to find items similarity.
Example question: "Users who liked this item also liked ..."

In [116]:
reader = Reader(rating_scale=(1, 5))

In [162]:
data = Dataset.load_from_df(df_ratings[["user_id", "book_id", "rating"]].head(5000), reader)

In [148]:
model_knn = KNNWithMeans(sim_options={"name":"msd","user_base":False},k=5)

In [149]:
data_train = data.build_full_trainset()

In [150]:
model_knn.fit(data_train)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x383b1248>

In [173]:
df_ratings['user_id'].value_counts().head(5)

12874    200
30944    200
52036    199
28158    199
12381    199
Name: user_id, dtype: int64

In [171]:
pred = pd.DataFrame()

for i in range(1,len(df_books)):
    pred.set_value(i,'book_id',df_books.book_id.iloc[i])
    pred.set_value(i,'predRating',model_knn.predict(12874, df_books.book_id.iloc[i]).est)

  after removing the cwd from sys.path.
  """


In [175]:
df_ratings['book_id'][df_ratings.user_id == 12874]

126          2
321          4
429          5
634          7
721          8
          ... 
496100    4974
498542    4999
541852    5437
673339    6768
880864    8916
Name: book_id, Length: 200, dtype: int64

In [184]:
final_pred = pd.merge(left=pred,right=df_books, left_on='book_id', right_on='book_id')
display(final_pred[~final_pred.book_id.isin(df_ratings[['book_id']][df_ratings.user_id == 12874])].sort_values('predRating',ascending=False).head(10))

Unnamed: 0,book_id,predRating,original_publication_year,title,authors,tag_name,image_url
54,1618.0,4.796054,2003,The Curious Incident of the Dog in the Night-Time,Mark Haddon,fiction,https://images.gr-assets.com/books/1479863624m...
525,629.0,4.386849,1974,Zen and the Art of Motorcycle Maintenance: An ...,Robert M. Pirsig,philosophy,https://images.gr-assets.com/books/1410136019m...
7044,291.0,4.377107,1912,The Broken Wings,"Kahlil Gibran, Anthony R. Ferris",poetry,https://s.gr-assets.com/assets/nophoto/book/11...
2289,1715.0,4.36145,8,Metamorphoses,"Ovid, David Raeburn, Denis Feeney",poetry,https://s.gr-assets.com/assets/nophoto/book/11...
177,33.0,4.346594,1955,"The Lord of the Rings (The Lord of the Rings, ...",J.R.R. Tolkien,fantasy,https://images.gr-assets.com/books/1411114164m...
2245,26.0,4.329749,1989,The Lost Continent: Travels in Small Town America,Bill Bryson,travel,https://images.gr-assets.com/books/1404042682m...
2242,998.0,4.304476,1995,The Millionaire Next Door: The Surprising Secr...,"Thomas J. Stanley, William D. Danko",fiction,https://s.gr-assets.com/assets/nophoto/book/11...
1162,1428.0,4.302205,1994,By the River Piedra I Sat Down and Wept,"Paulo Coelho, Alan R. Clarke",fiction,https://images.gr-assets.com/books/1466877798m...
4601,1305.0,4.297115,1998,Gates of Fire: An Epic Novel of the Battle of ...,Steven Pressfield,fiction,https://s.gr-assets.com/assets/nophoto/book/11...
488,1869.0,4.290469,2001,Nickel and Dimed: On (Not) Getting By in America,Barbara Ehrenreich,fiction,https://s.gr-assets.com/assets/nophoto/book/11...
