In [1]:
# Libraries for data preparation & visualization
import numpy as np
import seaborn as sns
import pandas as pd
import plotly.io as pio
import matplotlib.pyplot as plt
pio.renderers.default = "png"

# Ignore printing warnings for general readability
import warnings 
warnings.filterwarnings("ignore")


In [2]:
# Loading the dataset 
def loaddata(filename):
    df = pd.read_csv(f'{filename}.csv',sep=';',error_bad_lines=False,warn_bad_lines=False,encoding='latin-1')
    return df

book   = loaddata("BX-Books")
user   = loaddata("BX-Users")
rating = loaddata("BX-Book-Ratings")

In [3]:
#Preprocessing Data
book = book[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher']]
book.rename(columns = {'Book-Title':'title', 'Book-Author':'author', 'Year-Of-Publication':'year', 'Publisher':'publisher'}, inplace=True)
user.rename(columns = {'User-ID':'user_id', 'Location':'location', 'Age':'age'}, inplace=True)
rating.rename(columns = {'User-ID':'user_id', 'Book-Rating':'rating'}, inplace=True)

In [4]:
rating

Unnamed: 0,user_id,ISBN,rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [5]:
# Let's visualize ratings given by users

rating_users = rating['user_id'].value_counts().reset_index().\
               rename({'index':'user_id','user_id':'rating'}, axis=1)
rating_users

Unnamed: 0,user_id,rating
0,11676,13602
1,198711,7550
2,153662,6109
3,98391,5891
4,35859,5850
...,...,...
105278,116180,1
105279,116166,1
105280,116154,1
105281,116137,1


In [6]:
# Let's visualize ratings received by books

rating_books = rating['ISBN'].value_counts().reset_index().\
               rename({'index':'ISBN','ISBN':'rating'}, axis=1)
rating_books

Unnamed: 0,ISBN,rating
0,0971880107,2502
1,0316666343,1295
2,0385504209,883
3,0060928336,732
4,0312195516,723
...,...,...
340551,1568656386,1
340552,1568656408,1
340553,1569551553,1
340554,1570081808,1


In [7]:
# In order to avoid rating bias & for making good recommendations, limit the dataset to only those
# users that have made at least 250 ratings & books that have received at least 50 ratings

rating = rating[rating['user_id'].isin(rating_users[rating_users['rating']>250]['user_id'])]
rating = rating[rating['ISBN'].isin(rating_books[rating_books['rating']> 50]['ISBN'])]

rating


Unnamed: 0,user_id,ISBN,rating
1456,277427,002542730X,10
1468,277427,006092988X,0
1469,277427,0060930535,0
1470,277427,0060932139,0
1471,277427,0060934417,0
...,...,...,...
1147440,275970,1400031354,0
1147441,275970,1400031362,0
1147470,275970,1558744606,0
1147517,275970,1573229725,0


In [8]:
len(rating['user_id'].unique())

686

In [9]:
# For the recommendation system, it is prefered to have the book titles rather than ISBN for easier interpretation

rating = rating.merge(book, on="ISBN")[['user_id','title','rating','ISBN']] # merging with the book dataframe
rating                                                                         # on 'ISBN' to get 'Book-Title'


Unnamed: 0,user_id,title,rating,ISBN
0,277427,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
1,3363,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
2,11676,Politically Correct Bedtime Stories: Modern Ta...,6,002542730X
3,12538,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
4,13552,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
...,...,...,...,...
79308,234828,Ringworld,8,0345333926
79309,236283,Ringworld,0,0345333926
79310,249628,Ringworld,0,0345333926
79311,261829,Ringworld,0,0345333926


In [10]:
# Check for duplicate values
print(f'Duplicate entries: {rating.duplicated().sum()}')

Duplicate entries: 0


In [11]:
rating.drop_duplicates(inplace=True)
rating

Unnamed: 0,user_id,title,rating,ISBN
0,277427,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
1,3363,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
2,11676,Politically Correct Bedtime Stories: Modern Ta...,6,002542730X
3,12538,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
4,13552,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
...,...,...,...,...
79308,234828,Ringworld,8,0345333926
79309,236283,Ringworld,0,0345333926
79310,249628,Ringworld,0,0345333926
79311,261829,Ringworld,0,0345333926


In [12]:
rating

Unnamed: 0,user_id,title,rating,ISBN
0,277427,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
1,3363,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
2,11676,Politically Correct Bedtime Stories: Modern Ta...,6,002542730X
3,12538,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
4,13552,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
...,...,...,...,...
79308,234828,Ringworld,8,0345333926
79309,236283,Ringworld,0,0345333926
79310,249628,Ringworld,0,0345333926
79311,261829,Ringworld,0,0345333926


In [13]:
book_pivot = rating.pivot_table(columns='user_id', index='title', values="rating")
#book_pivot.fillna(0, inplace=True)

In [14]:
book_pivot

user_id,254,2276,2766,3363,3757,4385,6251,6543,6575,7158,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16 Lighthouse Road,,,,,,,,,,,...,,,,,,,,,,
1984,9.0,,,,,,,,,,...,10.0,,,,,,0.0,,,
1st to Die: A Novel,,,,,,,,9.0,,0.0,...,,,,,,,,,,
2010: Odyssey Two,,0.0,,,,,,,,,...,,,,,,,,,,
204 Rosewood Lane,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
You Belong To Me,,,,,,,,,,0.0,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,0.0,,,0.0,,,,...,,,,,,,0.0,,,
Zoya,,,,,,,,,,,...,,0.0,,,,,,,,
"\O\"" Is for Outlaw""",,,,,,,,,,,...,,,,,8.0,,,,,


In [15]:
book_pivot1 = rating.pivot_table(columns='user_id', index='ISBN', values="rating")
#book_pivot1.fillna(0, inplace=True)

In [16]:
book_pivot1

user_id,254,2276,2766,3363,3757,4385,6251,6543,6575,7158,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
ISBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000649840X,,,,,,,,,,,...,,,0.0,,,,,,,
002026478X,,,,,,,,,,,...,,,,,,,,,,
0020442203,,,,,,,,,,,...,0.0,,,,,,,,,0.0
002542730X,,,,0.0,,,,,,,...,0.0,,,,,,,10.0,,
0028604199,,,,0.0,,,,,0.0,,...,,,,,,,,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3257229534,,0.0,,,,,,,,,...,,,,,,,,,,
3404148665,,,,,,,,,,,...,,,,,,,,,,
3423202327,,,,,,,,,,,...,,,,,,,,,,
3442541751,,,,,,,,,,,...,,,,,,,,,,


# Normalization utility matrix

In [17]:
rating = rating[['user_id','ISBN','rating']]

In [18]:
rating

Unnamed: 0,user_id,ISBN,rating
0,277427,002542730X,10
1,3363,002542730X,0
2,11676,002542730X,6
3,12538,002542730X,10
4,13552,002542730X,0
...,...,...,...
79308,234828,0345333926,8
79309,236283,0345333926,0
79310,249628,0345333926,0
79311,261829,0345333926,0


In [19]:
len(rating['user_id'].unique())

686

In [20]:
len(rating['ISBN'].unique())

2101

In [21]:
rating.values

array([[277427, '002542730X', 10],
       [3363, '002542730X', 0],
       [11676, '002542730X', 6],
       ...,
       [249628, '0345333926', 0],
       [261829, '0345333926', 0],
       [264321, '0345333926', 8]], dtype=object)

In [22]:
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix
book_sparse = csr_matrix(book_pivot1)

In [23]:
book_sparse

<2101x686 sparse matrix of type '<class 'numpy.float64'>'
	with 1380228 stored elements in Compressed Sparse Row format>

In [24]:
book_pivot1

user_id,254,2276,2766,3363,3757,4385,6251,6543,6575,7158,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
ISBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000649840X,,,,,,,,,,,...,,,0.0,,,,,,,
002026478X,,,,,,,,,,,...,,,,,,,,,,
0020442203,,,,,,,,,,,...,0.0,,,,,,,,,0.0
002542730X,,,,0.0,,,,,,,...,0.0,,,,,,,10.0,,
0028604199,,,,0.0,,,,,0.0,,...,,,,,,,,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3257229534,,0.0,,,,,,,,,...,,,,,,,,,,
3404148665,,,,,,,,,,,...,,,,,,,,,,
3423202327,,,,,,,,,,,...,,,,,,,,,,
3442541751,,,,,,,,,,,...,,,,,,,,,,


In [25]:
Y_data = rating.values
Y_data

array([[277427, '002542730X', 10],
       [3363, '002542730X', 0],
       [11676, '002542730X', 6],
       ...,
       [249628, '0345333926', 0],
       [261829, '0345333926', 0],
       [264321, '0345333926', 8]], dtype=object)

In [26]:
len(Y_data[: , 0])

79313

In [27]:
first = Y_data[:, 0] # first col of the Y_data
sec = Y_data[:, 1]
n_users = int(np.max(Y_data[:, 0])) + 1
n_items = int(np.max(Y_data[:, 1])) + 1
Y = np.copy(Y_data)
mu = np.zeros((n_users,))

In [28]:

        for n in range(n_users):
            # row indices of rating done by user n
            # since indices need to be integers, we need to convert
            # ids = np.where(first == n)[0].astype(np.int32)
            ids = np.where(first == n)[0].astype(np.int32)
            # indices of all ratings associated with user n
            item_ids = Y_data[ids, 1] 
            # and the corresponding ratings 
            ratingsbook = Y_data[ids, 2]
            # take mean
            m = np.mean(ratingsbook) 
            if np.isnan(m):
                m = 0 # to avoid empty array and nan value
            mu[n] = m
            # normalize
            Y[ids,2] = ratingsbook - mu[n]

In [29]:
Y

array([[277427, '002542730X', 8.054794520547945],
       [3363, '002542730X', -0.8904109589041096],
       [11676, '002542730X', 1.0417482061317678],
       ...,
       [249628, '0345333926', -2.659340659340659],
       [261829, '0345333926', -2.7162790697674417],
       [264321, '0345333926', 6.0625]], dtype=object)

In [30]:
book_pivot = pd.DataFrame(Y)
book_pivot

Unnamed: 0,0,1,2
0,277427,002542730X,8.054795
1,3363,002542730X,-0.890411
2,11676,002542730X,1.041748
3,12538,002542730X,8.869281
4,13552,002542730X,-2.731707
...,...,...,...
79308,234828,0345333926,4.961165
79309,236283,0345333926,-1.887097
79310,249628,0345333926,-2.659341
79311,261829,0345333926,-2.716279


In [31]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
dist_func = cosine_similarity

## user-based1

In [32]:
book_pivot2 = book_pivot1.T
book_pivot2

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,0.0,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,0.0,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
3363,,,,0.0,0.0,,,,,,...,,,,,,,,,,
3757,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,,,,,,,,,,,...,,,,,,,,,,
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,10.0,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


In [33]:
dist_func = cosine_similarity

In [34]:
def standardize(row):
    new_row = (row-row.mean())
    return new_row
ratingustd = book_pivot2.apply(standardize)
ratingustdnan = ratingustd.copy(deep=True)
ratingustdnan 

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,-3.0,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,-1.8,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
3363,,,,-2.0,-1.689655,,,,,,...,,,,,,,,,,
3757,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,,,,,,,,,,,...,,,,,,,,,,
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,8.0,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


In [35]:
ratingustd.fillna(0, inplace=True)
ratingustd

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,0.0,0.0,0.000000,0.0,0.000000,-3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2276,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-1.8,0.0,0.0,0.0,0.0
2766,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3363,0.0,0.0,0.000000,-2.0,-1.689655,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3757,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
275970,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
277427,0.0,0.0,0.000000,8.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
277639,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
ratingustdnan

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,-3.0,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,-1.8,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
3363,,,,-2.0,-1.689655,,,,,,...,,,,,,,,,,
3757,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,,,,,,,,,,,...,,,,,,,,,,
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,8.0,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


In [37]:
eps = 1e-6
user_sim = dist_func(ratingustd)
print(user_sim)

[[ 1.00000000e+00 -5.04834012e-03 -5.72855368e-04 ... -5.10156372e-02
   0.00000000e+00 -1.74338782e-02]
 [-5.04834012e-03  1.00000000e+00 -1.23035124e-02 ...  1.64104743e-03
  -1.76093270e-02 -1.04546358e-02]
 [-5.72855368e-04 -1.23035124e-02  1.00000000e+00 ... -3.52927959e-02
  -3.52069735e-02 -1.90530860e-02]
 ...
 [-5.10156372e-02  1.64104743e-03 -3.52927959e-02 ...  1.00000000e+00
   5.13980714e-03 -4.65325811e-03]
 [ 0.00000000e+00 -1.76093270e-02 -3.52069735e-02 ...  5.13980714e-03
   1.00000000e+00 -4.40596908e-02]
 [-1.74338782e-02 -1.04546358e-02 -1.90530860e-02 ... -4.65325811e-03
  -4.40596908e-02  1.00000000e+00]]


In [38]:
sparse_df = sparse.csr_matrix(ratingustd)
corrMatrix = pd.DataFrame(cosine_similarity(sparse_df),index=ratingustd.T.columns,columns=ratingustd.T.columns)
corrMatrix

user_id,254,2276,2766,3363,3757,4385,6251,6543,6575,7158,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,1.000000,-0.005048,-0.000573,0.010840,0.0,0.000000,-0.000166,0.058455,-0.030500,-0.008835,...,0.039023,-0.083098,0.010163,-0.054466,0.000352,0.000033,-0.048361,-0.051016,0.000000,-0.017434
2276,-0.005048,1.000000,-0.012304,-0.023621,0.0,0.142543,-0.010740,-0.018862,-0.020890,-0.003665,...,-0.009882,0.013861,0.034410,-0.021894,-0.014590,-0.035553,-0.002858,0.001641,-0.017609,-0.010455
2766,-0.000573,-0.012304,1.000000,-0.034920,0.0,0.000000,-0.040915,-0.032349,-0.036315,-0.021717,...,0.009708,-0.028059,-0.012777,-0.006027,0.041503,-0.023007,0.032139,-0.035293,-0.035207,-0.019053
3363,0.010840,-0.023621,-0.034920,1.000000,0.0,0.000000,-0.014689,0.002479,0.036123,0.021306,...,0.014366,-0.033885,0.002545,0.009482,-0.025848,-0.006923,0.007892,-0.012387,-0.005905,0.002013
3757,0.000000,0.000000,0.000000,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,0.000033,-0.035553,-0.023007,-0.006923,0.0,-0.028206,-0.042891,-0.002486,-0.015138,0.004960,...,-0.047844,-0.018734,0.004960,0.012604,-0.023172,1.000000,0.007014,0.039045,-0.057736,0.090424
275970,-0.048361,-0.002858,0.032139,0.007892,0.0,-0.028554,0.009785,-0.023164,0.019809,0.005775,...,-0.066288,-0.023174,-0.054456,-0.004785,-0.018055,0.007014,1.000000,-0.052235,-0.003620,-0.023719
277427,-0.051016,0.001641,-0.035293,-0.012387,0.0,0.000000,0.023610,-0.011525,-0.046949,0.009279,...,-0.047626,0.002942,-0.011321,-0.010862,-0.016333,0.039045,-0.052235,1.000000,0.005140,-0.004653
277639,0.000000,-0.017609,-0.035207,-0.005905,0.0,0.000000,-0.009826,0.007403,-0.005135,0.011213,...,0.000000,0.003676,0.002109,-0.003617,-0.018100,-0.057736,-0.003620,0.005140,1.000000,-0.044060


In [39]:
user = 2276
sim_score = corrMatrix[user]
sim_score.sort_values(ascending = False)[1:4]

user_id
4385      0.142543
247752    0.112904
217121    0.086589
Name: 2276, dtype: float64

In [40]:
corrMatrix.values

array([[ 1.00000000e+00, -5.04834012e-03, -5.72855368e-04, ...,
        -5.10156372e-02,  0.00000000e+00, -1.74338782e-02],
       [-5.04834012e-03,  1.00000000e+00, -1.23035124e-02, ...,
         1.64104743e-03, -1.76093270e-02, -1.04546358e-02],
       [-5.72855368e-04, -1.23035124e-02,  1.00000000e+00, ...,
        -3.52927959e-02, -3.52069735e-02, -1.90530860e-02],
       ...,
       [-5.10156372e-02,  1.64104743e-03, -3.52927959e-02, ...,
         1.00000000e+00,  5.13980714e-03, -4.65325811e-03],
       [ 0.00000000e+00, -1.76093270e-02, -3.52069735e-02, ...,
         5.13980714e-03,  1.00000000e+00, -4.40596908e-02],
       [-1.74338782e-02, -1.04546358e-02, -1.90530860e-02, ...,
        -4.65325811e-03, -4.40596908e-02,  1.00000000e+00]])

In [41]:
Y_data

array([[277427, '002542730X', 10],
       [3363, '002542730X', 0],
       [11676, '002542730X', 6],
       ...,
       [249628, '0345333926', 0],
       [261829, '0345333926', 0],
       [264321, '0345333926', 8]], dtype=object)

In [42]:
corrMatrix

user_id,254,2276,2766,3363,3757,4385,6251,6543,6575,7158,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,1.000000,-0.005048,-0.000573,0.010840,0.0,0.000000,-0.000166,0.058455,-0.030500,-0.008835,...,0.039023,-0.083098,0.010163,-0.054466,0.000352,0.000033,-0.048361,-0.051016,0.000000,-0.017434
2276,-0.005048,1.000000,-0.012304,-0.023621,0.0,0.142543,-0.010740,-0.018862,-0.020890,-0.003665,...,-0.009882,0.013861,0.034410,-0.021894,-0.014590,-0.035553,-0.002858,0.001641,-0.017609,-0.010455
2766,-0.000573,-0.012304,1.000000,-0.034920,0.0,0.000000,-0.040915,-0.032349,-0.036315,-0.021717,...,0.009708,-0.028059,-0.012777,-0.006027,0.041503,-0.023007,0.032139,-0.035293,-0.035207,-0.019053
3363,0.010840,-0.023621,-0.034920,1.000000,0.0,0.000000,-0.014689,0.002479,0.036123,0.021306,...,0.014366,-0.033885,0.002545,0.009482,-0.025848,-0.006923,0.007892,-0.012387,-0.005905,0.002013
3757,0.000000,0.000000,0.000000,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,0.000033,-0.035553,-0.023007,-0.006923,0.0,-0.028206,-0.042891,-0.002486,-0.015138,0.004960,...,-0.047844,-0.018734,0.004960,0.012604,-0.023172,1.000000,0.007014,0.039045,-0.057736,0.090424
275970,-0.048361,-0.002858,0.032139,0.007892,0.0,-0.028554,0.009785,-0.023164,0.019809,0.005775,...,-0.066288,-0.023174,-0.054456,-0.004785,-0.018055,0.007014,1.000000,-0.052235,-0.003620,-0.023719
277427,-0.051016,0.001641,-0.035293,-0.012387,0.0,0.000000,0.023610,-0.011525,-0.046949,0.009279,...,-0.047626,0.002942,-0.011321,-0.010862,-0.016333,0.039045,-0.052235,1.000000,0.005140,-0.004653
277639,0.000000,-0.017609,-0.035207,-0.005905,0.0,0.000000,-0.009826,0.007403,-0.005135,0.011213,...,0.000000,0.003676,0.002109,-0.003617,-0.018100,-0.057736,-0.003620,0.005140,1.000000,-0.044060


In [43]:
corrMatrix.values[2,3]

-0.03492001789302521

In [44]:
book_pivot2

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,0.0,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,0.0,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
3363,,,,0.0,0.0,,,,,,...,,,,,,,,,,
3757,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,,,,,,,,,,,...,,,,,,,,,,
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,10.0,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


In [45]:
picked_u = 3363
picked_b = ['000649840X']
# Tìm các user đã rate book
picked = pd.DataFrame(ratingustdnan[picked_b].dropna(axis=0))
picked

ISBN,000649840X
user_id,Unnamed: 1_level_1
11676,5.210526
32440,5.210526
69405,2.210526
70052,0.210526
86243,5.210526
100459,-2.789474
114868,-2.789474
131855,-2.789474
140000,3.210526
148258,4.210526


In [46]:
# pick user similarity high 
pickuser_sim = pd.DataFrame(corrMatrix[picked_u])
pickuser_sim

Unnamed: 0_level_0,3363
user_id,Unnamed: 1_level_1
254,0.010840
2276,-0.023621
2766,-0.034920
3363,1.000000
3757,0.000000
...,...
274308,-0.006923
275970,0.007892
277427,-0.012387
277639,-0.005905


In [47]:
k = 5
pickuser_id = pd.merge(left=picked,right=pickuser_sim, on='user_id', how='inner')

In [48]:
pickK_user = pickuser_id.sort_values(3363, ascending=False)[:k]
pickK_user

Unnamed: 0_level_0,000649840X,3363
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
140000,3.210526,0.041866
70052,0.210526,0.033045
206979,-2.789474,0.031257
148258,4.210526,0.020481
32440,5.210526,0.019986


In [49]:
#caluculate rating prediction 
print(pickK_user.values[: ,0]*pickK_user.values[:,1])

[ 0.13441345  0.00695694 -0.08718979  0.08623694  0.10413981]


In [50]:
np.sum(np.abs(pickK_user.values[: ,0]))

15.631578947368423

In [51]:
np.sum(pickK_user.values[: ,0]*pickK_user.values[:,1])

0.24455734421758574

## User-based

In [52]:
Y

array([[277427, '002542730X', 8.054794520547945],
       [3363, '002542730X', -0.8904109589041096],
       [11676, '002542730X', 1.0417482061317678],
       ...,
       [249628, '0345333926', -2.659340659340659],
       [261829, '0345333926', -2.7162790697674417],
       [264321, '0345333926', 6.0625]], dtype=object)

In [67]:
book_pivot2

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,0.0,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,0.0,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
3363,,,,0.0,0.0,,,,,,...,,,,,,,,,,
3757,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,,,,,,,,,,,...,,,,,,,,,,
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,10.0,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


In [53]:
#normalize user-item matrix
# axis = 1 cloumns
# axis = 0 index 
matrix_norm = book_pivot2.subtract(book_pivot2.mean(1), axis=0 )
matrix_norm

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,-1.909091,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,-4.035714,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
3363,,,,-0.890411,-0.890411,,,,,,...,,,,,,,,,,
3757,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,,,,,,,,,,,...,,,,,,,,,,
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,8.054795,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


In [54]:
matrix_norm0 = matrix_norm.copy(deep=True)
matrix_norm0

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,-1.909091,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,-4.035714,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
3363,,,,-0.890411,-0.890411,,,,,,...,,,,,,,,,,
3757,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,,,,,,,,,,,...,,,,,,,,,,
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,8.054795,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


In [55]:
matrix_norm.fillna(0, inplace=True)
matrix_norm.T

user_id,254,2276,2766,3363,3757,4385,6251,6543,6575,7158,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
ISBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000649840X,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.0,-2.890244,0.0,0.0,0.0,0.0,0.000000,0.0,0.00000
002026478X,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.00000
0020442203,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,...,-3.517647,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,-0.26383
002542730X,0.0,0.000000,0.0,-0.890411,0.0,0.0,0.0,0.0,0.000000,0.0,...,-3.517647,0.0,0.000000,0.0,0.0,0.0,0.0,8.054795,0.0,0.00000
0028604199,0.0,0.000000,0.0,-0.890411,0.0,0.0,0.0,0.0,-2.622642,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,-0.26383
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3257229534,0.0,-4.035714,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.00000
3404148665,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.00000
3423202327,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.00000
3442541751,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.00000


In [56]:
user_similarity = sparse.csr_matrix(matrix_norm)
user_similarity

<686x2101 sparse matrix of type '<class 'numpy.float64'>'
	with 77953 stored elements in Compressed Sparse Row format>

In [57]:
user_sim_cosine = cosine_similarity(matrix_norm)
user_sim_cosine

array([[ 1.00000000e+00, -1.60161857e-02, -2.79766170e-03, ...,
        -3.42560953e-02,  0.00000000e+00, -1.47720228e-03],
       [-1.60161857e-02,  1.00000000e+00, -1.85546104e-02, ...,
         1.06733289e-02, -6.30591992e-03,  3.56735087e-03],
       [-2.79766170e-03, -1.85546104e-02,  1.00000000e+00, ...,
        -4.78994827e-02, -5.28848049e-02, -9.24729514e-03],
       ...,
       [-3.42560953e-02,  1.06733289e-02, -4.78994827e-02, ...,
         1.00000000e+00,  1.43972150e-02,  6.17862678e-04],
       [ 0.00000000e+00, -6.30591992e-03, -5.28848049e-02, ...,
         1.43972150e-02,  1.00000000e+00, -4.94989508e-03],
       [-1.47720228e-03,  3.56735087e-03, -9.24729514e-03, ...,
         6.17862678e-04, -4.94989508e-03,  1.00000000e+00]])

In [58]:
u_sim = pd.DataFrame(cosine_similarity(user_similarity), index=matrix_norm.T.columns, columns=matrix_norm.T.columns)
u_sim

user_id,254,2276,2766,3363,3757,4385,6251,6543,6575,7158,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,1.000000,-0.016016,-0.002798,0.004848,0.0,0.000000,0.063623,0.044174,-0.023810,-0.010455,...,0.068968,-0.036984,0.002531,-0.045615,0.009170,0.032551,-0.027925,-0.034256,0.000000,-0.001477
2276,-0.016016,1.000000,-0.018555,-0.006342,0.0,0.089512,0.000203,-0.036163,-0.032431,-0.013835,...,-0.006226,0.015490,0.005960,-0.017207,-0.025529,-0.006704,0.001925,0.010673,-0.006306,0.003567
2766,-0.002798,-0.018555,1.000000,0.012346,0.0,0.000000,-0.041767,-0.058768,-0.032588,-0.022707,...,0.005403,-0.028797,-0.002803,0.004021,-0.029962,-0.021462,0.033103,-0.047899,-0.052885,-0.009247
3363,0.004848,-0.006342,0.012346,1.000000,0.0,0.000000,-0.019875,0.008419,0.055860,0.037666,...,0.037054,-0.023054,0.003188,0.008531,0.002046,-0.008537,-0.011058,-0.011186,-0.007326,-0.011965
3757,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274308,0.032551,-0.006704,-0.021462,-0.008537,0.0,-0.011667,0.009340,-0.019369,0.012608,-0.009392,...,-0.025144,-0.022424,0.015741,-0.004875,0.004521,1.000000,0.001048,0.069339,-0.041328,0.009594
275970,-0.027925,0.001925,0.033103,-0.011058,0.0,-0.015220,0.009892,0.002496,0.070008,0.002141,...,-0.017114,-0.021054,-0.045960,-0.008647,-0.027877,0.001048,1.000000,-0.050134,-0.005014,-0.006475
277427,-0.034256,0.010673,-0.047899,-0.011186,0.0,0.000000,0.011634,0.002259,-0.041205,-0.001611,...,-0.046252,-0.003123,-0.006760,0.003114,0.019559,0.069339,-0.050134,1.000000,0.014397,0.000618
277639,0.000000,-0.006306,-0.052885,-0.007326,0.0,0.000000,-0.010763,0.018491,-0.004601,0.013264,...,0.000000,0.006965,0.009096,-0.003556,-0.038485,-0.041328,-0.005014,0.014397,1.000000,-0.004950


In [59]:
# number of similar users
k = 5

In [60]:
picked_u

3363

In [61]:
# picked = pd.DataFrame(ratingustdnan[picked_b].dropna(axis=0))
# picked
similar_user = u_sim[picked_u].sort_values(ascending=False)[1:k+1]
similar_user

user_id
179733    0.156396
138441    0.144001
20115     0.138512
190708    0.119704
227447    0.112969
Name: 3363, dtype: float64

In [62]:
#narrow down item
similar_user_book = matrix_norm0[matrix_norm0.index.isin(similar_user.index)].dropna(axis=1, how='all')
similar_user_book

ISBN,002542730X,0060008032,0060085444,006016848X,0060175400,0060502258,0060505885,0060557257,0060928336,0060929871,...,155166884X,1551668998,1551669374,155874262X,1558744150,1558744606,1559029838,1573225517,157322930X,1885171080
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20115,,,,,-0.57971,,,,,,...,,,,,-0.57971,,,,,
138441,,,-0.945946,,,,,,,,...,,,,,,,,,,
179733,-0.626984,,,,,-0.626984,-0.626984,-0.626984,-0.626984,,...,-0.626984,,,,,,,,-0.626984,-0.626984
190708,,,,-1.581197,,,,,,,...,,,,,,,,,,
227447,,-0.621891,,,,,,,-0.621891,-0.621891,...,,-0.621891,-0.621891,-0.621891,-0.621891,-0.621891,-0.621891,-0.621891,,


In [63]:
picked_userid_read = matrix_norm0[matrix_norm0.index == picked_u].dropna(axis=1, how='all')
picked_userid_read

ISBN,002542730X,0028604199,0060090367,0060096195,0060175400,0060502258,0060928336,0060958022,0060976497,0060976845,...,0971880107,1551668998,155874262X,1558743316,1558744150,1558744606,1558744630,1558747028,1565122968,1573229385
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3363,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,...,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411,-0.890411


Next we will drop the movies that user id have watch 

In [64]:
similar_user_book.drop(picked_userid_read.columns,axis=1, inplace = True,errors = 'ignore')
similar_user_book

ISBN,0060008032,0060085444,006016848X,0060505885,0060557257,0060929871,0060934417,0060936363,0060938455,0060987561,...,1400031362,155166674X,1551668246,1551668300,155166884X,1551669374,1559029838,1573225517,157322930X,1885171080
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20115,,,,,,,,,,-0.57971,...,,,,,,,,,,
138441,,-0.945946,,,,,,,,,...,,,-0.945946,,,,,,,
179733,,,,-0.626984,-0.626984,,,,,,...,,,,,-0.626984,,,,-0.626984,-0.626984
190708,,,-1.581197,,,,,,,,...,,,,,,,,,,
227447,-0.621891,,,,,-0.621891,-0.621891,-0.621891,9.378109,,...,-0.621891,-0.621891,,-0.621891,,-0.621891,-0.621891,-0.621891,,


In [65]:
similar_user_book.columns

Index(['0060008032', '0060085444', '006016848X', '0060505885', '0060557257',
       '0060929871', '0060934417', '0060936363', '0060938455', '0060987561',
       ...
       '1400031362', '155166674X', '1551668246', '1551668300', '155166884X',
       '1551669374', '1559029838', '1573225517', '157322930X', '1885171080'],
      dtype='object', name='ISBN', length=562)

Recommend book

In [66]:
item_score = {}

for i in similar_user_book.columns:
    book_rating = similar_user_book[i]
    # create a variable to score the score
    total = 0
    # variable to store the number of score
    count = 0
    for u in similar_user.index:
        if pd.isna(book_rating[u]) == False:
            score = similar_user[u] * book_rating[u]
            total +=score
            count +=1
    # get average score for item 
    item_score[i] = total/count
    
item_score = pd.DataFrame(item_score.items(), columns=['book', 'book_score'])
ranked_item_score = item_score.sort_values(by= 'book_score', ascending=False)
    
m=10 
ranked_item_score.head(m)


Unnamed: 0,book,book_score
262,0440241413,1.465901
515,0740704818,1.465901
33,014028009X,1.465901
229,0440221315,1.465901
28,014025448X,1.304821
29,0140254544,1.304821
74,0330375253,1.159794
216,0440212812,1.159794
8,0060938455,1.059432
356,051511779X,1.059432
