In [268]:
# Libraries for data preparation & visualization
import numpy as np
import seaborn as sns
import pandas as pd
import plotly.io as pio
import matplotlib.pyplot as plt
pio.renderers.default = "png"

# Ignore printing warnings for general readability
import warnings 
warnings.filterwarnings("ignore")


In [269]:
# Loading the dataset 
def loaddata(filename):
    df = pd.read_csv(f'{filename}.csv',sep=';',error_bad_lines=False,warn_bad_lines=False,encoding='latin-1')
    return df

book   = loaddata("../../BX-Books")
user   = loaddata("../../BX-Users")
rating = loaddata("../../BX-Book-Ratings")

In [270]:
#Preprocessing Data
book = book[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher']]
book.rename(columns = {'Book-Title':'title', 'Book-Author':'author', 'Year-Of-Publication':'year', 'Publisher':'publisher'}, inplace=True)
user.rename(columns = {'User-ID':'user_id', 'Location':'location', 'Age':'age'}, inplace=True)
rating.rename(columns = {'User-ID':'user_id', 'Book-Rating':'rating'}, inplace=True)

In [271]:
rating

Unnamed: 0,user_id,ISBN,rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [272]:
# Let's visualize ratings given by users

rating_users = rating['user_id'].value_counts().reset_index().\
               rename({'index':'user_id','user_id':'rating'}, axis=1)
rating_users

Unnamed: 0,user_id,rating
0,11676,13602
1,198711,7550
2,153662,6109
3,98391,5891
4,35859,5850
...,...,...
105278,116180,1
105279,116166,1
105280,116154,1
105281,116137,1


In [273]:
# Let's visualize ratings received by books

rating_books = rating['ISBN'].value_counts().reset_index().\
               rename({'index':'ISBN','ISBN':'rating'}, axis=1)
rating_books

Unnamed: 0,ISBN,rating
0,0971880107,2502
1,0316666343,1295
2,0385504209,883
3,0060928336,732
4,0312195516,723
...,...,...
340551,1568656386,1
340552,1568656408,1
340553,1569551553,1
340554,1570081808,1


In [201]:
# In order to avoid rating bias & for making good recommendations, limit the dataset to only those
# users that have made at least 250 ratings & books that have received at least 50 ratings

rating = rating[rating['user_id'].isin(rating_users[rating_users['rating']>=100]['user_id'])]
rating = rating[rating['ISBN'].isin(rating_books[rating_books['rating']>= 100]['ISBN'])]

rating


Unnamed: 0,user_id,ISBN,rating
413,276925,002542730X,10
426,276925,0316666343,0
429,276925,0385504209,8
453,276925,0804106304,0
457,276925,0971880107,0
...,...,...,...
1149553,276680,0446670251,0
1149564,276680,0452283205,7
1149577,276680,0679731725,0
1149581,276680,0679781587,9


In [274]:
len(rating['user_id'].unique())

105283

In [275]:
# For the recommendation system, it is prefered to have the book titles rather than ISBN for easier interpretation

rating = rating.merge(book, on="ISBN")[['user_id','title','rating','ISBN']] # merging with the book dataframe
rating                                                                         # on 'ISBN' to get 'Book-Title'


Unnamed: 0,user_id,title,rating,ISBN
0,276725,Flesh Tones: A Novel,0,034545104X
1,2313,Flesh Tones: A Novel,5,034545104X
2,6543,Flesh Tones: A Novel,0,034545104X
3,8680,Flesh Tones: A Novel,5,034545104X
4,10314,Flesh Tones: A Novel,9,034545104X
...,...,...,...,...
1031131,276688,Mostly Harmless,0,0517145553
1031132,276688,Gray Matter,7,1575660792
1031133,276690,Triplet Trouble and the Class Trip (Triplet Tr...,0,0590907301
1031134,276704,A Desert of Pure Feeling (Vintage Contemporaries),0,0679752714


In [204]:
# Check for duplicate values
print(f'Duplicate entries: {rating.duplicated().sum()}')

Duplicate entries: 0


In [205]:
rating.drop_duplicates(inplace=True)
rating

Unnamed: 0,user_id,title,rating,ISBN
0,276925,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
1,277427,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
2,3363,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
3,10030,Politically Correct Bedtime Stories: Modern Ta...,7,002542730X
4,11676,Politically Correct Bedtime Stories: Modern Ta...,6,002542730X
...,...,...,...,...
65518,263460,Pretend You Don't See Her,0,0671867156
65519,269566,Pretend You Don't See Her,0,0671867156
65520,271284,Pretend You Don't See Her,0,0671867156
65521,273979,Pretend You Don't See Her,0,0671867156


In [206]:
rating

Unnamed: 0,user_id,title,rating,ISBN
0,276925,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
1,277427,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
2,3363,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
3,10030,Politically Correct Bedtime Stories: Modern Ta...,7,002542730X
4,11676,Politically Correct Bedtime Stories: Modern Ta...,6,002542730X
...,...,...,...,...
65518,263460,Pretend You Don't See Her,0,0671867156
65519,269566,Pretend You Don't See Her,0,0671867156
65520,271284,Pretend You Don't See Her,0,0671867156
65521,273979,Pretend You Don't See Her,0,0671867156


In [207]:
book_pivot = rating.pivot_table(columns='user_id', index='title', values="rating")
#book_pivot.fillna(0, inplace=True)

In [208]:
book_pivot

user_id,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,276018,276463,276680,276925,277427,277478,277639,278137,278188,278418
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,,,,,,,,,,
1st to Die: A Novel,,0.0,,,,,,,,,...,,,,,,,,,,
2nd Chance,,,,,,,,,,10.0,...,,,,,,,0.0,,,
4 Blondes,,,,,,,,,,,...,,,,,,,,,,
A Beautiful Mind: The Life of Mathematical Genius and Nobel Laureate John Nash,,,,0.0,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Without Remorse,,,,,,,,,,,...,,,,,,,,,,
Year of Wonders,,,,7.0,,,,,,,...,,,,,,,,,,
You Belong To Me,,,,,,,,,,,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,,,,,,,,...,,,,,,,,,,


In [209]:
book_pivot1 = rating.pivot_table(columns='user_id', index='ISBN', values="rating")
#book_pivot1.fillna(0, inplace=True)

In [210]:
book_pivot1

user_id,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,276018,276463,276680,276925,277427,277478,277639,278137,278188,278418
ISBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
002542730X,,,,,,,,,,,...,,,,10.0,10.0,,,,,
0060008032,,,,,,,,,,,...,,,,,,,,,,
0060096195,,,,,,,,,,,...,,,,,,,,,,
006016848X,,,,,,,,,,,...,,,,,,,,,,0.0
0060173289,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1573227331,,,,,,,,,,,...,,,,,,,,,,
1573229326,,,,,,,,,,,...,,,,,,,,,,
1573229571,,,,,,,,,,,...,,,,,,,,,,
1592400876,,,,,,,0.0,,,,...,,,,,,,,,,


# Normalization utility matrix

In [211]:
rating = rating[['user_id','ISBN','rating']]

In [212]:
rating

Unnamed: 0,user_id,ISBN,rating
0,276925,002542730X,10
1,277427,002542730X,10
2,3363,002542730X,0
3,10030,002542730X,7
4,11676,002542730X,6
...,...,...,...
65518,263460,0671867156,0
65519,269566,0671867156,0
65520,271284,0671867156,0
65521,273979,0671867156,0


In [213]:
len(rating['user_id'].unique())

1776

In [214]:
len(rating['ISBN'].unique())

727

In [215]:
rating.values

array([[276925, '002542730X', 10],
       [277427, '002542730X', 10],
       [3363, '002542730X', 0],
       ...,
       [271284, '0671867156', 0],
       [273979, '0671867156', 0],
       [274308, '0671867156', 0]], dtype=object)

In [216]:
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix
book_sparse = csr_matrix(book_pivot1)

In [217]:
book_sparse

<727x1776 sparse matrix of type '<class 'numpy.float64'>'
	with 1244067 stored elements in Compressed Sparse Row format>

In [218]:
book_pivot1

user_id,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,276018,276463,276680,276925,277427,277478,277639,278137,278188,278418
ISBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
002542730X,,,,,,,,,,,...,,,,10.0,10.0,,,,,
0060008032,,,,,,,,,,,...,,,,,,,,,,
0060096195,,,,,,,,,,,...,,,,,,,,,,
006016848X,,,,,,,,,,,...,,,,,,,,,,0.0
0060173289,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1573227331,,,,,,,,,,,...,,,,,,,,,,
1573229326,,,,,,,,,,,...,,,,,,,,,,
1573229571,,,,,,,,,,,...,,,,,,,,,,
1592400876,,,,,,,0.0,,,,...,,,,,,,,,,


In [219]:
Y_data = rating.values
Y_data

array([[276925, '002542730X', 10],
       [277427, '002542730X', 10],
       [3363, '002542730X', 0],
       ...,
       [271284, '0671867156', 0],
       [273979, '0671867156', 0],
       [274308, '0671867156', 0]], dtype=object)

In [220]:
len(Y_data[: , 0])

65523

In [221]:
first = Y_data[:, 0] # first col of the Y_data
sec = Y_data[:, 1]
n_users = int(np.max(Y_data[:, 0])) + 1
n_items = int(np.max(Y_data[:, 1])) + 1
Y = np.copy(Y_data)
mu = np.zeros((n_users,))

In [222]:

        for n in range(n_users):
            # row indices of rating done by user n
            # since indices need to be integers, we need to convert
            # ids = np.where(first == n)[0].astype(np.int32)
            ids = np.where(first == n)[0].astype(np.int32)
            # indices of all ratings associated with user n
            item_ids = Y_data[ids, 1] 
            # and the corresponding ratings 
            ratingsbook = Y_data[ids, 2]
            # take mean
            m = np.mean(ratingsbook) 
            if np.isnan(m):
                m = 0 # to avoid empty array and nan value
            mu[n] = m
            # normalize
            Y[ids,2] = ratingsbook - mu[n]

In [223]:
Y

array([[276925, '002542730X', 6.4],
       [277427, '002542730X', 8.354430379746836],
       [3363, '002542730X', -0.5],
       ...,
       [271284, '0671867156', -0.18181818181818182],
       [273979, '0671867156', -1.3673469387755102],
       [274308, '0671867156', -1.1612903225806452]], dtype=object)

In [224]:
book_pivot = pd.DataFrame(Y)
book_pivot

Unnamed: 0,0,1,2
0,276925,002542730X,6.4
1,277427,002542730X,8.35443
2,3363,002542730X,-0.5
3,10030,002542730X,4.526316
4,11676,002542730X,0.731707
...,...,...,...
65518,263460,0671867156,-1.0625
65519,269566,0671867156,-0.659218
65520,271284,0671867156,-0.181818
65521,273979,0671867156,-1.367347


In [225]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
dist_func = cosine_similarity

## user-based1

In [226]:
book_pivot2 = book_pivot1.T
book_pivot2

ISBN,002542730X,0060008032,0060096195,006016848X,0060173289,0060175400,006019491X,0060199652,0060391626,0060392452,...,1558744630,1558745157,1559029838,1573225517,1573225789,1573227331,1573229326,1573229571,1592400876,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,,,,,,...,,,,,,,,,,
507,,,,,,,,,,0.0,...,,,,,,,,,,
882,,,,,,,,,,,...,,,,,,,,,,
1424,,,,,,,,,,,...,,,,,,,,,,
1435,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,
278137,,,,,,,,,,,...,,,,,,,,,,
278188,,,,,,,,,,,...,,,,,,,,,,


In [227]:
dist_func = cosine_similarity

In [228]:
def standardize(row):
    new_row = (row-row.mean())
    return new_row
ratingustd = book_pivot2.apply(standardize)
ratingustdnan = ratingustd.copy(deep=True)
ratingustdnan 

ISBN,002542730X,0060008032,0060096195,006016848X,0060173289,0060175400,006019491X,0060199652,0060391626,0060392452,...,1558744630,1558745157,1559029838,1573225517,1573225789,1573227331,1573229326,1573229571,1592400876,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,,,,,,...,,,,,,,,,,
507,,,,,,,,,,-3.59322,...,,,,,,,,,,
882,,,,,,,,,,,...,,,,,,,,,,
1424,,,,,,,,,,,...,,,,,,,,,,
1435,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,
278137,,,,,,,,,,,...,,,,,,,,,,
278188,,,,,,,,,,,...,,,,,,,,,,


In [229]:
ratingustd.fillna(0, inplace=True)
ratingustd

ISBN,002542730X,0060008032,0060096195,006016848X,0060173289,0060175400,006019491X,0060199652,0060391626,0060392452,...,1558744630,1558745157,1559029838,1573225517,1573225789,1573227331,1573229326,1573229571,1592400876,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
507,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,-3.59322,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
882,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1424,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1435,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
277639,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278137,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278188,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.00000,...,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [230]:
ratingustdnan

ISBN,002542730X,0060008032,0060096195,006016848X,0060173289,0060175400,006019491X,0060199652,0060391626,0060392452,...,1558744630,1558745157,1559029838,1573225517,1573225789,1573227331,1573229326,1573229571,1592400876,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,,,,,,...,,,,,,,,,,
507,,,,,,,,,,-3.59322,...,,,,,,,,,,
882,,,,,,,,,,,...,,,,,,,,,,
1424,,,,,,,,,,,...,,,,,,,,,,
1435,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,
278137,,,,,,,,,,,...,,,,,,,,,,
278188,,,,,,,,,,,...,,,,,,,,,,


In [231]:
eps = 1e-6
user_sim = dist_func(ratingustd)
print(user_sim)

[[ 1.          0.12261    -0.01058163 ... -0.03926475  0.
  -0.01381008]
 [ 0.12261     1.          0.         ...  0.          0.
   0.05339538]
 [-0.01058163  0.          1.         ...  0.00188462  0.00800281
  -0.02212689]
 ...
 [-0.03926475  0.          0.00188462 ...  1.          0.
   0.0067356 ]
 [ 0.          0.          0.00800281 ...  0.          1.
  -0.04993539]
 [-0.01381008  0.05339538 -0.02212689 ...  0.0067356  -0.04993539
   1.        ]]


In [232]:
sparse_df = sparse.csr_matrix(ratingustd)
corrMatrix = pd.DataFrame(cosine_similarity(sparse_df),index=ratingustd.T.columns,columns=ratingustd.T.columns)
corrMatrix

user_id,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,276018,276463,276680,276925,277427,277478,277639,278137,278188,278418
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,1.000000,0.122610,-0.010582,-0.026764,-0.045262,-0.057710,-0.132773,0.048458,0.028095,-0.005910,...,0.022858,0.168293,-0.039357,0.015547,-0.084897,0.000000,0.000000,-0.039265,0.000000,-0.013810
507,0.122610,1.000000,0.000000,0.000000,-0.054087,0.005194,0.052726,0.000000,0.015443,0.050199,...,0.000000,0.027059,0.000000,0.000000,-0.007496,0.000000,0.000000,0.000000,0.000000,0.053395
882,-0.010582,0.000000,1.000000,-0.035320,-0.047665,0.037115,0.010971,0.000000,0.000000,0.006760,...,0.000000,0.019855,0.039365,0.136703,0.019620,0.000000,0.000000,0.001885,0.008003,-0.022127
1424,-0.026764,0.000000,-0.035320,1.000000,0.000000,0.000000,0.000000,0.000000,0.082486,-0.034041,...,0.000000,0.000000,0.000000,-0.055973,-0.014798,0.104935,0.007718,0.141364,0.000000,0.013557
1435,-0.045262,-0.054087,-0.047665,0.000000,1.000000,0.006695,-0.007435,0.000000,0.000000,-0.007733,...,-0.363242,0.000000,0.000000,-0.013011,-0.008263,0.010531,-0.041546,-0.010179,0.000000,-0.034719
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,0.000000,0.000000,0.000000,0.104935,0.010531,-0.001961,0.002178,0.037439,0.151678,0.027796,...,0.000000,0.000000,0.000000,0.003811,0.017116,1.000000,-0.085604,0.090705,-0.118811,0.071865
277639,0.000000,0.000000,0.000000,0.007718,-0.041546,0.000000,0.000000,0.033631,0.000000,-0.036510,...,0.000000,0.000000,0.000000,0.000000,0.011281,-0.085604,1.000000,0.000000,0.000000,-0.076416
278137,-0.039265,0.000000,0.001885,0.141364,-0.010179,-0.001238,0.001375,0.080999,0.000000,0.000000,...,0.000000,0.000000,0.000000,-0.094228,0.000733,0.090705,0.000000,1.000000,0.000000,0.006736
278188,0.000000,0.000000,0.008003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,-0.029219,-0.118811,0.000000,0.000000,1.000000,-0.049935


In [233]:
user = 2276
sim_score = corrMatrix[user]
sim_score.sort_values(ascending = False)[1:4]

user_id
227250    0.320018
4385      0.305380
111847    0.172770
Name: 2276, dtype: float64

In [234]:
corrMatrix.values

array([[ 1.        ,  0.12261   , -0.01058163, ..., -0.03926475,
         0.        , -0.01381008],
       [ 0.12261   ,  1.        ,  0.        , ...,  0.        ,
         0.        ,  0.05339538],
       [-0.01058163,  0.        ,  1.        , ...,  0.00188462,
         0.00800281, -0.02212689],
       ...,
       [-0.03926475,  0.        ,  0.00188462, ...,  1.        ,
         0.        ,  0.0067356 ],
       [ 0.        ,  0.        ,  0.00800281, ...,  0.        ,
         1.        , -0.04993539],
       [-0.01381008,  0.05339538, -0.02212689, ...,  0.0067356 ,
        -0.04993539,  1.        ]])

In [235]:
Y_data

array([[276925, '002542730X', 10],
       [277427, '002542730X', 10],
       [3363, '002542730X', 0],
       ...,
       [271284, '0671867156', 0],
       [273979, '0671867156', 0],
       [274308, '0671867156', 0]], dtype=object)

In [236]:
corrMatrix

user_id,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,276018,276463,276680,276925,277427,277478,277639,278137,278188,278418
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,1.000000,0.122610,-0.010582,-0.026764,-0.045262,-0.057710,-0.132773,0.048458,0.028095,-0.005910,...,0.022858,0.168293,-0.039357,0.015547,-0.084897,0.000000,0.000000,-0.039265,0.000000,-0.013810
507,0.122610,1.000000,0.000000,0.000000,-0.054087,0.005194,0.052726,0.000000,0.015443,0.050199,...,0.000000,0.027059,0.000000,0.000000,-0.007496,0.000000,0.000000,0.000000,0.000000,0.053395
882,-0.010582,0.000000,1.000000,-0.035320,-0.047665,0.037115,0.010971,0.000000,0.000000,0.006760,...,0.000000,0.019855,0.039365,0.136703,0.019620,0.000000,0.000000,0.001885,0.008003,-0.022127
1424,-0.026764,0.000000,-0.035320,1.000000,0.000000,0.000000,0.000000,0.000000,0.082486,-0.034041,...,0.000000,0.000000,0.000000,-0.055973,-0.014798,0.104935,0.007718,0.141364,0.000000,0.013557
1435,-0.045262,-0.054087,-0.047665,0.000000,1.000000,0.006695,-0.007435,0.000000,0.000000,-0.007733,...,-0.363242,0.000000,0.000000,-0.013011,-0.008263,0.010531,-0.041546,-0.010179,0.000000,-0.034719
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,0.000000,0.000000,0.000000,0.104935,0.010531,-0.001961,0.002178,0.037439,0.151678,0.027796,...,0.000000,0.000000,0.000000,0.003811,0.017116,1.000000,-0.085604,0.090705,-0.118811,0.071865
277639,0.000000,0.000000,0.000000,0.007718,-0.041546,0.000000,0.000000,0.033631,0.000000,-0.036510,...,0.000000,0.000000,0.000000,0.000000,0.011281,-0.085604,1.000000,0.000000,0.000000,-0.076416
278137,-0.039265,0.000000,0.001885,0.141364,-0.010179,-0.001238,0.001375,0.080999,0.000000,0.000000,...,0.000000,0.000000,0.000000,-0.094228,0.000733,0.090705,0.000000,1.000000,0.000000,0.006736
278188,0.000000,0.000000,0.008003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,-0.029219,-0.118811,0.000000,0.000000,1.000000,-0.049935


In [237]:
corrMatrix.values[2,3]

-0.03531961336547846

In [238]:
book_pivot2

ISBN,002542730X,0060008032,0060096195,006016848X,0060173289,0060175400,006019491X,0060199652,0060391626,0060392452,...,1558744630,1558745157,1559029838,1573225517,1573225789,1573227331,1573229326,1573229571,1592400876,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,,,,,,...,,,,,,,,,,
507,,,,,,,,,,0.0,...,,,,,,,,,,
882,,,,,,,,,,,...,,,,,,,,,,
1424,,,,,,,,,,,...,,,,,,,,,,
1435,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,
278137,,,,,,,,,,,...,,,,,,,,,,
278188,,,,,,,,,,,...,,,,,,,,,,


In [239]:
# picked_u = 3363
# picked_b = ['000649840X']
# # Tìm các user đã rate book
# picked = pd.DataFrame(ratingustdnan[picked_b].dropna(axis=0))
# picked

In [240]:
# pick user similarity high 
pickuser_sim = pd.DataFrame(corrMatrix[picked_u])
pickuser_sim

Unnamed: 0_level_0,4017
user_id,Unnamed: 1_level_1
254,-0.069903
507,-0.071401
882,0.034796
1424,0.006881
1435,0.079574
...,...
277478,-0.004511
277639,0.000000
278137,-0.023147
278188,0.000000


In [241]:
k = 5
pickuser_id = pd.merge(left=picked,right=pickuser_sim, on='user_id', how='inner')

In [242]:
pickK_user = pickuser_id.sort_values(3363, ascending=False)[:k]
pickK_user

KeyError: 3363

In [None]:
#caluculate rating prediction 
print(pickK_user.values[: ,0]*pickK_user.values[:,1])

In [None]:
np.sum(np.abs(pickK_user.values[: ,0]))

In [None]:
np.sum(pickK_user.values[: ,0]*pickK_user.values[:,1])

## User-based

In [243]:
book_pivot2

ISBN,002542730X,0060008032,0060096195,006016848X,0060173289,0060175400,006019491X,0060199652,0060391626,0060392452,...,1558744630,1558745157,1559029838,1573225517,1573225789,1573227331,1573229326,1573229571,1592400876,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,,,,,,...,,,,,,,,,,
507,,,,,,,,,,0.0,...,,,,,,,,,,
882,,,,,,,,,,,...,,,,,,,,,,
1424,,,,,,,,,,,...,,,,,,,,,,
1435,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,
278137,,,,,,,,,,,...,,,,,,,,,,
278188,,,,,,,,,,,...,,,,,,,,,,


In [244]:
#normalize user-item matrix
# axis = 1 cloumns
# axis = 0 index 
matrix_norm = book_pivot2.subtract(book_pivot2.mean(1), axis=0 )
matrix_norm

ISBN,002542730X,0060008032,0060096195,006016848X,0060173289,0060175400,006019491X,0060199652,0060391626,0060392452,...,1558744630,1558745157,1559029838,1573225517,1573225789,1573227331,1573229326,1573229571,1592400876,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,,,,,,...,,,,,,,,,,
507,,,,,,,,,,-1.142857,...,,,,,,,,,,
882,,,,,,,,,,,...,,,,,,,,,,
1424,,,,,,,,,,,...,,,,,,,,,,
1435,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,
278137,,,,,,,,,,,...,,,,,,,,,,
278188,,,,,,,,,,,...,,,,,,,,,,


In [245]:
matrix_norm0 = matrix_norm.copy(deep=True)
matrix_norm0

ISBN,002542730X,0060008032,0060096195,006016848X,0060173289,0060175400,006019491X,0060199652,0060391626,0060392452,...,1558744630,1558745157,1559029838,1573225517,1573225789,1573227331,1573229326,1573229571,1592400876,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,,,,,,...,,,,,,,,,,
507,,,,,,,,,,-1.142857,...,,,,,,,,,,
882,,,,,,,,,,,...,,,,,,,,,,
1424,,,,,,,,,,,...,,,,,,,,,,
1435,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,
278137,,,,,,,,,,,...,,,,,,,,,,
278188,,,,,,,,,,,...,,,,,,,,,,


In [246]:
matrix_norm.fillna(0, inplace=True)
matrix_norm.T

user_id,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,276018,276463,276680,276925,277427,277478,277639,278137,278188,278418
ISBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
002542730X,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,6.4,8.35443,0.0,0.0,0.0,0.0,0.000000
0060008032,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.000000
0060096195,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.000000
006016848X,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,-0.409524
0060173289,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1573227331,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.000000
1573229326,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.000000
1573229571,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.000000
1592400876,0.0,0.0,0.0,0.0,0.0,0.0,-1.052632,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.000000


In [247]:
user_similarity = sparse.csr_matrix(matrix_norm)
user_similarity

<1776x727 sparse matrix of type '<class 'numpy.float64'>'
	with 63497 stored elements in Compressed Sparse Row format>

In [248]:
user_sim_cosine = cosine_similarity(matrix_norm)
user_sim_cosine

array([[ 1.        ,  0.16684649, -0.01738169, ..., -0.03657457,
         0.        , -0.00274732],
       [ 0.16684649,  1.        ,  0.        , ...,  0.        ,
         0.        ,  0.00623413],
       [-0.01738169,  0.        ,  1.        , ..., -0.02486355,
         0.03126179, -0.05453676],
       ...,
       [-0.03657457,  0.        , -0.02486355, ...,  1.        ,
         0.        ,  0.00398226],
       [ 0.        ,  0.        ,  0.03126179, ...,  0.        ,
         1.        , -0.01004342],
       [-0.00274732,  0.00623413, -0.05453676, ...,  0.00398226,
        -0.01004342,  1.        ]])

In [249]:
u_sim = pd.DataFrame(cosine_similarity(user_similarity), index=matrix_norm.T.columns, columns=matrix_norm.T.columns)
u_sim

user_id,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,276018,276463,276680,276925,277427,277478,277639,278137,278188,278418
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,1.000000,0.166846,-0.017382,-0.017726,-0.038520,-0.054380,-0.058078,0.281951,0.018609,-0.018342,...,0.0,0.113941,-0.022637,0.030171,-0.054827,0.0,0.000000,-0.036575,0.000000,-0.002747
507,0.166846,1.000000,0.000000,0.000000,-0.022543,0.027934,0.006886,0.000000,0.019509,0.070214,...,0.0,0.011997,0.000000,0.000000,-0.027150,0.0,0.000000,0.000000,0.000000,0.006234
882,-0.017382,0.000000,1.000000,-0.070256,-0.046620,-0.069750,0.010047,0.000000,0.000000,-0.001992,...,0.0,0.017504,0.025144,0.138323,0.048345,0.0,0.000000,-0.024864,0.031262,-0.054537
1424,-0.017726,0.000000,-0.070256,1.000000,0.000000,0.000000,0.000000,0.000000,0.050023,-0.033030,...,0.0,0.000000,0.000000,-0.066620,-0.001513,0.0,0.038392,0.080759,0.000000,0.010657
1435,-0.038520,-0.022543,-0.046620,0.000000,1.000000,-0.023846,-0.007580,0.000000,0.000000,-0.005132,...,0.0,0.000000,0.000000,-0.034817,-0.015593,0.0,-0.058449,-0.016153,0.000000,0.000319
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
277639,0.000000,0.000000,0.000000,0.038392,-0.058449,0.000000,0.000000,0.034982,0.000000,-0.025187,...,0.0,0.000000,0.000000,0.000000,0.018465,0.0,1.000000,0.000000,0.000000,-0.011951
278137,-0.036575,0.000000,-0.024864,0.080759,-0.016153,0.041514,0.013196,0.064387,0.000000,0.000000,...,0.0,0.000000,0.000000,-0.097762,0.009048,0.0,0.000000,1.000000,0.000000,0.003982
278188,0.000000,0.000000,0.031262,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,-0.021920,0.0,0.000000,0.000000,1.000000,-0.010043


In [259]:
# number of similar users
k = 10

In [260]:
picked_u = 4017

In [261]:
# picked = pd.DataFrame(ratingustdnan[picked_b].dropna(axis=0))
# picked
similar_user = u_sim[picked_u].sort_values(ascending=False)[1:k+1]
similar_user

user_id
89192     0.223488
187410    0.180796
115120    0.164933
113519    0.159355
253821    0.152998
69389     0.150875
13093     0.149806
16161     0.148517
272715    0.147928
81207     0.147813
Name: 4017, dtype: float64

In [262]:
#narrow down item
similar_user_book = matrix_norm0[matrix_norm0.index.isin(similar_user.index)].dropna(axis=1, how='all')
similar_user_book

ISBN,006016848X,0060173289,0060391626,0060392452,0060502258,0060915544,0060916508,0060921145,0060922532,0060928336,...,080411918X,0805063897,0842329242,0971880107,1400034779,1551668998,155874262X,1558743669,1558744630,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13093,,,,,,,,,,,...,,,,-1.125,,,,,,
16161,,,,,,,,,,,...,,,,,,,,,,
69389,,,-0.064516,,,,,,,,...,,,-0.064516,,,,,,,
81207,,,,,,,,,,-2.461538,...,,,,,,,,,,
89192,,,,,,,,,,,...,,,,,,,,,,
113519,-1.339506,-1.339506,-1.339506,,-1.339506,,-1.339506,-1.339506,-1.339506,-1.339506,...,-1.339506,-1.339506,,-1.339506,,,7.660494,-1.339506,-1.339506,
115120,,,-0.106509,,,,,,,-0.106509,...,,,,-0.106509,-0.106509,-0.106509,,,,
187410,,,,,1.769231,-2.230769,,,,-2.230769,...,,,,,,,,,,-2.230769
253821,,,,,,,,,,,...,,,,-1.4,,,,,,
272715,,,,-0.888889,,,,,,,...,,,,-0.888889,,,,,,


In [263]:
picked_userid_read = matrix_norm0[matrix_norm0.index == picked_u].dropna(axis=1, how='all')
picked_userid_read

ISBN,0060915544,0060928336,0060929871,0140042598,0140244824,014028009X,0142001740,0156027321,0312291639,0316096199,...,067976402X,068484267X,068484477X,0688177859,0786866586,0786868716,0786881852,0802139256,0804106304,1573225789
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4017,2.244898,-5.755102,3.244898,4.244898,4.244898,3.244898,-5.755102,-5.755102,-5.755102,4.244898,...,4.244898,4.244898,-0.755102,1.244898,-5.755102,-5.755102,-5.755102,-5.755102,-5.755102,3.244898


Next we will drop the movies that user id have watch 

In [264]:
similar_user_book.drop(picked_userid_read.columns,axis=1, inplace = True,errors = 'ignore')
similar_user_book

ISBN,006016848X,0060173289,0060391626,0060392452,0060502258,0060916508,0060921145,0060922532,0060930535,0060934417,...,080411918X,0805063897,0842329242,0971880107,1400034779,1551668998,155874262X,1558743669,1558744630,1878424319
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13093,,,,,,,,,,,...,,,,-1.125,,,,,,
16161,,,,,,,,,,,...,,,,,,,,,,
69389,,,-0.064516,,,,,,,,...,,,-0.064516,,,,,,,
81207,,,,,,,,,,,...,,,,,,,,,,
89192,,,,,,,,,,,...,,,,,,,,,,
113519,-1.339506,-1.339506,-1.339506,,-1.339506,-1.339506,-1.339506,-1.339506,-1.339506,,...,-1.339506,-1.339506,,-1.339506,,,7.660494,-1.339506,-1.339506,
115120,,,-0.106509,,,,,,-0.106509,-0.106509,...,,,,-0.106509,-0.106509,-0.106509,,,,
187410,,,,,1.769231,,,,,,...,,,,,,,,,,-2.230769
253821,,,,,,,,,,,...,,,,-1.4,,,,,,
272715,,,,-0.888889,,,,,,,...,,,,-0.888889,,,,,,


In [265]:
similar_user_book.columns

Index(['006016848X', '0060173289', '0060391626', '0060392452', '0060502258',
       '0060916508', '0060921145', '0060922532', '0060930535', '0060934417',
       ...
       '080411918X', '0805063897', '0842329242', '0971880107', '1400034779',
       '1551668998', '155874262X', '1558743669', '1558744630', '1878424319'],
      dtype='object', name='ISBN', length=290)

Recommend book

In [266]:
item_score = {}

for i in similar_user_book.columns:
    book_rating = similar_user_book[i]
    # create a variable to score the score
    total = 0
    # variable to store the number of score
    divide = 0
    for u in similar_user.index:
        if pd.isna(book_rating[u]) == False:
            score = similar_user[u] * book_rating[u]
            total +=score
            divide += abs(book_rating[u])
    # get average score for item 
    item_score[i] = total/divide

m = 10
item_score = pd.DataFrame(item_score.items(), columns=['ISBN', 'book_score'])
ranked_item_score = item_score.sort_values(by= 'book_score', ascending=False)

ranked_book = ranked_item_score.head(10)
ranked_book

Unnamed: 0,ISBN,book_score
258,0679734775,0.180796
262,0684801523,0.180796
75,0373825013,0.164933
18,0061097314,0.159355
199,0452284449,0.159355
29,0151008116,0.159355
261,067976397X,0.159355
191,0449912558,0.159355
286,155874262X,0.159355
22,0070212570,0.159355


In [267]:
book_recommend = pd.merge(book, ranked_book, on="ISBN").sort_values(by= 'book_score', ascending=False)
book_recommend

Unnamed: 0,ISBN,title,author,year,publisher,book_score
3,0679734775,The House on Mango Street (Vintage Contemporar...,SANDRA CISNEROS,1991,Vintage,0.180796
8,0684801523,The Great Gatsby,F. Scott Fitzgerald,1995,Scribner,0.180796
9,0373825013,"Whirlwind (Tyler, Book 1)",Nancy Martin,1992,Harlequin,0.164933
0,067976397X,Corelli's Mandolin : A Novel,LOUIS DE BERNIERES,1995,Vintage,0.159355
1,155874262X,Chicken Soup for the Soul (Chicken Soup for th...,Jack Canfield,1993,Health Communications,0.159355
2,0151008116,Life of Pi,Yann Martel,2002,Harcourt,0.159355
4,0070212570,Fried Green Tomatoes at the Whistle Stop Cafe,Fannie Flagg,1989,McGraw-Hill,0.159355
5,0449912558,The Sparrow,MARY DORIA RUSSELL,1997,Fawcett Books,0.159355
6,0061097314,The Bean Trees,Barbara Kingsolver,1998,HarperTorch,0.159355
7,0452284449,The Virgin Blue,Tracy Chevalier,2003,Plume Books,0.159355
