In [1]:
import pandas as pd
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import sklearn

### Note:
- `csr_matrix()` builds a Compressed Sparse Row (CSR) matrix, a format that stores only the non-zero entries.
    - Read more at https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html
    and
    https://machinelearningmastery.com/sparse-matrices-for-machine-learning/


In [2]:
# Load the ratings table from a CSV file in the working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, presumably an
# index saved by an earlier to_csv(); index_col=0 would drop it — confirm first.
book_data=pd.read_csv('book_recom.csv')

In [3]:
# Preview the first rows to check the column names and value formats.
book_data.head()

Unnamed: 0,userID,ISBN,bookRating,bookTitle,totalRatingCount,Location
0,276725,034545104X,0,Flesh Tones: A Novel,60,"tyler, texas, usa"
1,2313,034545104X,5,Flesh Tones: A Novel,60,"cincinnati, ohio, usa"
2,6543,034545104X,0,Flesh Tones: A Novel,60,"strafford, missouri, usa"
3,8680,034545104X,5,Flesh Tones: A Novel,60,"st. charles county, missouri, usa"
4,10314,034545104X,9,Flesh Tones: A Novel,60,"beaverton, oregon, usa"


In [4]:
# Build the book-by-user rating matrix: one row per title, one column per user.
# (title, user) pairs without a rating come out of pivot() as NaN and are
# replaced by 0. pivot() raises ValueError if a (bookTitle, userID) pair
# appears more than once in book_data.
data_pivot = book_data.pivot(
    index='bookTitle',
    columns='userID',
    values='bookRating',
).fillna(0)

In [5]:
# (number of book titles, number of users): rows are titles, columns are user IDs.
data_pivot.shape

(198, 11337)

In [6]:
# Preview the first five titles of the pivoted rating matrix.
data_pivot.head(5)

userID,14,23,26,51,67,99,135,243,254,256,...,278483,278514,278535,278552,278582,278633,278740,278769,278773,278843
bookTitle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"A Child Called \It\"": One Child's Courage to Survive""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Cold Heart: An Alex Delaware Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Kiss of Shadows (Meredith Gentry Novels (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Man in Full,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Convert the dense pivot table into SciPy's Compressed Sparse Row format.
book_data_matrix = csr_matrix(data_pivot.values)
# Bare last expression so the notebook displays the matrix summary.
book_data_matrix

<198x11337 sparse matrix of type '<class 'numpy.float64'>'
	with 11110 stored elements in Compressed Sparse Row format>

In [8]:
# Brute-force nearest-neighbour search using cosine distance between the rows
# (books) of the sparse rating matrix.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
# fit() is left as the last expression so the cell shows the estimator's repr.
model_knn.fit(book_data_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

### Recommendation for reading:
- Find the 5 books most similar (maximum similarity means lowest distance) 
to the first book, '1st to Die: A Novel'.
- The function kneighbors() is used to find the k nearest neighbors of a point.
- We need to request 6 neighbors for book1 rather than 5.
Note that the shortest possible distance is 0, which is the distance from book1 to itself, so the closest neighbor returned is book1. Hence, we need to take 6 neighbors instead of 5.


In [9]:
# Row position (in data_pivot) of the book to get recommendations for.
query_index1 = 0  # 1st book

# Selecting with a one-element list keeps the row two-dimensional (1 x n_users),
# which is the shape kneighbors() expects for a single query point.
# Six neighbours are requested because the closest match is the query book itself.
distances, indices = model_knn.kneighbors(
    data_pivot.iloc[[query_index1]].values,
    n_neighbors=6,
)


In [10]:
# Display the cosine distances from the query book to each returned neighbour.
# The leading 0 is the query book matched with itself.
distances

array([[0.        , 0.85554748, 0.89964868, 0.90149976, 0.90925394,
        0.92285717]])

In [11]:
# Display the row positions (in data_pivot) of the returned neighbours,
# in the same order as `distances`.
indices

array([[  0, 110,  12, 188,  26,  78]], dtype=int64)

In [12]:
# Header names the query book.
print('Recommendations for {0}\n'.format(data_pivot.index[query_index1]))
# The first neighbour is the query book itself, so it is skipped; the remaining
# positions are looked up in the title index in one go.
for title in data_pivot.index[indices.ravel()[1:]]:
    print(title)

Recommendations for 1st to Die: A Novel

Roses Are Red (Alex Cross Novels)
Along Came a Spider (Alex Cross Novels)
Violets Are Blue
Cat &amp; Mouse (Alex Cross Novels)
Jack &amp; Jill (Alex Cross Novels)


In [13]:
# kneighbors() returns 2-D arrays with one row per query point. There is a
# single query here, so flatten each array once up front instead of on every
# loop iteration.
neighbor_distances = distances.flatten()
neighbor_indices = indices.flatten()

print('Recommendations for {0}\n'.format(data_pivot.index[query_index1]))

# Position 0 is the query book itself (distance 0), so the ranked list starts
# from the second entry and is numbered from 1.
for rank, (book_idx, distance) in enumerate(
        zip(neighbor_indices[1:], neighbor_distances[1:]), start=1):
    print('{0}: {1}, with distance of {2}'.format(
        rank, data_pivot.index[book_idx], distance))


Recommendations for 1st to Die: A Novel

1: Roses Are Red (Alex Cross Novels), with distance of 0.8555474766709164
2: Along Came a Spider (Alex Cross Novels), with distance of 0.8996486778230974
3: Violets Are Blue, with distance of 0.9014997612208198
4: Cat &amp; Mouse (Alex Cross Novels), with distance of 0.909253938096594
5: Jack &amp; Jill (Alex Cross Novels), with distance of 0.9228571674498819
