### Implementation of KNN

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split

### Reading the dataset which was preprocessed

In [2]:
book_ratings = pd.read_csv('preprocessed_ratings.csv')

In [3]:
book_ratings.head()

Unnamed: 0,User_ID,ISBN,Book_Rating,Book_Title,Book_Author,Year_Of_Publication,Publisher,Location,Age
0,53,451,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,"strafford, missouri, usa",34.0
1,53,280,0,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown","strafford, missouri, usa",34.0
2,53,647,0,The Da Vinci Code,Dan Brown,2003,Doubleday,"strafford, missouri, usa",34.0
3,53,2028,0,Wild Animus,Rich Shapero,2004,Too Far,"strafford, missouri, usa",34.0
4,53,241,0,Four To Score (A Stephanie Plum Novel),Janet Evanovich,1999,St. Martin's Paperbacks,"strafford, missouri, usa",34.0


### Split the dataset into train and test

In [4]:
train_data, test_data = train_test_split(book_ratings, test_size=0.20)

### Make ratings matrix 

In [5]:
train_data_pivot = train_data.pivot(index='ISBN', columns='User_ID', values='Book_Rating').fillna(0)

### Fill in the columns which ignored in the ratings matrix, in order to have the same dimensionality with original matrix

In [6]:
diff_cols_train = list(set(range(2954)) - set(train_data_pivot.columns))
for user in diff_cols_train:
    train_data_pivot[str(user)] = 0

In [7]:
train_data_matrix = csr_matrix(train_data_pivot.values)

### Do the same for test dataset

In [8]:
test_data_pivot = test_data.pivot(index='ISBN', columns='User_ID', values='Book_Rating').fillna(0)

In [9]:
diff_cols_test = list(set(range(2954)) - set(test_data_pivot.columns))
for user in diff_cols_test:
    test_data_pivot[str(user)] = 0

### Make a KNN model and fit it

In [10]:
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(train_data_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

### Ok, now try to make recommendations for an input row of test dataset

In [66]:
query_index = 37 # For examle
distance, indices = knn.kneighbors(test_data_pivot.iloc[query_index, :].values.reshape(1, -1), n_neighbors= 11)

for i in range(len(distance.flatten())):
    if i == 0:
        print("Recommendation for {0}:\n".format(book_ratings[book_ratings['ISBN'] == train_data_pivot.index[query_index]]['Book_Title'].unique()))
    else:
        print("{0}: {1}".format(i, book_ratings[book_ratings['ISBN'] == train_data_pivot.index[indices.flatten()[i]]]['Book_Title'].unique()))

Recommendation for ['The Lost Continent: Travels in Small-Town America']:

1: ['The Road to Omaha']
2: ['While My Pretty One Sleeps']
3: ['The Bear and the Dragon (Jack Ryan Novels)']
4: ['Milk and Honey (Peter Decker/Rina Lazarus Novels)']
5: ['Stone Kiss']
6: ['The Anastasia Syndrome']
7: ['Weep No More My Lady']
8: ['Moonlight Becomes You']
9: ['Before I Say Good-Bye']
10: ["Pretend You Don't See Her"]
