## k-Nearest Neighbors 

### User-based filtering
Calculate similarities between users and predict preferences based on similar users' preferences

In [1]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import cross_validate
import os

In [2]:
# Path to dataset folder

file_path = os.path.expanduser('ml-1m/ratings.dat')

In [13]:
# Define a reader
# 'user item rating timestamp' are each separated by '::' characters

columns = ['user_id','item_id','rating','timestamp']
reader = Reader(line_format = 'user item rating timestamp', sep='::')

data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()

sim_options = {'name': 'cosine', 'user_based': True}
knn_model = KNNBasic(sim_options = sim_options)

knn_model.fit(trainset)

# Replace user id
user_id = str(196)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [14]:
# Get items that the user has not rated
items_to_predict = [(user_id, iid, 4.0) for iid in trainset.all_items() if iid not in trainset.ur[trainset.to_inner_uid(user_id)]]

In [15]:
# Get top N recommendations for the user
top_n = knn_model.test(items_to_predict)[0:11]

In [16]:
# Display the top N recommendations
for uid, iid, true_r, est, _ in top_n:
    print(f'User {uid} -> Item {iid} (Predicted rating: {est:.2f})')

User 196 -> Item 0 (Predicted rating: 3.58)
User 196 -> Item 1 (Predicted rating: 3.58)
User 196 -> Item 2 (Predicted rating: 3.58)
User 196 -> Item 3 (Predicted rating: 3.58)
User 196 -> Item 4 (Predicted rating: 3.58)
User 196 -> Item 5 (Predicted rating: 3.58)
User 196 -> Item 6 (Predicted rating: 3.58)
User 196 -> Item 7 (Predicted rating: 3.58)
User 196 -> Item 8 (Predicted rating: 3.58)
User 196 -> Item 9 (Predicted rating: 3.58)
User 196 -> Item 10 (Predicted rating: 3.58)


## Item-based filtering
Calculate similarities between items and predict preferences based on the preferences of similar items

The code below is the exact same code as above, except for the modification of the 'user_based' parameter is now set to False

In [17]:
# Define a reader
# 'user item rating timestamp' are each separated by '::' characters

columns = ['user_id','item_id','rating','timestamp']
reader = Reader(line_format = 'user item rating timestamp', sep='::')

data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()

sim_options = {'name': 'cosine', 'user_based': False}
knn_model = KNNBasic(sim_options = sim_options)

knn_model.fit(trainset)

# Replace user id
user_id = str(196)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [18]:
# Get items that the user has not rated
items_to_predict = [(user_id, iid, 4.0) for iid in trainset.all_items() if iid not in trainset.ur[trainset.to_inner_uid(user_id)]]

In [19]:
# Get top N recommendations for the user
top_n = knn_model.test(items_to_predict)[0:11]

In [20]:
# Display the top N recommendations
for uid, iid, true_r, est, _ in top_n:
    print(f'User {uid} -> Item {iid} (Predicted rating: {est:.2f})')

User 196 -> Item 0 (Predicted rating: 3.58)
User 196 -> Item 1 (Predicted rating: 3.58)
User 196 -> Item 2 (Predicted rating: 3.58)
User 196 -> Item 3 (Predicted rating: 3.58)
User 196 -> Item 4 (Predicted rating: 3.58)
User 196 -> Item 5 (Predicted rating: 3.58)
User 196 -> Item 6 (Predicted rating: 3.58)
User 196 -> Item 7 (Predicted rating: 3.58)
User 196 -> Item 8 (Predicted rating: 3.58)
User 196 -> Item 9 (Predicted rating: 3.58)
User 196 -> Item 10 (Predicted rating: 3.58)
