In [1]:
from surprise import KNNWithMeans, KNNBasic
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split

import pandas as pd

In [2]:
movies = pd.read_csv('data/movies.csv')
ratings = pd.read_csv('data/ratings.csv')

In [25]:
ratings.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
movies_with_ratings = movies.join(ratings.set_index('movieId'), on='movieId')
movies_with_ratings.dropna(inplace=True)

In [6]:
movies_with_ratings[movies_with_ratings.userId == 2.0].movieId.unique()

array([   318,    333,   1704,   3578,   6874,   8798,  46970,  48516,
        58559,  60756,  68157,  71535,  74458,  77455,  79132,  80489,
        80906,  86345,  89774,  91529,  91658,  99114, 106782, 109487,
       112552, 114060, 115713, 122882, 131724])

In [8]:
dataset = pd.DataFrame({
    'uid': movies_with_ratings.userId,
    'iid': movies_with_ratings.movieId,
    'rating': movies_with_ratings.rating
})

In [10]:
dataset.head(20)

Unnamed: 0,uid,iid,rating
0,1.0,1,4.0
0,5.0,1,4.0
0,7.0,1,4.5
0,15.0,1,2.5
0,17.0,1,4.5
0,18.0,1,3.5
0,19.0,1,4.0
0,21.0,1,3.5
0,27.0,1,3.0
0,31.0,1,5.0


In [11]:
ratings.rating.min()

0.5

In [12]:
ratings.rating.max()

5.0

In [13]:
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(dataset, reader)

In [14]:
trainset, testset = train_test_split(data, test_size=.15)

In [15]:
algo = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': True})
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7f30440832e0>

In [16]:
test_pred = algo.test(testset)

In [17]:
accuracy.rmse(test_pred, verbose=True)

RMSE: 0.8854


0.8854427593995103

In [19]:
algo.predict(uid=2, iid=1)

Prediction(uid=2, iid=1, r_ui=None, est=4.138135950944892, details={'actual_k': 42, 'was_impossible': False})

In [21]:
algo = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': False})
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7f3094693820>

In [22]:
test_pred = algo.test(testset)

In [23]:
accuracy.rmse(test_pred, verbose=True)

RMSE: 0.8754


0.8754269032190497

In [24]:
algo.predict(uid=2, iid=6)

Prediction(uid=2, iid=6, r_ui=None, est=3.9767247516018536, details={'actual_k': 13, 'was_impossible': False})