In [2]:
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate

# Fetch the built-in MovieLens-100k ratings (the library offers to download
# them on first use).
data = Dataset.load_builtin('ml-100k')

# Matrix-factorization model with the library's default hyper-parameters.
algo = SVD()

# 5-fold cross-validation reporting RMSE and MAE; the returned dict is the
# cell's displayed value, so the call stays the last expression.
cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9390  0.9378  0.9337  0.9460  0.9280  0.9369  0.0059  
MAE (testset)     0.7388  0.7395  0.7358  0.7444  0.7318  0.7380  0.0042  
Fit time          4.31    4.28    4.34    4.53    4.37    4.37    0.09    
Test time         0.11    0.13    0.11    0.14    0.11    0.12    0.01    


{'test_rmse': array([0.93898668, 0.93784245, 0.933718  , 0.94602311, 0.92804887]),
 'test_mae': array([0.73878044, 0.73950824, 0.73575365, 0.7444313 , 0.73177067]),
 'fit_time': (4.309417486190796,
  4.280559301376343,
  4.341419219970703,
  4.526926040649414,
  4.372278928756714),
 'test_time': (0.11273932456970215,
  0.13463997840881348,
  0.10864877700805664,
  0.13660526275634766,
  0.10974359512329102)}

In [8]:
from __future__ import (absolute_import, division, print_function, unicode_literals)
import os

from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate

# Ratings file in the working directory: one "user item rating timestamp"
# record per line, tab-separated.
file_path = os.path.expanduser('u.data')
reader = Reader(
    line_format='user item rating timestamp',
    sep="\t",
)
data = Dataset.load_from_file(file_path, reader=reader)

# Default-parameter SVD model.
algo = SVD()

# 5-fold cross-validation; the resulting dict of per-fold scores and timings
# is displayed as the cell output.
cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9428  0.9403  0.9288  0.9319  0.9326  0.9353  0.0053  
MAE (testset)     0.7429  0.7404  0.7323  0.7342  0.7347  0.7369  0.0040  
Fit time          5.11    5.53    5.67    5.65    5.63    5.52    0.21    
Test time         0.17    0.18    0.14    0.18    0.16    0.17    0.02    


{'test_rmse': array([0.94281289, 0.94033574, 0.92881769, 0.93187027, 0.93262967]),
 'test_mae': array([0.74288069, 0.74040829, 0.73226654, 0.73423809, 0.73472002]),
 'fit_time': (5.109337568283081,
  5.527255535125732,
  5.66679310798645,
  5.65388298034668,
  5.632977247238159),
 'test_time': (0.17353582382202148,
  0.18148303031921387,
  0.14065217971801758,
  0.1845076084136963,
  0.1555466651916504)}

In [9]:
import pandas as pd
from surprise import KNNBasic


# Load the tab-separated ratings file and train on *all* ratings (no hold-out
# split), so the predictions below are for user/item pairs the model has
# already seen.
file_path = os.path.expanduser('u.data')
reader = Reader(line_format='user item rating timestamp', sep="\t")
data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()

print(trainset)

# Mirror the raw ratings in a DataFrame so one user's rows can be selected.
df = pd.DataFrame(data.raw_ratings, columns=["uid", "iid", "rate", "timestamp"])

# User-based k-NN with cosine similarity between users.
sim_options = {'name': 'cosine', 'user_based': True}
algo = KNNBasic(k=40, min_k=1, sim_options=sim_options)
algo.fit(trainset)

# The uid column is compared against a string id, hence str(1) rather than 1.
uid = str(1)
iids = df[df.uid == uid]

# Print the estimate next to the known rating for EVERY item this user rated.
# Iterating the columns directly visits all rows, including the last one,
# which an index loop over range(1, len(iids)) with iids[i-1:i] would miss.
for iid, r_ui in zip(iids["iid"], iids["rate"]):
    pred = algo.predict(uid, iid, r_ui, verbose=True)

<surprise.trainset.Trainset object at 0x00000151468B6CC0>
Computing the cosine similarity matrix...
Done computing similarity matrix.
user: 1          item: 61         r_ui = 4.00   est = 4.10   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 189        r_ui = 3.00   est = 4.07   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 33         r_ui = 4.00   est = 3.60   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 160        r_ui = 4.00   est = 3.70   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 20         r_ui = 4.00   est = 3.68   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 202        r_ui = 5.00   est = 3.98   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 171        r_ui = 5.00   est = 4.18   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 265        r_ui = 4.00   est = 3.85   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 155        r_ui = 2

user: 1          item: 16         r_ui = 5.00   est = 3.22   {'actual_k': 39, 'was_impossible': False}
user: 1          item: 79         r_ui = 4.00   est = 4.13   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 261        r_ui = 1.00   est = 2.51   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 45         r_ui = 5.00   est = 4.38   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 48         r_ui = 5.00   est = 4.48   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 25         r_ui = 4.00   est = 3.95   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 251        r_ui = 4.00   est = 4.50   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 195        r_ui = 5.00   est = 4.25   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 153        r_ui = 3.00   est = 3.87   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 101        r_ui = 2.00   est = 3.37   {'actual_k':

In [10]:
import pandas as pd
from surprise import KNNWithMeans


# Load the tab-separated ratings file and train on *all* ratings (no hold-out
# split), so the predictions below are for user/item pairs the model has
# already seen.
file_path = os.path.expanduser('u.data')
reader = Reader(line_format='user item rating timestamp', sep="\t")
data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()

# Mirror the raw ratings in a DataFrame so one user's rows can be selected.
df = pd.DataFrame(data.raw_ratings, columns=["uid", "iid", "rate", "timestamp"])

# User-based k-NN (KNNWithMeans variant) with cosine similarity between users.
sim_options = {'name': 'cosine', 'user_based': True}
algo = KNNWithMeans(k=40, min_k=1, sim_options=sim_options)
algo.fit(trainset)

# The uid column is compared against a string id, hence str(1) rather than 1.
uid = str(1)
iids = df[df.uid == uid]

# Print the estimate next to the known rating for EVERY item this user rated.
# Iterating the columns directly visits all rows, including the last one,
# which an index loop over range(1, len(iids)) with iids[i-1:i] would miss.
for iid, r_ui in zip(iids["iid"], iids["rate"]):
    pred = algo.predict(uid, iid, r_ui, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
user: 1          item: 61         r_ui = 4.00   est = 3.98   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 189        r_ui = 3.00   est = 3.98   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 33         r_ui = 4.00   est = 3.50   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 160        r_ui = 4.00   est = 3.62   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 20         r_ui = 4.00   est = 3.57   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 202        r_ui = 5.00   est = 3.79   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 171        r_ui = 5.00   est = 4.14   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 265        r_ui = 4.00   est = 3.66   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 155        r_ui = 2.00   est = 2.77   {'actual_k': 40, 'was_impossible': Fals

user: 1          item: 245        r_ui = 2.00   est = 2.74   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 35         r_ui = 1.00   est = 2.39   {'actual_k': 11, 'was_impossible': False}
user: 1          item: 137        r_ui = 5.00   est = 3.87   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 127        r_ui = 5.00   est = 4.35   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 16         r_ui = 5.00   est = 3.39   {'actual_k': 39, 'was_impossible': False}
user: 1          item: 79         r_ui = 4.00   est = 3.90   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 261        r_ui = 1.00   est = 2.64   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 45         r_ui = 5.00   est = 4.20   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 48         r_ui = 5.00   est = 4.18   {'actual_k': 40, 'was_impossible': False}
user: 1          item: 25         r_ui = 4.00   est = 3.83   {'actual_k':

In [11]:
import pandas as pd
from surprise import SVD


# Load the tab-separated ratings file and train on *all* ratings (no hold-out
# split), so the predictions below are for user/item pairs the model has
# already seen.
file_path = os.path.expanduser('u.data')
reader = Reader(line_format='user item rating timestamp', sep="\t")
data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()

# Mirror the raw ratings in a DataFrame so one user's rows can be selected.
df = pd.DataFrame(data.raw_ratings, columns=["uid", "iid", "rate", "timestamp"])

# SVD with biases disabled and the regularization term set to 0.
algo = SVD(n_factors=100, n_epochs=20, biased=False, lr_all=0.005, reg_all=0)
algo.fit(trainset)

# The uid column is compared against a string id, hence str(1) rather than 1.
uid = str(1)
iids = df[df.uid == uid]

# Print the estimate next to the known rating for EVERY item this user rated.
# Iterating the columns directly visits all rows, including the last one,
# which an index loop over range(1, len(iids)) with iids[i-1:i] would miss.
for iid, r_ui in zip(iids["iid"], iids["rate"]):
    pred = algo.predict(uid, iid, r_ui, verbose=True)

user: 1          item: 61         r_ui = 4.00   est = 4.19   {'was_impossible': False}
user: 1          item: 189        r_ui = 3.00   est = 4.26   {'was_impossible': False}
user: 1          item: 33         r_ui = 4.00   est = 4.21   {'was_impossible': False}
user: 1          item: 160        r_ui = 4.00   est = 4.15   {'was_impossible': False}
user: 1          item: 20         r_ui = 4.00   est = 4.37   {'was_impossible': False}
user: 1          item: 202        r_ui = 5.00   est = 3.93   {'was_impossible': False}
user: 1          item: 171        r_ui = 5.00   est = 5.00   {'was_impossible': False}
user: 1          item: 265        r_ui = 4.00   est = 4.00   {'was_impossible': False}
user: 1          item: 155        r_ui = 2.00   est = 2.22   {'was_impossible': False}
user: 1          item: 117        r_ui = 3.00   est = 3.44   {'was_impossible': False}
user: 1          item: 47         r_ui = 4.00   est = 3.89   {'was_impossible': False}
user: 1          item: 222        r_ui = 4.

user: 1          item: 115        r_ui = 5.00   est = 4.41   {'was_impossible': False}
user: 1          item: 11         r_ui = 2.00   est = 3.19   {'was_impossible': False}
user: 1          item: 245        r_ui = 2.00   est = 2.23   {'was_impossible': False}
user: 1          item: 35         r_ui = 1.00   est = 1.86   {'was_impossible': False}
user: 1          item: 137        r_ui = 5.00   est = 5.00   {'was_impossible': False}
user: 1          item: 127        r_ui = 5.00   est = 4.56   {'was_impossible': False}
user: 1          item: 16         r_ui = 5.00   est = 4.15   {'was_impossible': False}
user: 1          item: 79         r_ui = 4.00   est = 3.65   {'was_impossible': False}
user: 1          item: 261        r_ui = 1.00   est = 1.81   {'was_impossible': False}
user: 1          item: 45         r_ui = 5.00   est = 4.96   {'was_impossible': False}
user: 1          item: 48         r_ui = 5.00   est = 5.00   {'was_impossible': False}
user: 1          item: 25         r_ui = 4.

In [12]:
import pandas as pd
from surprise import SVD


# Load the tab-separated ratings file and train on *all* ratings (no hold-out
# split), so the predictions below are for user/item pairs the model has
# already seen.
file_path = os.path.expanduser('u.data')
reader = Reader(line_format='user item rating timestamp', sep="\t")
data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()

# Mirror the raw ratings in a DataFrame so one user's rows can be selected.
df = pd.DataFrame(data.raw_ratings, columns=["uid", "iid", "rate", "timestamp"])

# SVD with biases disabled and a regularization term of 0.02.
algo = SVD(n_factors=100, n_epochs=20, biased=False, lr_all=0.005, reg_all=0.02)
algo.fit(trainset)

# The uid column is compared against a string id, hence str(1) rather than 1.
uid = str(1)
iids = df[df.uid == uid]

# Print the estimate next to the known rating for EVERY item this user rated.
# Iterating the columns directly visits all rows, including the last one,
# which an index loop over range(1, len(iids)) with iids[i-1:i] would miss.
for iid, r_ui in zip(iids["iid"], iids["rate"]):
    pred = algo.predict(uid, iid, r_ui, verbose=True)

user: 1          item: 61         r_ui = 4.00   est = 4.49   {'was_impossible': False}
user: 1          item: 189        r_ui = 3.00   est = 4.05   {'was_impossible': False}
user: 1          item: 33         r_ui = 4.00   est = 3.78   {'was_impossible': False}
user: 1          item: 160        r_ui = 4.00   est = 3.76   {'was_impossible': False}
user: 1          item: 20         r_ui = 4.00   est = 4.22   {'was_impossible': False}
user: 1          item: 202        r_ui = 5.00   est = 4.43   {'was_impossible': False}
user: 1          item: 171        r_ui = 5.00   est = 4.86   {'was_impossible': False}
user: 1          item: 265        r_ui = 4.00   est = 3.71   {'was_impossible': False}
user: 1          item: 155        r_ui = 2.00   est = 2.35   {'was_impossible': False}
user: 1          item: 117        r_ui = 3.00   est = 3.55   {'was_impossible': False}
user: 1          item: 47         r_ui = 4.00   est = 3.71   {'was_impossible': False}
user: 1          item: 222        r_ui = 4.

In [13]:
import pandas as pd
from surprise import SVD


# Load the tab-separated ratings file and train on *all* ratings (no hold-out
# split), so the predictions below are for user/item pairs the model has
# already seen.
file_path = os.path.expanduser('u.data')
reader = Reader(line_format='user item rating timestamp', sep="\t")
data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()

# Mirror the raw ratings in a DataFrame so one user's rows can be selected.
df = pd.DataFrame(data.raw_ratings, columns=["uid", "iid", "rate", "timestamp"])

# SVD with biases enabled and a regularization term of 0.02.
algo = SVD(n_factors=100, n_epochs=20, biased=True, lr_all=0.005, reg_all=0.02)
algo.fit(trainset)

# The uid column is compared against a string id, hence str(1) rather than 1.
uid = str(1)
iids = df[df.uid == uid]

# Print the estimate next to the known rating for EVERY item this user rated.
# Iterating the columns directly visits all rows, including the last one,
# which an index loop over range(1, len(iids)) with iids[i-1:i] would miss.
for iid, r_ui in zip(iids["iid"], iids["rate"]):
    pred = algo.predict(uid, iid, r_ui, verbose=True)

user: 1          item: 61         r_ui = 4.00   est = 4.35   {'was_impossible': False}
user: 1          item: 189        r_ui = 3.00   est = 3.74   {'was_impossible': False}
user: 1          item: 33         r_ui = 4.00   est = 3.28   {'was_impossible': False}
user: 1          item: 160        r_ui = 4.00   est = 3.54   {'was_impossible': False}
user: 1          item: 20         r_ui = 4.00   est = 3.55   {'was_impossible': False}
user: 1          item: 202        r_ui = 5.00   est = 4.30   {'was_impossible': False}
user: 1          item: 171        r_ui = 5.00   est = 4.52   {'was_impossible': False}
user: 1          item: 265        r_ui = 4.00   est = 4.32   {'was_impossible': False}
user: 1          item: 155        r_ui = 2.00   est = 2.39   {'was_impossible': False}
user: 1          item: 117        r_ui = 3.00   est = 3.24   {'was_impossible': False}
user: 1          item: 47         r_ui = 4.00   est = 4.15   {'was_impossible': False}
user: 1          item: 222        r_ui = 4.

user: 1          item: 52         r_ui = 4.00   est = 3.88   {'was_impossible': False}
user: 1          item: 88         r_ui = 4.00   est = 3.54   {'was_impossible': False}
user: 1          item: 13         r_ui = 5.00   est = 4.39   {'was_impossible': False}
user: 1          item: 28         r_ui = 4.00   est = 4.11   {'was_impossible': False}
user: 1          item: 172        r_ui = 5.00   est = 5.00   {'was_impossible': False}
user: 1          item: 122        r_ui = 3.00   est = 2.61   {'was_impossible': False}
user: 1          item: 152        r_ui = 5.00   est = 4.31   {'was_impossible': False}
