In [1]:
import surprise

#### Dataset

In [12]:
import numpy as np
import pandas as pd

dataset = pd.read_csv('data/ratings.txt', sep=' ', header=None, names=['uid', 'iid', 'rating'])

dataset.head()

Unnamed: 0,uid,iid,rating
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


#### Fitting the Model

In [13]:
lower_rating = dataset['rating'].min()
upper_rating = dataset['rating'].max()
print(f'Review range: {lower_rating} to {upper_rating}')

Review range: 0.5 to 4.0


In [14]:
reader = surprise.Reader(rating_scale=(0.5, 4))
data = surprise.Dataset.load_from_df(dataset, reader)

In [15]:
alg = surprise.SVDpp()
output = alg.fit(data.build_full_trainset())

In [17]:
# the uids and iids should be set as strings
pred = alg.predict(uid='50', iid='52')
score = pred.est
print(score)

3.0028030537791928


#### Making Recommendations

In [18]:
# get a list of all movie ids
iids = dataset['iid'].unique()

# get a list of iids that uid 50 has rated
iids50 = dataset.loc[dataset['uid'] == 50, 'iid']

# remove the iids that uid 50 has rated from the list of all move ids
iids_to_pred = np.setdiff1d(iids, iids50)

In [21]:
testset = [[50, iid, 4] for iid in iids_to_pred]
predictions = alg.test(testset)
predictions[0]

Prediction(uid=50, iid=14, r_ui=4, est=3.1117265781891814, details={'was_impossible': False})

In [22]:
pred_ratings = np.array([pred.est for pred in predictions])

# find the index of the maximum predicted rating
i_max = pred_ratings.argmax()

# use this to find the corresponding iid to recommend
iid = iids_to_pred[i_max]

print(f'Top item for user 50 has iid {iid}, with predicted rating {i_max}')

Top item for user 50 has iid 242, with predicted rating 210


#### Tuning and Evaluating the Model

In [23]:
param_grid = {'lr_all': [.001, .01], 'reg_all': [.1, .5]}
gs = surprise.model_selection.GridSearchCV(surprise.SVDpp, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# print combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

{'lr_all': 0.01, 'reg_all': 0.1}


In [24]:
alg = surprise.SVDpp(lr_all=.001) #parameter choices can be added here
output = surprise.model_selection.cross_validate(alg, data, verbose=True)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8241  0.8384  0.8373  0.8184  0.8225  0.8281  0.0081  
MAE (testset)     0.6556  0.6623  0.6597  0.6491  0.6506  0.6555  0.0051  
Fit time          12.55   12.52   12.31   12.29   12.52   12.44   0.11    
Test time         0.21    0.22    0.22    0.22    0.29    0.23    0.03    
