In [2]:
import surprise
import numpy as np
import pandas as pd
import io

In [3]:
# URL or path to your .txt file
file_path = 'E:\Vocational\Lighthouse Labs\Flex Course\C08_Machine Learning Application\exercise_recommender_engines\data\\ratings.txt'

# Read the .txt file into a pandas DataFrame
# Assuming the file is space-separated and has no header
dataset = pd.read_csv(file_path, sep=' ', names=['uid', 'iid', 'rating'])

# Display the first few rows of the DataFrame
dataset.head()

Unnamed: 0,uid,iid,rating
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


In [4]:
lower_rating = dataset['rating'].min()
upper_rating = dataset['rating'].max()
print('Review range: {0} to {1}'.format(lower_rating, upper_rating))

Review range: 0.5 to 4.0


In [5]:
reader = surprise.Reader(rating_scale=(0.5, 4.))
data = surprise.Dataset.load_from_df(dataset, reader)

In [6]:
alg = surprise.SVDpp()
output = alg.fit(data.build_full_trainset())

In [7]:
# The uids and iids should be set as strings
pred = alg.predict(uid='50', iid='52')
score = pred.est
print(score)

3.0028030537791928


In [8]:
# Get a list of all movie ids
iids = dataset['iid'].unique()
# Get a list of iids that uid 50 has rated
iids50 = dataset.loc[dataset['uid'] == 50, 'iid']
# Remove the iids that uid 50 has rated from the list of all movie ids
iids_to_pred = np.setdiff1d(iids, iids50)

In [9]:
testset = [[50, iid, 4.] for iid in iids_to_pred]
predictions = alg.test(testset)
predictions[0]

Prediction(uid=50, iid=14, r_ui=4.0, est=3.2100636280755324, details={'was_impossible': False})

In [10]:
pred_ratings = np.array([pred.est for pred in predictions])
# Find the index of the maximum predicted rating
i_max = pred_ratings.argmax()
# Use this to find the corresponding iid to recommend
iid = iids_to_pred[i_max]
print('Top item for user 50 has iid {0} with predicted rating {1}'.format(iid, pred_ratings[i_max]))

Top item for user 50 has iid 189 with predicted rating 4.0


In [11]:
param_grid = {'lr_all': [.001, .01], 'reg_all': [.1, .5]}
gs = surprise.model_selection.GridSearchCV(surprise.SVDpp, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)
# Print combination of parameters that gave best RMSE score
print(gs.best_params['rmse'])

{'lr_all': 0.01, 'reg_all': 0.1}


In [12]:
alg = surprise.SVDpp(lr_all=.001) # parameter choices can be added here.
output = surprise.model_selection.cross_validate(alg, data, verbose=True)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8277  0.8241  0.8238  0.8301  0.8377  0.8287  0.0051  
MAE (testset)     0.6587  0.6520  0.6513  0.6540  0.6626  0.6557  0.0043  
Fit time          1.52    1.61    2.08    2.17    2.09    1.89    0.27    
Test time         0.28    0.38    0.37    0.36    0.38    0.35    0.04    
