In [5]:
from MovieLens import MovieLens
from surprise import SVD, SVDpp
from surprise import NormalPredictor
from surprise.model_selection import GridSearchCV
from Evaluator import Evaluator
import random
import numpy as np

In [2]:
def LoadMovieLensData():
    """Load the MovieLens ratings and the movie popularity ranks.

    Prints a progress line before each of the two loading steps.

    Returns:
        tuple: ``(loader, ratings, popularity)`` where ``loader`` is the
        ``MovieLens`` instance, ``ratings`` is whatever
        ``load_movies_dataset()`` returned and ``popularity`` is whatever
        ``get_popularity_ranks()`` returned.
    """
    loader = MovieLens()

    print("Loading movie ratings...")
    ratings = loader.load_movies_dataset()

    print("\nComputing movie popularity ranks...")
    popularity = loader.get_popularity_ranks()

    return loader, ratings, popularity


# Seed both Python's and NumPy's global random number generators with 0.
random.seed(0)
np.random.seed(0)

In [3]:
# Shared inputs for every recommender below: the loader, the ratings dataset and the popularity ranks
ml, evaluationData, rankings = LoadMovieLensData()

Loading movie ratings...

Computing movie popularity ranks...


In [6]:
# Grid search over SVD hyper-parameters, scored on RMSE and MAE with cv=3.
print("Searching for best parameters...")
param_grid = {
    'n_epochs': [20, 30],
    'lr_all': [0.005, 0.010],
    'n_factors': [50, 100],
}
gs = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse', 'mae'],
    cv=3,
)

gs.fit(evaluationData)

Searching for best parameters...


In [7]:
# Report the best RMSE found by the grid search ...
print("Best RMSE score attained: ", gs.best_score["rmse"])

# ... followed by the parameter combination that achieved it
print(gs.best_params["rmse"])

Best RMSE score attained:  0.8776329487069042
{'n_epochs': 20, 'lr_all': 0.005, 'n_factors': 50}


In [8]:
# Construct an evaluator over the shared dataset and popularity rankings
evaluator = Evaluator(evaluationData, rankings)

# Tuned SVD: unpack the best-RMSE parameter set found by the grid search.
# Passing the whole dict keeps this model in sync with the search, so every
# tuned hyper-parameter is applied rather than a hand-copied subset of keys.
params = gs.best_params['rmse']
SVDtuned = SVD(**params)
evaluator.AddAlgorithm(SVDtuned, "SVD - Tuned")

# Untuned SVD, constructed with no arguments, as the reference point
SVDUntuned = SVD()
evaluator.AddAlgorithm(SVDUntuned, "SVD - Untuned")

# Make random recommendations to compare against
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [9]:
# Compare them: evaluate the algorithms added to the evaluator.
# NOTE(review): the positional False presumably turns off the top-N metrics --
# the recorded output reports only RMSE and MAE; confirm against Evaluator.Evaluate.
evaluator.Evaluate(False)

# Print sample recommendations, passing in the MovieLens loader
# (presumably used to look up movie titles -- TODO confirm against Evaluator.SampleTopNRecs).
evaluator.SampleTopNRecs(ml)

Evaluating  SVD - Tuned ...
Evaluating accuracy...
Analysis complete.
Evaluating  SVD - Untuned ...
Evaluating accuracy...
Analysis complete.
Evaluating  Random ...
Evaluating accuracy...
Analysis complete.


Algorithm  RMSE       MAE       
SVD - Tuned 0.8798     0.6748    
SVD - Untuned 0.8817     0.6759    
Random     1.4240     1.1349    

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accuracy.
MAE:       Mean Absolute Error. Lower values mean better accuracy.

Using recommender  SVD - Tuned

Building recommendation model...
Computing recommendations...

We recommend:
Shawshank Redemption, The (1994) 4.671092261928114
There Will Be Blood (2007) 4.643516128856176
City of God (Cidade de Deus) (2002) 4.625487008941527
Eternal Sunshine of the Spotless Mind (2004) 4.613154561288935
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964) 4.607189309577227
Blade Runner (1982) 4.597743330419324
Pianist, The (2002) 4.596961727969341
Life Is Beauti