In this notebook, I take a test-driven approach to developing the recommendation system by trying out the evaluation-metric functionality.

In [2]:
from MovieLens import MovieLens
from surprise import SVD
from surprise import KNNBaseline
from surprise.model_selection import train_test_split
from surprise.model_selection import LeaveOneOut
from RecommenderMetrics import RecommenderMetrics

In [3]:
# Helper object used below to load the ratings and the popularity ranks.
ml = MovieLens()

In [6]:
print("Loading movie ratings...")
# The returned dataset is the source for every train/test split built below.
data = ml.loadMovieLensLatestSmall()

Loading movie ratings...


In [7]:
print("Computing movie popularity ranks so we can measure novelty later...")
# Kept for the Novelty metric computed at the end of the notebook.
rankings = ml.getPopularityRanks()

Computing movie popularity ranks so we can measure novelty later...


In [8]:
print("Computing item similarities so we can measure diversity later...")
# Item-item similarities (user_based=False) with the pearson_baseline measure.
sim_options = dict(name='pearson_baseline', user_based=False)
simsAlgo = KNNBaseline(sim_options=sim_options)
# Fit on the full training set built from all of `data`; it is reused later
# for the "no hold outs" recommendations.
fullTrainSet = data.build_full_trainset()
simsAlgo.fit(fullTrainSet)

Computing item similarities so we can measure diversity later...
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0x7f9009437250>

In [9]:
print("Building recommendation model...")
# SVD model with a fixed seed so results are repeatable.
algo = SVD(random_state=10)

# Hold out 25% of the ratings for the accuracy evaluation; seeded split.
trainSet, testSet = train_test_split(data, random_state=1, test_size=0.25)
algo.fit(trainSet)

Building recommendation model...


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f9009437c50>

In [10]:
print("Computing recommendations...")
# Score the held-out test split with the fitted model; the accuracy metrics below consume these predictions.
predictions = algo.test(testSet)

Computing recommendations...


In [11]:
print("Evaluating accuracy of model...")
# Compute each metric into a named value, then report it, one at a time.
rmse = RecommenderMetrics.RMSE(predictions)
print("RMSE: ", rmse)
mae = RecommenderMetrics.MAE(predictions)
print("MAE: ", mae)

Evaluating accuracy of model...
RMSE:  0.87790565300794
MAE:  0.6731720779996845


In [12]:
print("Evaluating top-10 recommendations...")

# Set aside one rating per user for testing
LOOCV = LeaveOneOut(n_splits=1, random_state=1)

# The leave-one-out split and model get their own names (looTrainSet,
# looTestSet, looAlgo) so this cell does not rebind the 75/25 split
# (trainSet/testSet) or refit the shared `algo`. Re-running the earlier
# accuracy cells after this one therefore still evaluates the same model on
# the same test set.
for looTrainSet, looTestSet in LOOCV.split(data):
    print("Computing recommendations with leave-one-out...")

    # Train a model without the left-out ratings. Same configuration as
    # `algo` (SVD, random_state=10), but a separate instance.
    looAlgo = SVD(random_state=10)
    looAlgo.fit(looTrainSet)

    # Predict ratings for the left-out ratings only
    print("Predict ratings for left-out set...")
    leftOutPredictions = looAlgo.test(looTestSet)

    # Build predictions for all ratings not in the training set
    print("Predict all missing ratings...")
    bigTestSet = looTrainSet.build_anti_testset()
    allPredictions = looAlgo.test(bigTestSet)

    # Compute top 10 recs for each user
    print("Compute top 10 recs per user...")
    topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n=10)

    # See how often we recommended a movie the user actually rated
    hitRate = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
    print("\nHit Rate: ", hitRate)

    # Break down hit rate by rating value (the per-rating lines in the
    # cell output come from this call, not from a print here)
    print("\nrHR (Hit Rate by Rating value): ")
    RecommenderMetrics.RatingHitRate(topNPredicted, leftOutPredictions)

    # See how often we recommended a movie the user actually liked
    cumulativeHitRate = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions, 4.0)
    print("\ncHR (Cumulative Hit Rate, rating >= 4): ", cumulativeHitRate)

    # Compute ARHR
    arhr = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
    print("\nARHR (Average Reciprocal Hit Rank): ", arhr)



Evaluating top-10 recommendations...
Computing recommendations with leave-one-out...
Predict ratings for left-out set...
Predict all missing ratings...
Compute top 10 recs per user...

Hit Rate:  0.036065573770491806

rHR (Hit Rate by Rating value): 
2.5 0.06666666666666667
3.0 0.008695652173913044
4.0 0.044444444444444446
4.5 0.09433962264150944
5.0 0.056910569105691054

cHR (Cumulative Hit Rate, rating >= 4):  0.056179775280898875

ARHR (Average Reciprocal Hit Rank):  0.013333333333333332


In [13]:
print("Computing complete recommendations, no hold outs...")
# Candidate pairs: everything not already present in the full training set.
bigTestSet = fullTrainSet.build_anti_testset()
# fit() hands back the fitted algorithm, so training and scoring are chained.
allPredictions = algo.fit(fullTrainSet).test(bigTestSet)
# Keep the 10 best predictions per user.
topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n=10)

Computing complete recommendations, no hold outs...


In [14]:
# Print user coverage with a minimum predicted rating of 4.0:
userCoverage = RecommenderMetrics.UserCoverage(
    topNPredicted,
    fullTrainSet.n_users,
    ratingThreshold=4.0,
)
print("User coverage: ", userCoverage)

User coverage:  0.9245901639344263


In [15]:
# Measure diversity of recommendations:
# Uses the item-similarity model fitted earlier (simsAlgo).
diversity = RecommenderMetrics.Diversity(topNPredicted, simsAlgo)
print("Diversity: ", diversity)

Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Diversity:  0.9642412821104059


In [16]:
# Measure novelty (average popularity rank of recommendations):
# Uses the popularity ranks computed near the top of the notebook.
novelty = RecommenderMetrics.Novelty(topNPredicted, rankings)
print("Novelty (average popularity rank): ", novelty)


Novelty (average popularity rank):  504.3873857062885
