In [16]:
from surprise import Dataset, Reader, BaselineOnly
from surprise.model_selection import train_test_split

# Load the ratings CSV into a surprise Dataset.
file_path = "./BX-CSV/BookRatings.csv"
# The first line of the file is a header row, so the reader skips it.
reader = Reader(
    line_format='user item rating',
    sep=',',
    rating_scale=(0, 10),
    skip_lines=1,
)
data = Dataset.load_from_file(file_path, reader=reader)

# Hold out 25% of the ratings, fit BaselineOnly on the rest and predict the hold-out.
# Author's note: BaselineOnly achieved the lowest RMSE (the comparison is not shown in this cell).
trainset, testset = train_test_split(data, random_state=10, test_size=0.25)
algo = BaselineOnly()
algo.fit(trainset)
predictions = algo.test(testset)
# Show the first five predictions as the cell output.
predictions[:5]

Estimating biases using als...


[Prediction(uid='81609', iid='671534734', r_ui=0.0, est=0.9340112184697642, details={'was_impossible': False}),
 Prediction(uid='21404', iid='679731725', r_ui=0.0, est=2.7134207423049914, details={'was_impossible': False}),
 Prediction(uid='11676', iid='449223795', r_ui=0.0, est=3.8536137483516444, details={'was_impossible': False}),
 Prediction(uid='153718', iid='515137111', r_ui=9.0, est=3.8171591746153424, details={'was_impossible': False}),
 Prediction(uid='242646', iid='440220602', r_ui=0.0, est=2.6897757263727917, details={'was_impossible': False})]

### Hit Rate

To evaluate top-10 recommendations, we use the hit rate: if a user rated one of the top-10 items we recommended, we count it as a “hit”.

The process of computing the hit rate for a single user:
1. Find all items in this user’s history in the training data.
2. Intentionally remove one of these items (leave-one-out cross-validation).
3. Use all other items to feed the recommender and ask for top 10 recommendations.
4. If the removed item appears in the top 10 recommendations, it is a hit. If not, it’s not a hit.

A very low hit rate often simply means we do not have enough data to work with. For example, Amazon’s hit rate for me would be terribly low because it does not have enough of my book-purchase data.

In [17]:
import itertools
from surprise import accuracy
from collections import defaultdict

class RecommenderMetrics:
    """Stateless helpers for scoring rating predictions and top-N recommendation lists.

    Conventions used by every method:

    * A *prediction* is a 5-item sequence unpacked as
      ``(user_id, book_id, actual_rating, estimated_rating, details)``.
    * A *top-N mapping* maps ``user_id`` to a list of
      ``(book_id, estimated_rating)`` pairs, best first, as built by ``GetTopN``.
      Users missing from the mapping are treated as having no recommendations.

    All methods are static, so they work both on the class
    (``RecommenderMetrics.HitRate(...)``) and on an instance.
    """

    @staticmethod
    def MAE(predictions):
        """Return the mean absolute error of ``predictions`` (delegates to ``accuracy.mae``)."""
        return accuracy.mae(predictions, verbose=False)

    @staticmethod
    def RMSE(predictions):
        """Return the root mean squared error of ``predictions`` (delegates to ``accuracy.rmse``)."""
        return accuracy.rmse(predictions, verbose=False)

    @staticmethod
    def GetTopN(predictions, n=10, min_rating=4.0):
        """Build each user's top-``n`` recommendations from ``predictions``.

        Args:
            predictions: iterable of predictions (see class docstring).
            n: maximum number of books kept per user.
            min_rating: predictions with a lower estimated rating are discarded.

        Returns:
            ``defaultdict(list)`` mapping ``user_id`` to at most ``n``
            ``(book_id, estimated_rating)`` pairs sorted by estimated rating,
            highest first. Users with no qualifying prediction get no entry.
        """
        topN = defaultdict(list)

        for user_id, book_id, actual_rating, estimated_rating, _ in predictions:
            if estimated_rating >= min_rating:
                topN[user_id].append((book_id, estimated_rating))

        for ratings in topN.values():
            # Stable sort: ties keep the order in which predictions arrived.
            ratings.sort(key=lambda pair: pair[1], reverse=True)
            del ratings[n:]

        return topN

    @staticmethod
    def _hit_rank(topN_predicted, user_id, left_out_book_id):
        """Return the 1-based rank of ``left_out_book_id`` in the user's top-N list, or 0 if absent."""
        # ``.get`` avoids a KeyError on plain dicts and avoids inserting empty
        # lists into a defaultdict as a side effect of a lookup.
        recommendations = topN_predicted.get(user_id, ())
        for rank, (book_id, _) in enumerate(recommendations, start=1):
            if book_id == left_out_book_id:
                return rank
        return 0

    @staticmethod
    def HitRate(topN_predicted, left_out_predictions):
        """Return the fraction of left-out predictions whose book is in that user's top-N list.

        Only the first two fields (user id, book id) of each left-out prediction
        are read. Returns ``0.0`` when ``left_out_predictions`` is empty.
        """
        hits = 0
        total = 0

        for left_out in left_out_predictions:
            if RecommenderMetrics._hit_rank(topN_predicted, left_out[0], left_out[1]):
                hits += 1
            total += 1

        return hits / total if total else 0.0

    @staticmethod
    def RatingHitRate(topN_predicted, left_out_predictions):
        """Print the hit rate broken down by the actual rating of the left-out book.

        One line ``<rating> <hit rate>`` is printed per rating value, in
        ascending order. Rating values that produced no hit at all are not
        printed. Returns ``None``.
        """
        hits = defaultdict(float)
        total = defaultdict(float)

        for user_id, left_out_book_id, actual_rating, estimated_rating, _ in left_out_predictions:
            if RecommenderMetrics._hit_rank(topN_predicted, user_id, left_out_book_id):
                hits[actual_rating] += 1
            total[actual_rating] += 1

        # Every key of ``hits`` is also a key of ``total`` with a count >= 1,
        # so the division below cannot be by zero.
        for rating in sorted(hits):
            print(rating, hits[rating] / total[rating])

    @staticmethod
    def CumulativeHitRate(topN_predicted, left_out_predictions, rating_cutoff=0):
        """Return the hit rate restricted to left-out books rated at least ``rating_cutoff``.

        Returns ``0.0`` when no left-out prediction meets the cutoff.
        """
        hits = 0
        total = 0

        for user_id, left_out_book_id, actual_rating, estimated_rating, _ in left_out_predictions:
            # Only measure the ability to recommend books the user actually liked.
            if actual_rating >= rating_cutoff:
                if RecommenderMetrics._hit_rank(topN_predicted, user_id, left_out_book_id):
                    hits += 1
                total += 1

        return hits / total if total else 0.0

    @staticmethod
    def AverageReciprocalHitRank(topN_predicted, left_out_predictions):
        """Return the mean of ``1 / rank`` over all left-out predictions.

        ``rank`` is the 1-based position of the left-out book in the user's
        top-N list; a miss contributes 0. Returns ``0.0`` when
        ``left_out_predictions`` is empty.
        """
        summation = 0
        total = 0

        for user_id, left_out_book_id, actual_rating, estimated_rating, _ in left_out_predictions:
            hit_rank = RecommenderMetrics._hit_rank(topN_predicted, user_id, left_out_book_id)
            if hit_rank > 0:
                summation += 1.0 / hit_rank
            total += 1

        return summation / total if total else 0.0
    
#     # What percentage of users have at least one "good" recommendation
#     def UserCoverage(topNPredicted, numUsers, ratingThreshold=0):
#         hits = 0
#         for userID in topNPredicted.keys():
#             hit = False
#             for movieID, predictedRating in topNPredicted[userID]:
#                 if (predictedRating >= ratingThreshold):
#                     hit = True
#                     break
#             if (hit):
#                 hits += 1

#         return hits / numUsers

#     def Diversity(topNPredicted, simsAlgo):
#         n = 0
#         total = 0
#         simsMatrix = simsAlgo.compute_similarities()
#         for userID in topNPredicted.keys():
#             pairs = itertools.combinations(topNPredicted[userID], 2)
#             for pair in pairs:
#                 movie1 = pair[0][0]
#                 movie2 = pair[1][0]
#                 innerID1 = simsAlgo.trainset.to_inner_iid(str(movie1))
#                 innerID2 = simsAlgo.trainset.to_inner_iid(str(movie2))
#                 similarity = simsMatrix[innerID1][innerID2]
#                 total += similarity
#                 n += 1

#         S = total / n
#         return (1-S)

#     def Novelty(topNPredicted, rankings):
#         n = 0
#         total = 0
#         for userID in topNPredicted.keys():
#             for rating in topNPredicted[userID]:
#                 movieID = rating[0]
#                 rank = rankings[movieID]
#                 total += rank
#                 n += 1
#         return total / n
    
    
# Alias for the class itself (not an instance); later cells call its methods as metrics.<Method>(...).
metrics = RecommenderMetrics

In [19]:
from surprise.model_selection import LeaveOneOut
from surprise import SVD
# Leave-one-out split: one rating per user is held out for testing (single split).
LOOCV = LeaveOneOut(n_splits=1, random_state=1)

algo = SVD(random_state=10)

for trainset, testset in LOOCV.split(data):
    print("Computing recommendations with leave-one-out...")
    algo.fit(trainset)

    print("Predict ratings for left-out set...")
    left_out_predictions = algo.test(testset)

    print("Predict all missing ratings...")
    # build_anti_testset() yields a list of ratings usable as a testset for test().
    big_testset = trainset.build_anti_testset()
    all_predictions = algo.test(big_testset)

    print("Compute top 10 recommendations per user...")
    topN_predicted = metrics.GetTopN(all_predictions, n=10)

    hit_rate = metrics.HitRate(topN_predicted, left_out_predictions)
    print("\nHit Rate: ", hit_rate)

    # Hit rate broken down by rating value; RatingHitRate prints its own rows.
    print("\nRating Hit Rate: ")
    metrics.RatingHitRate(topN_predicted, left_out_predictions)

    cumulative_hit_rate = metrics.CumulativeHitRate(
        topN_predicted, left_out_predictions, rating_cutoff=5.0
    )
    print("\nCumulative Hit Rate: ", cumulative_hit_rate)

    reciprocal_hit_rank = metrics.AverageReciprocalHitRank(topN_predicted, left_out_predictions)
    print("\nAverage Reciprocal Hit Rate: ", reciprocal_hit_rank)

Computing recommendations with leave-one-out...
Predict ratings for left-out set...
Predict all missing ratings...
Compute top 10 recommendations per user...

Hit Rate:  0.01143033673154155

Rating Hit Rate: 
0.0 0.0048567265662943174
7.0 0.015789473684210527
8.0 0.017857142857142856
9.0 0.004807692307692308
10.0 0.0625

Cumulative Hit Rate:  0.02378854625550661

Average Reciprocal Hit Rate:  0.004727702016858643
