### Test Metrics with SVD 

In [13]:
from Movielens import MovieLens
from surprise import SVD
from surprise import KNNBaseline
from surprise.model_selection import train_test_split
from surprise.model_selection import LeaveOneOut
from RecommenderMetrics import RecommenderMetrics

In [15]:
ml = MovieLens()

In [3]:
print("Loading movie ratings...")
data = ml.loadMovieLensLatestSmall()

Loading movie ratings...


In [42]:
print("Computing movie popularity ranks so we can measure novelty later...")
rankings = ml.getPopularityRanks()

Computing movie popularity ranks so we can measure novelty later...


In [5]:
print("Computing item similarities so we can measure diversity later...")
fullTrainSet = data.build_full_trainset()
sim_options = {'name': 'pearson_baseline', 'user_based': False}
simsAlgo = KNNBaseline(sim_options=sim_options)
simsAlgo.fit(fullTrainSet)


Computing item similarities so we can measure diversity later...
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0x7fbe73b00ed0>

In [6]:
print("\nBuilding recommendation model...")
trainSet, testSet = train_test_split(data, test_size=.25, random_state=1)

algo = SVD(random_state=10)
algo.fit(trainSet)


Building recommendation model...


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fbe73c16790>

In [7]:
print("Computing recommendations...")
predictions = algo.test(testSet)


Computing recommendations...


In [8]:
print("\nEvaluating accuracy of model...")
print("RMSE: ", RecommenderMetrics.RMSE(predictions))
print("MAE: ", RecommenderMetrics.MAE(predictions))


Evaluating accuracy of model...
RMSE:  0.87790565300794
MAE:  0.6731720779996845


In [9]:
print("\nEvaluating top-10 recommendations...")

# Set aside one rating per user for testing
LOOCV = LeaveOneOut(n_splits=1, random_state=1)

for trainSet, testSet in LOOCV.split(data):
    print("Computing recommendations with leave-one-out...")

    # Train model without left-out ratings
    algo.fit(trainSet)

    # Predicts ratings for left-out ratings only
    print("Predict ratings for left-out set...")
    leftOutPredictions = algo.test(testSet)

    # Build predictions for all ratings not in the training set
    print("Predict all missing ratings...")
    bigTestSet = trainSet.build_anti_testset()
    allPredictions = algo.test(bigTestSet)

    # Compute top 10 recs for each user
    print("Compute top 10 recs per user...")
    topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n=10)

    # See how often we recommended a movie the user actually rated
    print("\nHit Rate: ", RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions))

    # Break down hit rate by rating value
    print("\nrHR (Hit Rate by Rating value): ")
    RecommenderMetrics.RatingHitRate(topNPredicted, leftOutPredictions)

    # See how often we recommended a movie the user actually liked
    print("\ncHR (Cumulative Hit Rate, rating >= 4): ", RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions, 4.0))

    # Compute ARHR
    print("\nARHR (Average Reciprocal Hit Rank): ", RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions))




Evaluating top-10 recommendations...
Computing recommendations with leave-one-out...
Predict ratings for left-out set...
Predict all missing ratings...
Compute top 10 recs per user...

Hit Rate:  0.036065573770491806

rHR (Hit Rate by Rating value): 
2.5 0.06666666666666667
3.0 0.008695652173913044
4.0 0.044444444444444446
4.5 0.09433962264150944
5.0 0.056910569105691054

cHR (Cumulative Hit Rate, rating >= 4):  0.056179775280898875

ARHR (Average Reciprocal Hit Rank):  0.013333333333333332


In [10]:
print("\nComputing complete recommendations, no hold outs...")
algo.fit(fullTrainSet)
bigTestSet = fullTrainSet.build_anti_testset()
allPredictions = algo.test(bigTestSet)
topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n=10)


Computing complete recommendations, no hold outs...


In [11]:
# Print user coverage with a minimum predicted rating of 4.0:
print("\nUser coverage: ", RecommenderMetrics.UserCoverage(topNPredicted, fullTrainSet.n_users, ratingThreshold=4.0))


User coverage:  0.9245901639344263


In [12]:
# Measure diversity of recommendations:
print("Diversity: ", RecommenderMetrics.Diversity(topNPredicted, simsAlgo))

Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.

Diversity:  0.9642412821104059


In [13]:
# Measure novelty (average popularity rank of recommendations):
print("\nNovelty (average popularity rank): ", RecommenderMetrics.Novelty(topNPredicted, rankings))


Novelty (average popularity rank):  504.3873857062885


# XXXXXXXXXXX

In [16]:
from surprise import NormalPredictor
from Evaluator import Evaluator

import random
import numpy as np

def LoadMovieLensData():
    ml = MovieLens()
    print("Loading movie ratings...")
    data = ml.loadMovieLensLatestSmall()
    print("\nComputing movie popularity ranks so we can measure novelty later...")
    rankings = ml.getPopularityRanks()
    return (data, rankings)

np.random.seed(0)
random.seed(0)


In [7]:
# Load up common data set for the recommender algorithms
(evaluationData, rankings) = LoadMovieLensData()

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...


In [8]:
# Construct an Evaluator to, you know, evaluate them
evaluator = Evaluator(evaluationData, rankings)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [9]:
# Throw in an SVD recommender
SVDAlgorithm = SVD(random_state=10)
evaluator.AddAlgorithm(SVDAlgorithm, "SVD")

In [10]:
# Just make random recommendations
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

In [11]:
# Fight!
evaluator.Evaluate(True)

Evaluating  SVD ...
Evaluating accuracy...
Evaluating top-N with leave-one-out...
Computing hit-rate and rank metrics...
Computing recommendations with full data set...
Analyzing coverage, diversity, and novelty...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Analysis complete.
Evaluating  Random ...
Evaluating accuracy...
Evaluating top-N with leave-one-out...
Computing hit-rate and rank metrics...
Computing recommendations with full data set...
Analyzing coverage, diversity, and novelty...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Analysis complete.


Algorithm  RMSE       MAE        HR         cHR        ARHR       Coverage   Diversity  Novelty   
SVD        0.8779     0.6732     0.0361     0.0361     0.0133     0.9246     0.0314     504.3874  
Random     1.4227     1.1375     0.0180     0.0180     0.0090     1.0000     0.0535     843.9634  

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accurac

### Content Based Recommendations

In [5]:
from MovieLens import MovieLens
from ContentKNNAlgorithm import ContentKNNAlgorithm
from Evaluator import Evaluator
from surprise import NormalPredictor

import random
import numpy as np

def LoadMovieLensData():
    ml = MovieLens()
    print("Loading movie ratings...")
    data = ml.loadMovieLensLatestSmall()
    print("\nComputing movie popularity ranks so we can measure novelty later...")
    rankings = ml.getPopularityRanks()
    return (ml, data, rankings)

np.random.seed(0)
random.seed(0)

# Load up common data set for the recommender algorithm.
(ml, evaluationData, rankings) = LoadMovieLensData()

# Construct an Evaluator to evaluate chosen models (KNN, random).
evaluator = Evaluator(evaluationData, rankings)

# Initialize content-based KNN. By default the algorithm creates 40 clusters.
contentKNN = ContentKNNAlgorithm()
evaluator.AddAlgorithm(contentKNN, "ContentKNN")

# Just make random recommendations
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

evaluator.Evaluate(False)

evaluator.SampleTopNRecs(ml)

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating  ContentKNN ...
Evaluating accuracy...
Computing content-based similarity matrix...
0  of  8775
100  of  8775
200  of  8775
300  of  8775
400  of  8775
500  of  8775
600  of  8775
700  of  8775
800  of  8775
900  of  8775
1000  of  8775
1100  of  8775
1200  of  8775
1300  of  8775
1400  of  8775
1500  of  8775
1600  of  8775
1700  of  8775
1800  of  8775
1900  of  8775
2000  of  8775
2100  of  8775
2200  of  8775
2300  of  8775
2400  of  8775
2500  of  8775
2600  of  8775
2700  of  8775
2800  of  8775
2900  of  8775
3000  of  8775
3100  of  8775
3200  of  8775
3300  of  8775
3400  of  8775
3500  of  8775
3600  of  8775
3700  of  8775
3800  of  8775
3900  of  8775
4000  of  8775
4100  of  8775
4200  of  8775
4300  of  8775
4400  of  8775
4500  of  8775
4600  of  8775
4700  of  8

Here we run through the recommendation with the same framework. The steps here are similar to the above when I tested the recommendation evaluation framework. The difference here is that instead of evaluating the SVD algorithm against a random one, I am evaluating a new ContentKNN algorithm against random recommendations. Also I dont compute the top-N recommender metrics and only look at accuracy but will sample the top-N recommendations for user 85 to get a feel of how the system is working.

So whats in the contentKNN algorithm? The main point architecturally is that we're creating ContentKNN algorithm as a derived class from surprise library algo base-class.

### Neighborhood based recommendations:

**User-based collaborative filtering**

The idea behind user-based collaborative filtering is simple is to find other users similar to yourself, based on their ratings history, and then recommend stuff they liked that you haven't seen yet. Lets implement this collaborative filtering technique with my framework below.

In [6]:
from MovieLens import MovieLens
from surprise import KNNBasic
import heapq
from collections import defaultdict
from operator import itemgetter
        
# Select test user with inner user id 86
testSubject = '86'
k = 10

In [7]:
# Load our data set and compile training set.
ml = MovieLens()
data = ml.loadMovieLensLatestSmall()
trainSet = data.build_full_trainset()

# Not initializing a test set here since I only want to generate top-n recommendations and dont care about 
# predicting user ratings.

In [8]:
# Specify cosine similarity as our similarity metric and that we want user-user similarities.
sim_options = {'name': 'cosine',
               'user_based': True
               }

In [9]:
# Initialize model with tunings, fit to data and compute similarities.
model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)
simsMatrix = model.compute_similarities()

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [10]:
# Get top N similar users to our test subject
# (alternate approach would be to select users up to some similarity threshold)
testUserInnerID = trainSet.to_inner_uid(testSubject)
similarityRow = simsMatrix[testUserInnerID]


In [11]:
#compile list of similar users.
similarUsers = []
for innerID, score in enumerate(similarityRow):
    if (innerID != testUserInnerID):
        similarUsers.append( (innerID, score) )

In [12]:
#quickly sort all users by similarity to test user 86 and extract the top K-neighbors.
kNeighbors = heapq.nlargest(k, similarUsers, key=lambda t: t[1])

In [13]:
# Get the movies they rated, and add up ratings for movie, weighted by user similarity
candidates = defaultdict(float)
for similarUser in kNeighbors:
    innerID = similarUser[0]
    userSimilarityScore = similarUser[1]
    theirRatings = trainSet.ur[innerID]
    for rating in theirRatings:
        candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore

In [14]:
# Build a dictionary containing the movies the user has already seen so that we can exclude them.
watched = {}
for itemID, rating in trainSet.ur[testUserInnerID]:
    watched[itemID] = 1

In [15]:
# Get top-rated items from similar users:
pos = 0
for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
    if not itemID in watched:
        movieID = trainSet.to_raw_iid(itemID)
        print(ml.getMovieName(int(movieID)), ratingSum)
        pos += 1
        if (pos > 10):
            break

10 Things I Hate About You (1999) 2.7
Full Metal Jacket (1987) 2.2
Chinatown (1974) 2.2
Payback (1999) 2.0
Maltese Falcon, The (1941) 1.8
Alien (1979) 1.8
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964) 1.6
L.A. Confidential (1997) 1.6
Lock, Stock & Two Smoking Barrels (1998) 1.6
Kill Bill: Vol. 1 (2003) 1.5
Young Frankenstein (1974) 1.5


**Item-based collaborative filtering**

Another way to do collaborative filtering instead of looking for other people similar to a user and recommending what a similar user liked, we can look at items a user liked and recommend items similar to those a user has liked.

There are a few reasons that using similairites between items can be better than similarities between people: 

- Items tend to be more permanent in nature than people in the sense that a movie will always be a movie but peoples taste may change quickly over the course of their lives. So focusing on the similarities between unchageing objects can produce better results than looking at similiarites between people where one may have liked something this week but be looking at something totally different next week.


- Because an item similarity matrix wont change as often as a user similarity matrix, they dont need to be computed as often.


- There are typically far less items to deal with than people. Whatever company using a system such as this one, probably has a relatively small product catalog compared to the number of customers or users they have. This makes a 2D matrix mapping item similarity scores between every item in a catalog much smaller than a mapping of similarities between every user visiting your site. Not only is it easier to store the matrix but much faster to compute aswell. When dealing with massive systems like Amazon's or Netflix's computational efficiency is very important, not only does it require fewer resources, it means that you can regenerate similarities between items more often, making your system more responsive when new items are introduced.


- Makes for better experiences for new users because as soon as a new user comes to a website, as soon as they have indicated interest in one thing.

In [31]:
# Test framework for Item-based collaborative filtering.

testSubject = '85'
k = 10

In [32]:
# Load our data set and compile training set.

ml = MovieLens()
data = ml.loadMovieLensLatestSmall()
trainSet = data.build_full_trainset()

In [33]:
# Specify cosine similarity as our similarity metric and compute over item-item pairs as opposed to user-user pairs.

sim_options = {'name': 'cosine',
               'user_based': False
               }

In [34]:
# Initialize model with tunings, fit to data and compute similarities.

model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)
simsMatrix = model.compute_similarities()

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [35]:
# Convert user id to raw user id.

testUserInnerID = trainSet.to_inner_uid(testSubject)

In [36]:
# Get the top N items the test user rated.

testUserRatings = trainSet.ur[testUserInnerID]
kNeighbors = heapq.nlargest(k, testUserRatings, key=lambda t: t[1])

In [37]:
# Get similar items to stuff our user liked (weighted by rating)
candidates = defaultdict(float)
for itemID, rating in kNeighbors:
    similarityRow = simsMatrix[itemID]
    for innerID, score in enumerate(similarityRow):
        candidates[innerID] += score * (rating / 5.0)
    

In [38]:
# Build a dictionary of stuff the user has already seen so that we can exclude them in top-N recommendations
watched = {}
for itemID, rating in trainSet.ur[testUserInnerID]:
    watched[itemID] = 1

In [39]:
# Get top-rated items from similar users:
pos = 0
for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
    if not itemID in watched:
        movieID = trainSet.to_raw_iid(itemID)
        print(ml.getMovieName(int(movieID)), ratingSum)
        pos += 1
        if (pos > 10):
            break

Trust (1990) 8.993195870740706
Night Porter, The (Portiere di notte, Il) (1974) 8.991602228017534
Daytrippers, The (1996) 8.978892697759282
Living in Oblivion (1995) 8.973090760365785
Melvin and Howard (1980) 8.970142500145332
Hate (Haine, La) (1995) 8.967270202229166
Presidio, The (1988) 8.96429313204702
Stop Making Sense (1984) 8.954691793826552
Color Purple, The (1985) 8.954276360003217
Opposite of Sex, The (1998) 8.95170801390616
Clue (1985) 8.949644625955322


#### Tuning collaborative filtering algorithms

There are many ways to implement user-based and item-based collaborative filtering. One thing I'm doing that's kind of arbitrary is pulling off the top 10 highest-rated items for a user when generating item-based recommendations or the top 10 most similar users when finding user-based recommendations. That seems like kind of an arbitrary cut off. Maybe it would be better if instead of taking the top-k sources for recommendation candidates, we just used any source above some given quality threshold. For example, maybe any item a user rated higher than four stars should generate item-based recommendation candidates no matter how many or how few of them there may be. Or (for user-based collaborative filtering) any user that has a cosine similarity greater than 0.95 should be used to generate candidates in the user-based recommendations. 

I try that approach below, proceeding from the line of code that generated the top-k highest-rated movies from our test user and nd replaced it with code that goes through and adds any rating above four stars to the list of movies that generate recommendation candidates.

In [41]:
testSubject = '85'
k = 10

ml = MovieLens()
data = ml.loadMovieLensLatestSmall()
trainSet = data.build_full_trainset()

sim_options = {'name': 'cosine',
               'user_based': False
               }

model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)
simsMatrix = model.compute_similarities()

testUserRatings = trainSet.ur[testUserInnerID]

###
kNeighbors = []
for rating in testUserRatings:
    if rating[1]> 4.0:
        kNeighbors.append(rating)
###
        
candidates = defaultdict(float)
for itemID, rating in kNeighbors:
    similarityRow = simsMatrix[itemID]
    for innerID, score in enumerate(similarityRow):
        candidates[innerID] += score * (rating / 5.0)
        
watched = {}
for itemID, rating in trainSet.ur[testUserInnerID]:
    watched[itemID] = 1

pos = 0
for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
    if not itemID in watched:
        movieID = trainSet.to_raw_iid(itemID)
        print(ml.getMovieName(int(movieID)), ratingSum)
        pos += 1
        if (pos > 10):
            break


Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Opposite of Sex, The (1998) 10.94172656584623
American in Paris, An (1951) 10.936427200000246
One Fine Day (1996) 10.934890689782453
Last Picture Show, The (1971) 10.931806486265977
Gilda (1946) 10.929912059633953
Dead Calm (1989) 10.916981536391042
Streetcar Named Desire, A (1951) 10.91594253451208
Five Easy Pieces (1970) 10.914690389589753
Out of the Past (1947) 10.910660929932783
Killer, The (Die xue shuang xiong) (1989) 10.90933848448976
Born Yesterday (1950) 10.909227888131605


It's hard to say which set of results is better, but what's very noticeable is how different these lists are. We do see some of the same movies being included such as 'The opposite of Sex' but many recommendations from the previous method are totally gone here. With this change, it looks like I've displaced many of the original results with different titles. To see if this might be a promising change, we'd have to test on real users and see how they interact with their recommendations.