This notebook uses SVD to build a recommender system for movies.

In [1]:
from MovieLens import MovieLens
from surprise import SVD, SVDpp
from Evaluator import Evaluator


In [2]:
def LoadMovieLensData():
    """Load everything the notebook needs from the MovieLens helper.

    Returns:
        A 3-tuple ``(ml, data, rankings)``: the ``MovieLens`` helper instance,
        the result of ``loadMovieLensLatestSmall()`` and the result of
        ``getPopularityRanks()``.
        NOTE(review): ``data`` is presumably a Surprise dataset, since later
        cells pass it to ``GridSearchCV.fit`` and ``train_test_split`` — confirm
        in the MovieLens module.
    """
    movie_lens = MovieLens()
    ratings_data = movie_lens.loadMovieLensLatestSmall()
    popularity_ranks = movie_lens.getPopularityRanks()
    print('done loading data')
    return movie_lens, ratings_data, popularity_ranks

In [3]:
# Load the MovieLens helper, the ratings data and the popularity rankings once;
# later cells reuse all three names.
ml, evaluationData, rankings= LoadMovieLensData()

done loading data


In [4]:
# Build the evaluator from the ratings data and the popularity rankings;
# algorithms are registered on it in the following cells.
evaluator=Evaluator(evaluationData, rankings)

In [5]:
# Register an SVD model with default hyperparameters; the label 'svd' is the
# name shown for it in the evaluation output.
svd= SVD()
evaluator.AddAlgorithm(svd, 'svd')

In [6]:
# Register an SVD++ model with default hyperparameters under the label 'svd++'
# so it is evaluated alongside plain SVD.
svdplusplus= SVDpp()
evaluator.AddAlgorithm(svdplusplus, 'svd++')

In [7]:
# Score every registered algorithm.
# NOTE(review): the False flag presumably skips the top-N metrics, since only
# RMSE and MAE are reported below — confirm in Evaluator.
evaluator.Evaluate(False)
# Print sample recommendations from each registered algorithm.
# NOTE(review): `ml` is presumably needed to map movie IDs to titles — confirm.
evaluator.SampleTopNRecs(ml)

Evaluating  svd ...
Evaluating accuracy...
Analysis complete.
Evaluating  svd++ ...
Evaluating accuracy...
Analysis complete.


Algorithm  RMSE       MAE       
svd        0.9054     0.7010    
svd++      0.8949     0.6895    

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accuracy.
MAE:       Mean Absolute Error. Lower values mean better accuracy.

Using recommender  svd

Building recommendation model...
Computing recommendations...

We recommend:
Casablanca (1942) 4.649140578872909
Monty Python's The Meaning of Life (1983) 4.460887540339456
Star Wars: Episode IV - A New Hope (1977) 4.412698149889582
Hurt Locker, The (2008) 4.406935662392834
Graduate, The (1967) 4.3751131649125075
Rear Window (1954) 4.351066738486898
Prophet, A (Un Prophète) (2009) 4.341619676135767
Stand by Me (1986) 4.33420154941339
Godfather: Part II, The (1974) 4.318249030646044
Wallace & Gromit: A Close Shave (1995) 4.312052257102536

Using recommender  svd++

Building recommendation mode

# Hyperparameter tuning for SVD

In [11]:
from surprise.model_selection import GridSearchCV

# Candidate hyperparameter values; every combination (2 x 2 x 2) is tried.
param_grid = {
    'n_epochs': [20, 30],
    'lr_all': [0.005, 0.01],
    'n_factors': [50, 100],
}

# Search the grid for SVD, scoring each combination by RMSE with 3-fold
# cross-validation on the full ratings data.
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)
gs.fit(evaluationData)

# Keep the best-RMSE combination; the tuned model is built from `params`
# in a later cell.
params = gs.best_params['rmse']
print('Best RMSE score attained: ', gs.best_score['rmse'])
print(params)

Best RMSE score attained:  0.8982639167717396
{'lr_all': 0.005, 'n_epochs': 20, 'n_factors': 50}


In [16]:
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse
# Hold out 25% of the ratings for testing; the fixed random_state keeps the
# split repeatable across runs.
trainSet, testSet = train_test_split(
    evaluationData,
    test_size=0.25,
    random_state=1,
)

# Build an SVD model from the hyperparameters chosen by the grid search.
SVD_tune = SVD(
    n_factors=params['n_factors'],
    n_epochs=params['n_epochs'],
    lr_all=params['lr_all'],
)
SVD_tune.fit(trainSet)

# Predict the held-out ratings. rmse() prints the score, and because it is the
# last expression of the cell its return value is displayed as well.
predictions = SVD_tune.test(testSet)
rmse(predictions)

RMSE: 0.8999


0.899857436364382

In [21]:
# Sample top-N recommendations for user 85, restricted to movies that user has not rated.

testSubject= 85
# Train on every available rating for the final model. This rebinds `trainSet`,
# which until now held the 75% split from the evaluation cell.
trainSet = evaluationData.build_full_trainset()

def GetAntiTestSetForUser(trainSet, testSubject):
    """Build the anti-test set for a single user.

    The anti-test set pairs the user with every item in ``trainSet`` for which
    the user has no rating, so a fitted model can score those items.

    Args:
        trainSet: Trainset-like object exposing ``global_mean``, ``ur``,
            ``all_items()``, ``to_inner_uid()``, ``to_raw_uid()`` and
            ``to_raw_iid()``.
        testSubject: Raw user id; it is converted with ``str()`` before the
            inner-id lookup.

    Returns:
        A list of ``(raw_user_id, raw_item_id, global_mean)`` tuples, in the
        order produced by ``trainSet.all_items()``.
    """
    inner_uid = trainSet.to_inner_uid(str(testSubject))
    raw_uid = trainSet.to_raw_uid(inner_uid)
    # The global mean fills the "actual rating" slot of each tuple.
    placeholder_rating = trainSet.global_mean
    # Inner item ids the user has already rated.
    rated_items = {item for item, _ in trainSet.ur[inner_uid]}
    return [
        (raw_uid, trainSet.to_raw_iid(item), placeholder_rating)
        for item in trainSet.all_items()
        if item not in rated_items
    ]

# Refit the tuned model on the current trainSet before scoring unseen movies.
SVD_tune.fit(trainSet)

print("Computing recommendations...")
# Score every movie the test subject has not rated.
testSet = GetAntiTestSetForUser(trainSet, testSubject)
predictions = SVD_tune.test(testSet)

print ("\nWe recommend:")
# Each prediction unpacks to five fields; only the movie id (2nd) and the
# estimated rating (4th) are kept, ordered from highest estimate to lowest.
recommendations = sorted(
    (
        (int(movieID), estimatedRating)
        for _, movieID, _, estimatedRating, _ in predictions
    ),
    key=lambda rec: rec[1],
    reverse=True,
)

# Show the ten highest-scoring movies by title.
for movie_id, estimate in recommendations[:10]:
    print(ml.getMovieName(movie_id), estimate)

Computing recommendations...

We recommend:
Rear Window (1954) 4.505166551211121
All About Eve (1950) 4.497459674244014
City of God (Cidade de Deus) (2002) 4.381015143632628
African Queen, The (1951) 4.3539168298183775
Producers, The (1968) 4.352272743776446
Maltese Falcon, The (1941) 4.2755665948006305
American Beauty (1999) 4.2463112878166624
Sunset Blvd. (a.k.a. Sunset Boulevard) (1950) 4.244799539483468
Being John Malkovich (1999) 4.241363213245121
Strangers on a Train (1951) 4.238194359795351
