# Build a recommender system and recommend top-N items
This notebook uses collaborative filtering (matrix factorization with the SVD algorithm) to recommend the top-N items for a single user.

## Load packages

In [1]:
from surprise import SVD

## put utils folder in the same directory as the notebook
from utils.MovieLens import MovieLens

## Utility functions

In [2]:
## get anti-test items for a user
def BuildAntiTestSetForUser(testSubject, trainset):
    """Build the anti-test set for a single user.

    The anti-test set holds one entry for every item known to ``trainset``
    that the given user has not rated, so a fitted model can be asked for an
    estimated rating on each of them.

    Args:
        testSubject: Raw user id. It is passed through ``str()`` before the
            lookup, so an ``int`` such as ``85`` is accepted.
        trainset: Training-set object exposing ``global_mean``, ``ur``,
            ``all_items()``, ``to_inner_uid()``, ``to_raw_uid()`` and
            ``to_raw_iid()`` (for example a ``surprise`` ``Trainset``).

    Returns:
        list: ``(raw_uid, raw_iid, fill)`` tuples, one per unrated item, in
        the order produced by ``trainset.all_items()``. ``fill`` is
        ``trainset.global_mean``; it is only a placeholder rating, not a
        rating the user actually gave.

    Raises:
        Whatever ``trainset.to_inner_uid`` raises for an unknown user
        (presumably ``ValueError`` for a surprise ``Trainset`` -- confirm).
    """
    fill = trainset.global_mean

    # The library works on inner ids; raw ids are only used at the boundary.
    inner_uid = trainset.to_inner_uid(str(testSubject))
    # The raw user id is the same for every entry, so resolve it once instead
    # of once per candidate item.
    raw_uid = trainset.to_raw_uid(inner_uid)

    # Inner ids of the items this user has already rated.
    rated = {inner_iid for inner_iid, _rating in trainset.ur[inner_uid]}

    return [
        (raw_uid, trainset.to_raw_iid(inner_iid), fill)
        for inner_iid in trainset.all_items()
        if inner_iid not in rated
    ]

## read data

In [4]:
# Pick an arbitrary test subject: the raw user id whose ratings are inspected
# here and for whom recommendations are generated further down.
testSubject = 85

# Project helper imported from utils.MovieLens; used in this notebook to load
# the ratings, fetch one user's ratings and look up movie names.
ml = MovieLens()

print("Loading movie ratings...")
# The returned dataset object is what build_full_trainset() is later called on.
data = ml.loadMovieLensLatestSmall()

# NOTE(review): presumably relies on the load above having run first -- confirm.
userRatings = ml.getUserRatings(testSubject)
# Last expression of the cell: displays the container type (a list, per the
# recorded output).
type(userRatings)

Loading movie ratings...


list

In [5]:
## Preview the first five entries; judging by the recorded output, each entry
## is an (item id, rating) pair.
userRatings[:5]

[(2, 5.0), (3, 2.0), (5, 3.0), (10, 5.0), (19, 3.0)]

In [6]:
## Split the user's ratings into favourites and dislikes.
## A rating strictly above 4.0 counts as "loved", strictly below 3.0 as
## "hated"; everything in between is left out of both lists.
loved = [entry for entry in userRatings if float(entry[1]) > 4.0]
hated = [entry for entry in userRatings if float(entry[1]) < 3.0]

# Each entry is (item id, rating); only the id is needed to look up the title.
print("\nUser ", testSubject, " loved these movies:")
for entry in loved:
    print(ml.getMovieName(entry[0]))

print("\n...and didn't like these movies:")
for entry in hated:
    print(ml.getMovieName(entry[0]))


User  85  loved these movies:
Jumanji (1995)
GoldenEye (1995)
Braveheart (1995)
Jerky Boys, The (1995)
Léon: The Professional (a.k.a. The Professional) (Léon) (1994)
Pulp Fiction (1994)
Stargate (1994)
Shawshank Redemption, The (1994)
Star Trek: Generations (1994)
Clear and Present Danger (1994)
Speed (1994)
True Lies (1994)
Fugitive, The (1993)
Jurassic Park (1993)
Terminator 2: Judgment Day (1991)
Mission: Impossible (1996)
Rock, The (1996)

...and didn't like these movies:
Grumpier Old Men (1995)
Mortal Kombat (1995)
Postman, The (Postino, Il) (1994)
Casper (1995)
Lord of Illusions (1995)
Mighty Morphin Power Rangers: The Movie (1995)
Prophecy, The (1995)
Dolores Claiborne (1995)
Heavenly Creatures (1994)
Little Women (1994)
Miracle on 34th Street (1994)
Nell (1994)
Poison Ivy II (1996)
Tank Girl (1995)
While You Were Sleeping (1995)
Wes Craven's New Nightmare (Nightmare on Elm Street Part 7: Freddy's Finale, A) (1994)
Naked Gun 33 1/3: The Final Insult (1994)
Richie Rich (1994)


## Collaborative filtering: matrix factorization with SVD algorithm

In [7]:
print("\nBuilding recommendation model...")
## Take all data as the training set (no hold-out split is made here).
trainSet = data.build_full_trainset()

# random_state is pinned to a fixed integer.
# NOTE(review): presumably this makes the fitted factors reproducible across
# runs -- confirm against the surprise documentation.
algo = SVD(random_state=10)
# Last expression of the cell, so its return value is displayed (the recorded
# output shows an SVD object repr).
algo.fit(trainSet)


Building recommendation model...


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f7f5d98dfa0>

In [12]:
## Take only the items this user has not rated yet.
testSet = BuildAntiTestSetForUser(testSubject, trainSet)

# Each entry is (raw user id, raw item id, placeholder rating); the placeholder
# is the training set's global_mean, not a rating the user actually gave.
testSet[:5]

[('85', '31', 3.543608255669773),
 ('85', '1029', 3.543608255669773),
 ('85', '1061', 3.543608255669773),
 ('85', '1129', 3.543608255669773),
 ('85', '1172', 3.543608255669773)]

In [8]:
print("Computing recommendations...")
# Ask the fitted model for a prediction on every entry of the anti-test set.
predictions = algo.test(testSet)
# Last expression of the cell: displays the container type (a list, per the
# recorded output).
type(predictions)

Computing recommendations...


list

In [9]:
# Preview the first five predictions. In the recorded output, `est` is the
# model's estimate while `r_ui` just echoes the global-mean placeholder that
# was put into the test set.
predictions[:5]

[Prediction(uid='85', iid='31', r_ui=3.543608255669773, est=3.1324638482291762, details={'was_impossible': False}),
 Prediction(uid='85', iid='1029', r_ui=3.543608255669773, est=3.2853604905261515, details={'was_impossible': False}),
 Prediction(uid='85', iid='1061', r_ui=3.543608255669773, est=3.3872287517216773, details={'was_impossible': False}),
 Prediction(uid='85', iid='1129', r_ui=3.543608255669773, est=3.4175764210399016, details={'was_impossible': False}),
 Prediction(uid='85', iid='1172', r_ui=3.543608255669773, est=3.912568105019612, details={'was_impossible': False})]

## get top-N recommendations

In [11]:
print("\nWe recommend:")

## Reduce every prediction to a (movie id, estimated rating) pair and sort by
## estimated rating in decreasing order.
## The unpacking happens inside a generator expression, so the helper names
## stay local: the notebook namespace is not littered with unused loop
## variables and `_` (IPython's "last output" variable) is not rebound.
## `sorted(..., reverse=True)` is stable, so ties keep their original order.
recommendations = sorted(
    ((int(iid), est) for _uid, iid, _r_ui, est, _details in predictions),
    key=lambda pair: pair[1],
    reverse=True,
)

# top-10
for top_pick in recommendations[:10]:
    print(ml.getMovieName(top_pick[0]))


We recommend:
Seven Samurai (Shichinin no samurai) (1954)
There Will Be Blood (2007)
Dark Knight, The (2008)
Ran (1985)
Usual Suspects, The (1995)
Night of the Living Dead (1968)
Bowling for Columbine (2002)
Hurt Locker, The (2008)
Harvey (1950)
Departed, The (2006)
