# Settings and Imports

In [1]:
!pip install surprise

Collecting surprise
  Downloading https://files.pythonhosted.org/packages/61/de/e5cba8682201fcf9c3719a6fdda95693468ed061945493dea2dd37c5618b/surprise-0.1-py2.py3-none-any.whl
Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/f5/da/b5700d96495fb4f092be497f02492768a3d96a3f4fa2ae7dea46d4081cfa/scikit-surprise-1.1.0.tar.gz (6.4MB)
[K     |████████████████████████████████| 6.5MB 2.8MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.0-cp36-cp36m-linux_x86_64.whl size=1675744 sha256=9773e02b8149f266bd7b76fe5199092b8bc728a500ae809a7840df0f6f50fe57
  Stored in directory: /root/.cache/pip/wheels/cc/fa/8c/16c93fccce688ae1bde7d979ff102f7bee980d9cfeb8641bcf
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.0 surprise-0.1


In [2]:
import pandas as pd 
import numpy as np
from collections import defaultdict

from surprise import Reader, Dataset, SVD, accuracy
from surprise.model_selection import cross_validate, GridSearchCV, train_test_split

In [8]:
# Read the ratings CSV into a DataFrame and preview its first rows.
ratings_path = '/content/drive/My Drive/Colab_Notebooks/data/movies/ratings_small.csv'
ratings = pd.read_csv(ratings_path)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


## Dataset

In [9]:
# Surprise's default Reader assumes ratings lie in (1, 5). Derive the actual
# bounds from the data so estimates are clipped to the real rating range.
rating_bounds = (ratings['rating'].min(), ratings['rating'].max())
reader = Reader(rating_scale=rating_bounds)
# load_from_df expects the columns in (user, item, rating) order.
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Baseline model (SVD)

In [41]:
# Hold out 20% of the ratings for evaluation; the split is seeded for repeatability.
trainset, testset = train_test_split(data, test_size=.2, random_state=42)
# Seed the SVD factor initialisation too, so fits and reported errors are reproducible.
algo = SVD(random_state=42)

False


In [32]:
# 3-fold cross-validation of the baseline, printing RMSE and MAE for each fold.
cv_options = {'measures': ['RMSE', 'MAE'], 'cv': 3, 'verbose': True}
cross_validate(algo, data, **cv_options)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9038  0.8967  0.9080  0.9028  0.0047  
MAE (testset)     0.6968  0.6898  0.6987  0.6951  0.0038  
Fit time          4.20    4.10    4.18    4.16    0.04    
Test time         0.28    0.29    0.40    0.32    0.06    


{'fit_time': (4.199181079864502, 4.104192018508911, 4.1750617027282715),
 'test_mae': array([0.69676532, 0.68984796, 0.69874343]),
 'test_rmse': array([0.90376093, 0.89670132, 0.90804132]),
 'test_time': (0.27787303924560547, 0.2867257595062256, 0.40204858779907227)}

In [33]:
# Train the baseline on the training split produced by train_test_split.
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f60c781c048>

In [34]:
# Score the held-out split, then print and return the RMSE of those predictions.
predictions = algo.test(testset)
accuracy.rmse(predictions, verbose=True)

RMSE: 0.9012


0.9011855362236062

In [35]:
# Pick a sample user and show the ratings they have already given.
uid = 1
ratings.loc[ratings['userId'] == uid]

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
5,1,1263,2.0,1260759151
6,1,1287,2.0,1260759187
7,1,1293,2.0,1260759148
8,1,1339,3.5,1260759125
9,1,1343,2.0,1260759131


In [36]:
# Estimate the rating the sample user would give to one specific movie.
movie_id = 302
algo.predict(uid, movie_id)

Prediction(uid=1, iid=302, r_ui=None, est=2.8666020096326807, details={'was_impossible': False})

# Grid Search CV

In [37]:
# Search space: 3 x 3 x 3 = 27 SVD configurations, each scored with 3-fold CV.
param_grid = {
    'n_epochs': [5, 10, 15],
    'lr_all': [0.002, 0.005, 0.008],
    'reg_all': [0.2, 0.4, 0.6],
}

gs = GridSearchCV(SVD, param_grid, measures=['RMSE', 'MAE'], cv=3)
gs.fit(data)

# Print the best RMSE score first, then the parameter combination that achieved it.
for rmse_report in (gs.best_score, gs.best_params):
    print(rmse_report['rmse'])

0.8970623695275625
{'n_epochs': 15, 'lr_all': 0.008, 'reg_all': 0.2}


In [53]:
# Take the estimator that achieved the best RMSE in the grid search
# and train it on every available rating.
full_trainset = data.build_full_trainset()
algo = gs.best_estimator['rmse']
algo.fit(full_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f4f3f00ce80>

# API

In [43]:
def get_top_n(predictions, n=10):
    """Collect the n highest-estimated items for every user.

    Args:
        predictions (iterable of 5-tuples): Prediction records of the form
            (user id, item id, true rating, estimated rating, details), as
            returned by the ``test`` method of an algorithm.
        n (int): How many recommendations to keep per user. Default is 10.

    Returns:
        A defaultdict(list) mapping each user (raw) id to a list of
        ``(raw item id, rating estimation)`` tuples, ordered from the
        highest estimate to the lowest and truncated to ``n`` entries.
    """
    # Bucket every (item, estimate) pair under the user it was predicted for.
    per_user = defaultdict(list)
    for user_id, item_id, _actual, estimate, _details in predictions:
        per_user[user_id].append((item_id, estimate))

    # Rank each user's bucket by estimate, best first, and keep the n highest.
    # Only existing keys are reassigned, so iterating a key snapshot is safe.
    for user_id in list(per_user):
        ranked = sorted(per_user[user_id], key=lambda pair: pair[1], reverse=True)
        per_user[user_id] = ranked[:n]

    return per_user

In [44]:
# Surprise's default Reader assumes ratings lie in (1, 5). Derive the actual
# bounds from the data so estimates are clipped to the real rating range.
rating_bounds = (ratings['rating'].min(), ratings['rating'].max())
reader = Reader(rating_scale=rating_bounds)
# load_from_df expects the columns in (user, item, rating) order.
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

In [45]:
# Train on every available rating rather than on a train/test split.
trainset = data.build_full_trainset()
# NOTE(review): this rebinds `algo` to a default-parameter SVD, replacing the
# grid-search estimator fitted earlier in the notebook; confirm that is intended.
# Seeded so the recommendation lists produced from this model are reproducible.
algo = SVD(random_state=42)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f60c6ad04a8>

In [46]:
# Then predict ratings for all pairs (u, i) that are NOT in the training set.
# Note: this rebinds `testset`, which previously held the 20% hold-out split.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

In [48]:
# Reduce the full prediction list to each user's 10 highest-estimated items.
top_n = get_top_n(predictions, n=10)

# Optional (left disabled): print the recommended item ids for every user.
# for uid, user_ratings in top_n.items():
#     print(uid, [iid for (iid, _) in user_ratings])

## Get top 10 recommendations per user
Given a user ID, look up that user's top-10 recommended movies.

In [49]:
# (movieId, estimated rating) pairs recommended for user 1, best first.
top_n[1]

[(7502, 3.6549001458832375),
 (1212, 3.6224327459600016),
 (1217, 3.6223186532253853),
 (858, 3.61516737901969),
 (1219, 3.579817546669988),
 (5995, 3.5784258970373632),
 (48516, 3.575476137695213),
 (3035, 3.567122143728089),
 (2064, 3.551548169322184),
 (1201, 3.5364626406430544)]

In [50]:
# (movieId, estimated rating) pairs recommended for user 2, best first.
top_n[2]

[(899, 4.443233670329531),
 (1212, 4.397175422690432),
 (969, 4.381537427493032),
 (904, 4.370810051652229),
 (318, 4.362856631042568),
 (913, 4.3616920930178384),
 (6787, 4.357325223829409),
 (1221, 4.351347430201646),
 (1252, 4.345143364182624),
 (1217, 4.339924435749329)]

## Get predicted movie rating

Given a user ID and a movie ID, estimate the rating that user would give the movie.

In [51]:
# Estimate a single (user, movie) rating with the model trained on all ratings.
user_id, movie_id = 2, 904
algo.predict(user_id, movie_id)

Prediction(uid=2, iid=904, r_ui=None, est=4.370810051652229, details={'was_impossible': False})

References:
* https://surprise.readthedocs.io/en/stable/getting_started.html
* https://www.kaggle.com/ibtesama/getting-started-with-a-movie-recommendation-system
* https://www.kaggle.com/rounakbanik/movie-recommender-systems
* https://www.kaggle.com/fabiendaniel/film-recommendation-engine
* https://surprise.readthedocs.io/en/stable/FAQ.html
* https://surprise.readthedocs.io/en/stable/matrix_factorization.html?highlight=svd#surprise.prediction_algorithms.matrix_factorization.SVD
