In [162]:


from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import time
import datetime
import random

import numpy as np
import six
from tabulate import tabulate
from collections import defaultdict


import surprise 
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.model_selection import GridSearchCV
from surprise.model_selection import KFold
from surprise.model_selection.split import train_test_split
from surprise import NormalPredictor
from surprise import BaselineOnly
from surprise import KNNBasic
from surprise import KNNWithMeans
from surprise import KNNBaseline
from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import SlopeOne
from surprise import CoClustering
from surprise import accuracy

# from sklearn.model_selection import train_test_split

In [245]:

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# set RNG
np.random.seed(0)
random.seed(0)

In [4]:
# dataset = 'ml-1m'
# data = Dataset.load_builtin(dataset)
# kf = KFold(random_state=0)  # folds will be the same for all algorithms.

In [249]:
import pandas as pd
rating = pd.read_csv('data/rating.csv', encoding="latin-1")

In [252]:
# rename columns to fit the model 
ratings = pd.DataFrame(rating.rename(columns={"User-ID": "userID", "ISBN": "itemID", "Book-Rating": "rating"}))

In [253]:
rating.head(2)
ratings.head(2)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0.0
1,276726,155061224,5.0


Unnamed: 0,userID,itemID,rating
0,276725,034545104X,0.0
1,276726,155061224,5.0


In [254]:
ratings

Unnamed: 0,userID,itemID,rating
0,276725,034545104X,0.0
1,276726,155061224,5.0
2,276727,446520802,0.0
3,276729,052165615X,3.0
4,276729,521795028,6.0
...,...,...,...
1048570,250764,451410777,0.0
1048571,250764,452264464,8.0
1048572,250764,048623715X,0.0
1048573,250764,486256588,0.0


In [237]:
books_df = pd.read_csv('data/books.csv', encoding="latin-1")

  interactivity=interactivity, compiler=compiler, result=result)


In [238]:
# Drop imageURL features, don't need them 
features = ['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher']
books = books_df[features]

In [239]:
books.shape
books.head(2)

(271379, 5)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada


In [151]:
ratings.shape
ratings.head()
ratings.describe()
ratings.info()

(1048575, 3)

Unnamed: 0,userID,itemID,rating
0,276725,034545104X,0.0
1,276726,155061224,5.0
2,276727,446520802,0.0
3,276729,052165615X,3.0
4,276729,521795028,6.0


Unnamed: 0,userID,rating
count,1048575.0,1048570.0
mean,128508.9,2.879905
std,74218.76,3.857869
min,2.0,0.0
25%,63394.0,0.0
50%,128835.0,0.0
75%,192779.0,7.0
max,278854.0,10.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 3 columns):
 #   Column  Non-Null Count    Dtype  
---  ------  --------------    -----  
 0   userID  1048575 non-null  int64  
 1   itemID  1048574 non-null  object 
 2   rating  1048570 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 24.0+ MB


In [7]:
# check range of rating 
min_rating = ratings.rating.min()
max_rating = ratings.rating.max()
min_rating
max_rating

0.0

10.0

##  Merge rating and books data

In [259]:
rating_books_merged = pd.merge(rating, books, on='ISBN')
rating_books_merged.shape
rating_books_merged.head()

(941148, 7)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,276725,034545104X,0.0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
1,2313,034545104X,5.0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
2,6543,034545104X,0.0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
3,8680,034545104X,5.0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
4,10314,034545104X,9.0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books


In [262]:
rating_books_merged[rating_books_merged['ISBN'] == '034545104X']['Book-Title'].unique()

array(['Flesh Tones: A Novel'], dtype=object)

# Reduce the dimension
* Inlcude only books with rating > 100
* Include only users with rating > 100
* Avoid kernel crash


In [22]:
# Update threshold for ratings to include 
book_threshold = 100
user_threshold = 100

selected_books = ratings['itemID'].value_counts() > book_threshold
selected_books = selected_books[selected_books].index.tolist()

selected_users = ratings['userID'].value_counts() > user_threshold
selected_users = selected_users[selected_users].index.tolist()

In [23]:
updated_df = ratings[(ratings['itemID'].isin(selected_books)) & (ratings['userID'].isin(selected_users))]
print('The original data frame shape:\t{}'.format(ratings.shape))
print('The new data frame shape:\t{}'.format(updated_df.shape))

The original data frame shape:	(1048575, 3)
The new data frame shape:	(54350, 3)


In [27]:
# Specify range of reviews using Reader class 

from surprise import Reader
reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(updated_df[['userID', 'itemID', 'rating']], reader)

In [39]:
# kf = KFold(random_state=0)  # folds will be the same for all algorithms

In [37]:
# Run later 
benchmark = []
# Iterate over all algorithms
for algorithm in [NormalPredictor(), BaselineOnly(), KNNBasic(), SVD(), SVDpp()]:
# [SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]:
   
    # Perform cross validation
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)
    
    # Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)
    
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BaselineOnly,3.422806,0.074787,0.104907
SVD,3.572929,2.037984,0.142842
KNNBasic,3.683654,0.162999,1.97201
SVDpp,3.851021,26.459518,1.107822
NormalPredictor,4.674378,0.051953,0.138585


In [38]:
# Run later 
benchmark = []
# Iterate over all algorithms
for algorithm in [SlopeOne(), KNNBasic(), KNNWithMeans(),]:
# [SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]:
   
    # Perform cross validation
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)
    
    # Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)
    
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SlopeOne,3.477395,0.111953,0.835064
KNNWithMeans,3.501631,0.191525,2.154415
KNNBasic,3.677824,0.18332,2.040198


# Baseline model has best rmse score: 
* ALS method 

In [51]:
# # Split data into train-set test-set
train_data, test_data = train_test_split(data, test_size=0.2)

In [130]:
model_bsl = BaselineOnly(bsl_options={'method': 'als', 'n_epochs': 5,
                                     'reg_u': 12, 'reg_i': 5})
predictions_bsl = model_bsl.fit(train_data).test(test_data)
accuracy.rmse(predictions_bsl)

Estimating biases using als...
RMSE: 3.4237


3.423703225971531

In [132]:
# Let's build a pandas dataframe with all the predictions

def get_Iu(uid):
    """Return the number of items rated by given user
    
    Args:
        uid: The raw id of the user.
    Returns:
        The number of items rated by the user.
    """
    
    try:
        return len(train_data.ur[train_data.to_inner_uid(uid)])
    except ValueError:  # user was not part of the trainset
        return 0
    
def get_Ui(iid):
    """Return the number of users that have rated given item
    
    Args:
        iid: The raw id of the item.
    Returns:
        The number of users that have rated the item.
    """
    
    try:
        return len(train_data.ir[train_data.to_inner_iid(iid)])
    except ValueError:  # item was not part of the trainset
        return 0

In [134]:
df = pd.DataFrame(predictions_bsl, columns=['uid', 'iid', 'rui', 'est', 'details'])    
df['Iu'] = df.uid.apply(get_Iu)
df['Ui'] = df.iid.apply(get_Ui)
df['err'] = abs(df.est - df.rui)

In [135]:
best_predictions_bsl = df.sort_values(by='err')[:10]
worst_predictions_bsl = df.sort_values(by='err')[-10:]
best_predictions_bsl
worst_predictions_bsl


Unnamed: 0,uid,iid,rui,est,details,Iu,Ui,err
5917,167349,425182908,0.0,0.0,{'was_impossible': False},45,70,0.0
3377,87746,671867091,0.0,0.0,{'was_impossible': False},100,59,0.0
5873,87746,375700757,0.0,0.0,{'was_impossible': False},100,89,0.0
3393,39646,425182908,0.0,0.0,{'was_impossible': False},100,70,0.0
5854,55548,316781266,0.0,0.0,{'was_impossible': False},73,60,0.0
3417,23768,399146253,0.0,0.0,{'was_impossible': False},194,38,0.0
995,198711,345380371,0.0,0.0,{'was_impossible': False},152,96,0.0
574,36836,671867156,0.0,0.0,{'was_impossible': False},92,69,0.0
573,98741,031298328X,0.0,0.0,{'was_impossible': False},119,77,0.0
7946,98741,440213290,0.0,0.0,{'was_impossible': False},119,54,0.0


Unnamed: 0,uid,iid,rui,est,details,Iu,Ui,err
818,200674,553576801,10.0,0.761639,{'was_impossible': False},70,44,9.238361
10786,200674,446607657,10.0,0.731876,{'was_impossible': False},70,97,9.268124
2968,55492,61015725,10.0,0.722581,{'was_impossible': False},165,36,9.277419
5396,172742,743225406,10.0,0.696206,{'was_impossible': False},102,59,9.303794
9747,11601,60938455,10.0,0.691043,{'was_impossible': False},86,89,9.308957
1324,35859,842342702,10.0,0.620575,{'was_impossible': False},239,41,9.379425
7311,69232,440222656,10.0,0.434504,{'was_impossible': False},74,168,9.565496
1933,245827,451183665,10.0,0.339853,{'was_impossible': False},59,66,9.660147
10219,69232,051513287X,10.0,0.291569,{'was_impossible': False},74,94,9.708431
2069,31826,439139597,0.0,10.0,{'was_impossible': False},39,67,10.0


# Get top N-book recommendations for each user

In [203]:
def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [310]:
# First train an SVD algorithm

trainset_bsl = data.build_full_trainset()

model_bsl.fit(trainset_bsl)

Estimating biases using als...


<surprise.prediction_algorithms.baseline_only.BaselineOnly at 0x11dc2c790>

In [311]:
# predict ratings for all pairs (u, i) that are NOT in the training set

testset_bsl = trainset_bsl.build_anti_testset()
predictions_bsl = model_bsl.test(testset_bsl)

# Enter User info : User-ID to get recommended books

In [313]:

# Enter use info 

user = 276925
top_n = get_top_n(predictions_bsl)
user_book_isbn = top_n.get(user)


In [314]:

print('\n\n The following books are remmended for you:\n\n ')

for isbn, pred_rating in user_book_isbn:
    book_name = rating_books_merged[rating_books_merged['ISBN'] == isbn]['Book-Title'].unique()
    print(str(book_name)[2:-2] )   
    print('\n\t\t You will like this book with (predictive) rating of: ', round(pred_rating, 2),'/10.0' '\n')
    




 The following books are remmended for you:

 
Harry Potter and the Goblet of Fire (Book 4)

		 You will like this book with (predictive) rating of:  6.11 /10.0

Harry Potter and the Prisoner of Azkaban (Book 3)

		 You will like this book with (predictive) rating of:  6.06 /10.0

Harry Potter and the Chamber of Secrets (Book 2)

		 You will like this book with (predictive) rating of:  6.03 /10.0

Harry Potter and the Sorcerer's Stone (Book 1)

		 You will like this book with (predictive) rating of:  5.95 /10.0

Harry Potter and the Order of the Phoenix (Book 5)

		 You will like this book with (predictive) rating of:  5.11 /10.0

Coraline

		 You will like this book with (predictive) rating of:  4.48 /10.0

Harry Potter and the Prisoner of Azkaban (Book 3)

		 You will like this book with (predictive) rating of:  4.41 /10.0

Anne Frank: The Diary of a Young Girl

		 You will like this book with (predictive) rating of:  4.38 /10.0

Eats

		 You will like this book with (predictive) r

# Matrix Factorization: SVD, SVDpp
* Hyper parameter tunning using GridSearchCV 

In [287]:
model_svd = SVD()
fit_model_svd = model_svd.fit(train_data)
predictions_svd = fit_model_svd.test(test_data)

In [288]:
accuracy.rmse(predictions_svd)

RMSE: 3.6150


3.614955702286585

In [289]:
# predict one uid and one iid
pred1 = model_svd.predict(uid ='6', iid = '2' )
pred1.est

2.2592226310947563

In [290]:
# Prediction df for best and wordst cases 

df = pd.DataFrame(predictions_svd, columns=['uid', 'iid', 'rui', 'est', 'details'])    
df['Iu'] = df.uid.apply(get_Iu)
df['Ui'] = df.iid.apply(get_Ui)
df['err'] = abs(df.est - df.rui)


best_predictions_svd = df.sort_values(by='err')[:10]
worst_predictions_svd = df.sort_values(by='err')[-10:]
best_predictions_svd
worst_predictions_svd

Unnamed: 0,uid,iid,rui,est,details,Iu,Ui,err
1259,23768,671004573,0.0,0.0,{'was_impossible': False},194,67,0.0
1381,115120,385720106,0.0,0.0,{'was_impossible': False},133,108,0.0
1376,35859,385336179,0.0,0.0,{'was_impossible': False},239,62,0.0
6348,113270,767905180,0.0,0.0,{'was_impossible': False},88,76,0.0
6372,11601,743237188,0.0,0.0,{'was_impossible': False},86,93,0.0
6379,145451,515124214,0.0,0.0,{'was_impossible': False},54,52,0.0
1362,232131,743237188,0.0,0.0,{'was_impossible': False},190,93,0.0
6400,198711,449003795,0.0,0.0,{'was_impossible': False},152,74,0.0
1357,36836,671024248,0.0,0.0,{'was_impossible': False},92,57,0.0
6407,79186,971880107,0.0,0.0,{'was_impossible': False},52,442,0.0


Unnamed: 0,uid,iid,rui,est,details,Iu,Ui,err
6041,214786,345417623,10.0,0.0,{'was_impossible': False},52,111,10.0
8088,87555,451524934,10.0,0.0,{'was_impossible': False},96,57,10.0
8144,141493,316666343,0.0,10.0,{'was_impossible': False},55,306,10.0
7356,52584,380018179,10.0,0.0,{'was_impossible': False},150,93,10.0
171,185233,451169530,10.0,0.0,{'was_impossible': False},154,51,10.0
10283,241980,385504209,0.0,10.0,{'was_impossible': False},46,237,10.0
1688,110440,014025448X,10.0,0.0,{'was_impossible': False},21,81,10.0
7311,69232,440222656,10.0,0.0,{'was_impossible': False},74,168,10.0
3789,172742,312983867,10.0,0.0,{'was_impossible': False},102,69,10.0
9747,11601,60938455,10.0,0.0,{'was_impossible': False},86,89,10.0


# Get top N-book recommendations for each user

In [203]:
def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [166]:
# First train an SVD algorithm

trainset = data.build_full_trainset()
algo_svd = SVD()
algo_svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11dc6a910>

In [193]:
# predict ratings for all pairs (u, i) that are NOT in the training set

testset = trainset.build_anti_testset()
predictions_svd = algo_svd.test(testset)

# Enter User info : User-ID to get recommended books

In [232]:

# Enter use info 

user = 276925
top_n = get_top_n(predictions_svd)
user_book_isbn = top_n.get(user)


In [298]:

print('\n\n The following books are remmended for you:\n\n ')

for isbn, pred_rating in user_book_isbn:
    book_name = rating_books_merged[rating_books_merged['ISBN'] == isbn]['Book-Title'].unique()
    print(str(book_name)[2:-2] )   
    print('\n\t\t You will like this book with (predictive) rating of: ', round(pred_rating, 2),'/10.0' '\n')
    




 The following books are remmended for you:

 
Harry Potter and the Sorcerer's Stone (Book 1)

		 You will like this book with (predictive) rating of:  7.45 /10.0

Harry Potter and the Goblet of Fire (Book 4)

		 You will like this book with (predictive) rating of:  7.1 /10.0

Harry Potter and the Chamber of Secrets (Book 2)

		 You will like this book with (predictive) rating of:  6.79 /10.0

Harry Potter and the Prisoner of Azkaban (Book 3)

		 You will like this book with (predictive) rating of:  6.57 /10.0

Harry Potter and the Prisoner of Azkaban (Book 3)

		 You will like this book with (predictive) rating of:  6.05 /10.0

Confessions of a Shopaholic (Summer Display Opportunity)

		 You will like this book with (predictive) rating of:  5.99 /10.0

Chicken Soup for the Soul (Chicken Soup for the Soul)

		 You will like this book with (predictive) rating of:  5.97 /10.0

Eats

		 You will like this book with (predictive) rating of:  5.36 /10.0

The Partner

		 You will like this 

# GridSearchCV: Paramater tunning 

In [305]:
param_grid = {'n_epochs': [15, 20, 25, 30, 35, 40, 45, 50],
                'n_factors': [100, 150, 200, 300],
                'lr_all': [0.002, 0.003, 0.004, 0.005],
                'reg_all': [0.02,0.01,0.05,0.1, 0.4, 0.6]}
gs_svd = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, n_jobs = -1)

In [307]:
# gs_svd.fit(train_data)

In [303]:
# trainset = train_data.build_full_trainset()

In [None]:
print("RMSE score", gs_svd.best_score['rmse'])

# print("Best Parameters", svd_mf.best_params['rmse'])

# Use SVD++  for Matrix Factorization
* permorms regularization and optimizes known terms 
* Takes into account of implicit ratings 
* extends vanilla SVD 
* used in Netflix challange 

The method SVD++, as well as most other matrix factorisation algorithms, will depend on a number of main tuning constants: 
* the dimension DD affecting the size of UU and VV
* the learning rate affects the performance of the optimisation step
* the regularisation term affecting the overfitting of the model 
* the number of epochs, which determines how many iterations of optimisation are used

 In surprise, tuning is performed using a function called GridSearchCV, which picks the constants which perform the best at predicting a held out testset. This means constant values to try need to be predefined.

In [47]:
data

<surprise.dataset.DatasetAutoFolds at 0x13754c050>

In [42]:
# cross validation 
# model = SVDpp()

# too big data, reduce 
# result = cross_validate(svdp, data, verbose = True)

In [136]:
model_svdpp = SVDpp()
fit_model_svdpp = model_svdpp.fit(train_data)
predictions_svdpp = fit_model_svdpp.test(test_data)

In [137]:
accuracy.rmse(predictions_svdpp)

RMSE: 3.8744


3.8744140710638275

In [146]:
# predict one uid and one iid
pred1 = model_svdpp.predict(uid ='6', iid = '2' )
pred1.est

2.2592226310947563

In [138]:
# Prediction df for best and wordst cases 

df = pd.DataFrame(predictions_svdpp, columns=['uid', 'iid', 'rui', 'est', 'details'])    
df['Iu'] = df.uid.apply(get_Iu)
df['Ui'] = df.iid.apply(get_Ui)
df['err'] = abs(df.est - df.rui)


best_predictions_pp = df.sort_values(by='err')[:10]
worst_predictions_pp = df.sort_values(by='err')[-10:]
best_predictions_pp
worst_predictions_pp


Unnamed: 0,uid,iid,rui,est,details,Iu,Ui,err
0,204864,312995423,0.0,0.0,{'was_impossible': False},170,55,0.0
4744,177374,373218036,0.0,0.0,{'was_impossible': False},21,53,0.0
4748,15418,553280368,0.0,0.0,{'was_impossible': False},19,86,0.0
4755,180957,385420161,0.0,0.0,{'was_impossible': False},25,51,0.0
4758,39646,446605484,0.0,0.0,{'was_impossible': False},100,70,0.0
4760,244688,312983824,0.0,0.0,{'was_impossible': False},23,58,0.0
4770,187145,385503822,0.0,0.0,{'was_impossible': False},89,45,0.0
4771,55493,60987529,0.0,0.0,{'was_impossible': False},22,92,0.0
4779,112001,515128546,0.0,0.0,{'was_impossible': False},51,71,0.0
4780,225989,345380371,0.0,0.0,{'was_impossible': False},49,96,0.0


Unnamed: 0,uid,iid,rui,est,details,Iu,Ui,err
1456,198930,380002930,0.0,10.0,{'was_impossible': False},11,85,10.0
3188,110912,452282829,0.0,10.0,{'was_impossible': False},97,113,10.0
3364,236340,446605239,0.0,10.0,{'was_impossible': False},76,157,10.0
2146,167471,452282152,0.0,10.0,{'was_impossible': False},57,140,10.0
9798,152946,156027321,0.0,10.0,{'was_impossible': False},18,143,10.0
8378,36606,743224574,10.0,0.0,{'was_impossible': False},131,42,10.0
7117,104211,446605409,10.0,0.0,{'was_impossible': False},31,36,10.0
417,23571,312195516,10.0,0.0,{'was_impossible': False},16,189,10.0
1512,214786,553268880,10.0,0.0,{'was_impossible': False},52,112,10.0
5391,141493,385336179,10.0,0.0,{'was_impossible': False},55,62,10.0


In [None]:
# Use GridSearchCV 

In [152]:
param_grid = {'n_factors':[20,30,40],'n_epochs':[10,20,30],  'lr_all':[0.005,0.009, 0.01],'reg_all':[0.02, 0.07, 0.1]}
gs = GridSearchCV(SVDpp, param_grid, measures=['rmse'], cv=3)

In [10]:
svdtuned = SVD(n_factors=params['n_factors'], n_epochs=params['n_epochs'],lr_all=params['lr_all'], reg_all=params['reg_all')