In [34]:
# Importing needed tools, functions and dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from surprise import Dataset
from surprise.model_selection import train_test_split, cross_validate, GridSearchCV
from surprise import SVD
from collections import defaultdict
import joblib

In [8]:
# Loading the built-in dataset "MovieLens 100k"
data = Dataset.load_builtin("ml-100k")

In [35]:
# Splitting and fitting the data for predictions 

#trainset, testset = train_test_split(data, test_size=0.2)
#model = SVD()
#model.fit(trainset)

In [36]:
# Predicting using the test data

#preds = model.test(testset)

In [28]:
# Saving the baseline model using joblib

from joblib import dump, load

dump(model, filename="SVD_model.joblib")

['SVD_model.joblib']

In [29]:
# Loading the baseline model for evaluation

loaded_model = load(filename="SVD_model.joblib")

In [30]:
# Evaluating the baseline model

cross_validate(loaded_model, data, measures=["MSE", "MAE", "RMSE"], cv=5, verbose=2)

Evaluating MSE, MAE, RMSE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MSE (testset)     0.8680  0.8780  0.8830  0.8836  0.8677  0.8761  0.0070  
MAE (testset)     0.7354  0.7389  0.7378  0.7424  0.7362  0.7381  0.0024  
RMSE (testset)    0.9316  0.9370  0.9397  0.9400  0.9315  0.9360  0.0037  
Fit time          0.79    0.75    0.72    0.74    0.75    0.75    0.02    
Test time         0.07    0.07    0.07    0.07    0.07    0.07    0.00    


{'test_mse': array([0.86795085, 0.87803133, 0.88298012, 0.88359729, 0.86769396]),
 'test_mae': array([0.73544189, 0.73885191, 0.7378215 , 0.74237299, 0.73623492]),
 'test_rmse': array([0.9316388 , 0.93703326, 0.93967022, 0.93999856, 0.93150092]),
 'fit_time': (0.7908973693847656,
  0.7498736381530762,
  0.7249057292938232,
  0.7378416061401367,
  0.7476499080657959),
 'test_time': (0.0715324878692627,
  0.06954813003540039,
  0.07201504707336426,
  0.07056689262390137,
  0.06953597068786621)}

In [14]:
# Checking parameters of the estimator (SVD)

model.__dict__

{'n_factors': 100,
 'n_epochs': 20,
 'biased': True,
 'init_mean': 0,
 'init_std_dev': 0.1,
 'lr_bu': 0.005,
 'lr_bi': 0.005,
 'lr_pu': 0.005,
 'lr_qi': 0.005,
 'reg_bu': 0.02,
 'reg_bi': 0.02,
 'reg_pu': 0.02,
 'reg_qi': 0.02,
 'random_state': None,
 'verbose': False,
 'bsl_options': {},
 'sim_options': {'user_based': True},
 'trainset': <surprise.trainset.Trainset at 0x20120afbb80>,
 'bu': array([ 4.48040384e-01,  1.58928054e-01, -6.12581998e-01,  2.36579924e-01,
        -1.64710147e-01,  1.63776259e-02,  9.84936946e-02, -1.20052195e-01,
        -2.06811906e-01, -1.45651845e-01, -2.90380441e-01,  1.59593984e-01,
        -2.27375690e-01, -3.75578556e-01, -5.21556996e-01, -6.05995508e-02,
         4.60012302e-01, -6.95044887e-01, -2.41514383e-01,  5.68351879e-02,
         8.06817849e-03,  1.53601608e-01,  2.43055894e-01, -2.99487453e-01,
        -4.78635126e-01,  2.14003774e-01, -7.85199965e-01,  2.32047288e-01,
         3.58520776e-01, -1.22216794e+00, -2.55896695e-01, -4.71232021e-01

In [37]:
# Making a parameter grid for hyperparameter tuning 

param_grid = {
    "n_epochs": [5, 10, 15],
    "lr_all": [0.002, 0.005, 0.008],
    "reg_all": [0.4, 0.6, 0.8]
}

#Using GridSearchCV to find the best combination of parameters 

#gs = GridSearchCV(SVD, param_grid, measures=["RMSE", "MAE", "MSE"], cv=5)

#gs.fit(data)


In [20]:
# Checking the best parameter after using GridSearchCV

gs.best_params

{'rmse': {'n_epochs': 15, 'lr_all': 0.008, 'reg_all': 0.4},
 'mae': {'n_epochs': 15, 'lr_all': 0.008, 'reg_all': 0.4},
 'mse': {'n_epochs': 15, 'lr_all': 0.008, 'reg_all': 0.4}}

In [21]:
# Checking the score using the parameters mentioned above

gs.best_score

{'rmse': 0.9565229245637195,
 'mae': 0.7658326422516126,
 'mse': 0.9149660400306667}

In [23]:
# Using the parameters we got after using GridSearchCV

gs_model_tuned = SVD(n_epochs=15, lr_all=0.008, reg_all=0.4)

gs_model_tuned.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x20129e70cd0>

In [25]:
# Predicting

y_preds = gs_model_tuned.test(testset)

In [31]:
# Saving the tuned model using joblib

dump(gs_model_tuned, filename="SVD_tuned_model.joblib")

['SVD_tuned_model.joblib']

In [32]:
# Loading the tuned model 

loaded_tuned_model = load(filename="SVD_tuned_model.joblib")

In [33]:
# Evaluating the tuned model

cross_validate(loaded_tuned_model, data, measures=["MSE", "MAE", "RMSE"], cv=5, verbose=2)

Evaluating MSE, MAE, RMSE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MSE (testset)     0.9275  0.9155  0.9131  0.9165  0.9020  0.9149  0.0082  
MAE (testset)     0.7719  0.7675  0.7637  0.7678  0.7622  0.7666  0.0034  
RMSE (testset)    0.9631  0.9568  0.9555  0.9573  0.9497  0.9565  0.0043  
Fit time          0.59    0.58    0.57    0.57    0.57    0.58    0.01    
Test time         0.07    0.22    0.07    0.07    0.07    0.10    0.06    


{'test_mse': array([0.92752896, 0.91554896, 0.91305627, 0.91650727, 0.90199771]),
 'test_mae': array([0.77189926, 0.76752946, 0.76374707, 0.76784497, 0.76219639]),
 'test_rmse': array([0.96308305, 0.95684323, 0.95553978, 0.95734386, 0.9497356 ]),
 'fit_time': (0.5913136005401611,
  0.5792508125305176,
  0.5712459087371826,
  0.565086841583252,
  0.574458122253418),
 'test_time': (0.07050585746765137,
  0.22423553466796875,
  0.0725712776184082,
  0.0699920654296875,
  0.07059192657470703)}

In [39]:
# Getting recommendations according to the user_id from the predicted data

def get_rec(predi, n=5):
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in y_preds:
        top_n[uid].append((iid, est))

    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n
top_n = get_rec(y_preds, n=5)

user_id = '242'
print(f"Top 5 recommendations for user {user_id}")

for movie_id, predicted_rating in top_n[user_id]:
    print(f"Movie ID: {movie_id}, predicted rating: {predicted_rating:.2f}")

Top 5 recommendations for user 242
Movie ID: 1137, predicted rating: 4.30
Movie ID: 268, predicted rating: 4.14
Movie ID: 111, predicted rating: 3.99
Movie ID: 740, predicted rating: 3.97
Movie ID: 1152, predicted rating: 3.96
