In [1]:
# this is just to know how much time will it take to run this entire ipython notebook 
from datetime import datetime

In [2]:
import pandas as pd
import numpy as np

In [3]:
import matplotlib
matplotlib.use('nbagg')

import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})

import seaborn as sns
sns.set_style('whitegrid')

## reading data from the csv file

In [None]:
df = pd.read_csv('data.csv', sep=',', 
                       names=['movie', 'user','rating','date'])
df.date = pd.to_datetime(df.date)

# we are arranging the ratings according to time.
df.sort_values(by='date', inplace=True)
df.head()

### Is there any entry that has Nan values...??

In [6]:
# it returns True for a row that has atleast one Nan value.
null_bool = df.isnull().any(axis = 1)
df[null_bool]

Unnamed: 0,movie,user,rating,date
95880696,16992,962,,NaT
95963294,17002,51082,,NaT


In [7]:
df.drop(df[null_bool].index, inplace=True)

In [8]:
print("No of Nan values in our dataframe : ", sum(df.isnull().any()))

No of Nan values in our dataframe :  0


### Let's try to find the duplicates if any...

In [9]:
dup_bool = df.duplicated(['movie','user','rating'])
dups = sum(dup_bool) # by considering all columns..( including timestamp )
print("There are {} duplicate rating entries in the data..".format(dups))

There are 1211 duplicate rating entries in the data..


In [10]:
# let's find what are the movies that has duplicate entries of user ratings....
df[dup_bool].movie.value_counts()

16969    762
16968    449
Name: movie, dtype: int64

__Removing those duplicate entries__  (inplace)

In [11]:
df.drop(df[dup_bool].index, inplace=True)

In [12]:
print("No of duplicate rows (movie, user, rating) entries :", sum(df.duplicated()))

No of duplicate rows (movie, user, rating) entries : 0


In [13]:
print("\nNo of ratings that we have, after removing nan rows and duplicates are :", df.shape[0])


No of ratings that we have, after removing nan rows and duplicates are : 100479045


## No. of Ratings, Users and Movies  

In [14]:
movies = df.movie.value_counts()
users = df.user.value_counts()
ratings = df.rating
print("Total data ")
print("-"*50)
print("\nTotal no of ratings :",df.shape[0])
print("Total No of Users   :", len(users))
print("Total No of movies  :", len(movies))

Total data 
--------------------------------------------------

Total no of ratings : 100479045
Total No of Users   : 480189
Total No of movies  : 17770


## Train data

In [15]:
big_train_df = df.iloc[:int(df.shape[0]*0.80)]

In [16]:
big_train_df.shape

(80383236, 4)

In [17]:
big_train_df.head(2)

Unnamed: 0,movie,user,rating,date
58698779,10774,510180,3.0,1999-11-11
96212476,17064,510180,2.0,1999-11-11


In [18]:
movies = big_train_df.movie.value_counts()
users = big_train_df.user.value_counts()
print("Training data ")
print("-"*50)
print("\nTotal no of ratings :",big_train_df.shape[0])
print("Total No of Users   :", len(users))
print("Total No of movies  :", len(movies))

Training data 
--------------------------------------------------

Total no of ratings : 80383236
Total No of Users   : 405024
Total No of movies  : 17423


# 2. Creating sparse matrix for Train data

In [4]:
import os
from scipy import sparse
start = datetime.now()
if os.path.isfile('sample/train_sparse_matrix.npz'):
    print("It is present in your pwd, getting it from disk....")
    # just get it from the disk instead of computing it
    train_sparse_matrix = sparse.load_npz('sample/train_sparse_matrix.npz')
    print("DONE..")
else: 
    print("We are creating sparse_matrix from the dataframe..")
    # create sparse_matrix and store it for after usage.
    # csr_matrix(data_values, (row_index, col_index), shape_of_matrix)
    # It should be in such a way that, MATRIX[row, col] = data
    train_sparse_matrix = sparse.csr_matrix((big_train_df.rating.values, (big_train_df.user.values,
                                               big_train_df.movie.values)))
    
    print('Done. It\'s shape is : (user, movie) : ',train_sparse_matrix.shape)
    print('Saving it into disk for furthur usage..')
    # save it into disk
    sparse.save_npz("sample/train_sparse_matrix.npz", train_sparse_matrix)
    print('Done..\n')
 
print(datetime.now() - start)

It is present in your pwd, getting it from disk....
DONE..
0:00:03.212726


### Sparsity

In [5]:
u,m = train_sparse_matrix.shape
elem = train_sparse_matrix.count_nonzero()

In [6]:
print("Sparsity Of matrix : {} % ".format(  (1-(elem/(u*m))) * 100) )

Sparsity Of matrix : 99.82927583214679 % 


## Test

In [None]:
big_test_df = df.iloc[int(df.shape[0]*0.80) : ]
big_test_df.shape

In [24]:
big_test_df.head(2)

Unnamed: 0,movie,user,rating,date
52863848,9617,316390,2.0,2005-08-08
12989568,2462,605375,4.0,2005-08-08


In [25]:
movies = big_test_df.movie.value_counts()
users = big_test_df.user.value_counts()

print("Test data ")
print("-"*50)
print("\nTotal no of ratings :",big_test_df.shape[0])
print("Total No of Users   :", len(users))
print("Total No of movies  :", len(movies))

Test data 
--------------------------------------------------

Total no of ratings : 20095809
Total No of Users   : 349327
Total No of movies  : 17757


# 3. Creating sparse matrix for Test data

In [8]:
import os
from scipy import sparse
start = datetime.now()
if os.path.isfile('sample/test_sparse_matrix.npz'):
    print("It is present in your pwd, getting it from disk....")
    # just get it from the disk instead of computing it
    test_sparse_matrix = sparse.load_npz('sample/test_sparse_matrix.npz')
    print("DONE..")
else: 
    print("We are creating sparse_matrix from the dataframe..")
    # create sparse_matrix and store it for after usage.
    # csr_matrix(data_values, (row_index, col_index), shape_of_matrix)
    # It should be in such a way that, MATRIX[row, col] = data
    test_sparse_matrix = sparse.csr_matrix((big_test_df.rating.values, (big_test_df.user.values,
                                               big_test_df.movie.values)))
    
    print('Done. It\'s shape is : (user, movie) : ',test_sparse_matrix.shape)
    print('Saving it into disk for furthur usage..')
    # save it into disk
    sparse.save_npz("sample/test_sparse_matrix.npz", test_sparse_matrix)
    print('Done..\n')
 
print(datetime.now() - start)

It is present in your pwd, getting it from disk....
DONE..
0:00:00.855393


### Sparsity

In [9]:
u,m = test_sparse_matrix.shape
elem = test_sparse_matrix.count_nonzero()

In [16]:
elem

20095713

In [10]:
print("Sparsity Of matrix : {} % ".format(  (1-(elem/(u*m))) * 100) )

Sparsity Of matrix : 99.95731855608713 % 


# Computing Average Ratings (from Train data)

In [11]:
averages = dict()

In [12]:
# Boolean matrix of ratings ( whether a user rated that movie or not)
is_rated = train_sparse_matrix!=0

In [13]:
# get the global average of ratings in our train set.
global_average = train_sparse_matrix.sum()/train_sparse_matrix.count_nonzero()
averages['global'] = global_average
averages

{'global': 3.5829128738184792}

### User averages

In [14]:
# get the user averages in dictionary (key: userid, value: avg rating)
#_____________________________________________________________________#

# ".A1" is for converting Column_Matrix to 1-D numpy array 
sum_of_ratings_per_user = train_sparse_matrix.sum(axis=1).A1
# no of ratings that each user has given.
no_of_ratings_per_user = is_rated.sum(axis=1).A1

# creae a dictonary of users and their average ratigns..
average_user_ratings = { i : sum_of_ratings_per_user[i]/no_of_ratings_per_user[i]  
                                 for i in range(train_sparse_matrix.shape[0]) 
                                    if no_of_ratings_per_user[i] !=0}

# add user averages to th eaverages dictionary
averages['user'] = average_user_ratings

# test it..
averages['user'][97]

3.182377049180328

### Movie Averages

In [15]:
# get the Movie Average ratings in dictionary (key: movieId, value: avg_rating)
#_____________________________________________________________________#

# sum of the ratings that a movie got by any user(who rated that movie..)
sum_of_ratings_per_movie = train_sparse_matrix.sum(axis=0).A1
# no of ratings that a movie got.
no_of_ratings_per_movie = is_rated.sum(axis=0).A1

average_movie_ratings = {i : sum_of_ratings_per_movie[i]/ no_of_ratings_per_movie[i] 
                                for i in range(train_sparse_matrix.shape[1])
                                    if no_of_ratings_per_movie[i]!=0 }

# add thie'per_movie' avg ratings to averages dictionary
averages['movie'] = average_movie_ratings

# test this dictionary
averages['movie'][30]

3.7808264297834113

## Preparing Trainset and testset for Surprise based alorithms

### TrainSet

In [4]:
from surprise import Reader, Dataset

In [5]:
import pickle
import os
if os.path.isfile('sample/large/trainset.pickle'):
    print('loading it from the disk')
    trainset = pickle.load(open('sample/large/trainset.pickle', 'rb'))
    print('done')
else:
    print('creating it from sparse_matrix ( if it is loaded)')
    
    train_users, train_movies, train_ratings = sparse.find(train_sparse_matrix)
    
   

    print('preparing train dataframe with users, movies and ratings of the trainset..')
    surp_train = pd.DataFrame({'user': train_users,
                           'movie': train_movies,
                           'rating': train_ratings}, )
    surp_train = surp_train[['user','movie','rating']]
    print(surp_train.head(2))
    
    print('Creating trainset from the dataframe...')
    trainset = Dataset.load_from_df(surp_train,Reader(rating_scale=(1,5))).build_full_trainset()
    
    print('No of unique users, unique movies and ratings in train data', end=' : ')
    print('(users, movies, ratings) : ({}, {}, {})'.format(trainset.n_users, 
                                                           trainset.n_items,
                                                           trainset.n_ratings))
    
    # saving testset to dsik
    print('saving it to disk..')
    start = datetime.now()
    with open('sample/large/trainset.pickle', 'wb') as f:
        pickle.dump(trainset, f)
    print(datetime.now() - start)
    print('Done')

loading it from the disk
done


In [6]:
trainset.n_users, trainset.n_items, trainset.n_ratings

(405024, 17423, 80382095)

### TestSet

In [7]:
import pickle
if os.path.isfile('sample/large/testset.pickle'):
    print('loading testset from the disk..')
    testset = pickle.load(open('sample/large/testset.pickle', 'rb'))
    print('Done')
else:
    print('creating tesetset from sparse matrix.( if it is loaded )')
    
    test_users, test_movies, test_ratings = sparse.find(test_sparse_matrix)
    
    print('No of unique users, unique movies..', end=' : ')
    print(len(np.unique(test_users)),  len(np.unique(test_movies)))

    print("No of ratings in test set :",len(test_ratings))

    testset = list(zip(test_users, test_movies, test_ratings))

    # saving testset to dsik
    print('saving it to disk..')
    start = datetime.now()
    with open('sample/large/testset.pickle', 'wb') as f:
        pickle.dump(testset, f)
    print(datetime.now() - start)
    print('Done')

loading testset from the disk..
Done


## Generic function to run any surprise based algorithm 

    - given prefectly initialized ALGO, TRAINSET and TESTSET
    

In [8]:
from datetime import datetime
import numpy as np

##########################################################
# get  (actual_list , predicted_list) ratings given list 
# of predictions (prediction is a class in Surprise).    
##########################################################
def get_ratings(predictions):
    actual = np.array([pred.r_ui for pred in predictions])
    pred = np.array([pred.est for pred in predictions])
    
    return actual, pred

################################################################
# get ''rmse'' and ''mape'' , given list of prediction classes 
################################################################
def get_errors(predictions, print_them=False):

    actual, pred = get_ratings(predictions)
    rmse = np.sqrt(np.mean(pred - actual)**2)
    mape = np.mean(np.abs(pred - actual)/actual)

    return rmse, mape*100

##################################################################################
# It will return predicted ratings, rmse and mape of both train and test data   #
##################################################################################
def run_surprise(algo, trainset, testset,evaluate_train=False, verbose=True): 
    '''
        return train_dict, test_dict
    
        It returns two dictionaries, one for train and the other is for test
        Each of them have 3 key-value pairs, which specify ''rmse'', ''mape'', and ''predicted ratings''.
    '''
    start = datetime.now()
    # dictionaries that stores metrics for train and test..
    train = dict()
    test = dict()
    
    # train the algorithm with the trainset
    st = datetime.now()
    print('Training the model...')
    algo.fit(trainset)
    print('Done. time taken : {} \n'.format(datetime.now()-st))
    
    # ---------------- Evaluating train data--------------------#
    if evaluate_train:
        st = datetime.now()
        print('Evaluating the model with train data..')
        # get the train predictions (list of prediction class inside Surprise)
        train_preds = algo.test(trainset.build_testset())
        # get predicted ratings from the train predictions..
        train_actual_ratings, train_pred_ratings = get_ratings(train_preds)
        # get ''rmse'' and ''mape'' from the train predictions.
        train_rmse, train_mape = get_errors(train_preds)
        print('time taken : {}'.format(datetime.now()-st))
        if verbose:
            print('-'*15)
            print('Train Data')
            print('-'*15)
            print("RMSE : {}\n\nMAPE : {}\n".format(train_rmse, train_mape))
    
        #store them in the train dictionary
        if verbose:
            print('adding train results in the dictionary..')
        train['rmse'] = train_rmse
        train['mape'] = train_mape
        train['predictions'] = train_pred_ratings
    else:
        print('\n we are skipping model evaluation with train data..\n')
        train = None
    
    
    
    #------------ Evaluating Test data---------------#
    st = datetime.now()
    print('\nEvaluating for test data...')
    # get the predictions( list of prediction classes) of test data
    test_preds = algo.test(testset)
    # get the predicted ratings from the list of predictions
    test_actual_ratings, test_pred_ratings = get_ratings(test_preds)
    # get error metrics from the predicted and actual ratings
    test_rmse, test_mape = get_errors(test_preds)
    print('time taken : {}'.format(datetime.now()-st))
    
    if verbose:
        print('-'*15)
        print('Test Data')
        print('-'*15)
        print("RMSE : {}\n\nMAPE : {}\n".format(test_rmse, test_mape))
    # store them in test dictionary
    if verbose:
        print('storing the test results in test dictionary...')
    test['rmse'] = test_rmse
    test['mape'] = test_mape
    test['predictions'] = test_pred_ratings
    
    print('\n'+'-'*45)
    print('Total time taken to run this algorithm :', datetime.now() - start)
    
    # return two dictionaries train and test
    return train, test

### Some Global dictionary that stores rmse and mape for all the models....

- It stores the metrics in a dictionary of dictionaries

> __keys__ : model names(string)

> __value__: dict(__key__ : metric, __value__ : value )

In [9]:
models_evaluation_train = dict()
models_evaluation_test = dict()

models_evaluation_train, models_evaluation_test

({}, {})

## 1. Baseline Model  ( with User and Item biases )

In [17]:
from surprise import BaselineOnly

- #### Predicted_rating : ( baseline prediction )

>$   \large {\hat{r}_{ui} = b_{ui} =\mu + b_u + b_i} $

- ####  Optimization function ( Least Squares Problem )

> $ \large \sum_{r_{ui} \in R_{train}} \left(r_{ui} - (\mu + b_u + b_i)\right)^2 +
\lambda \left(b_u^2 + b_i^2 \right).\text {        [mimimize } {b_u, b_i]}$

In [18]:
bsl_options = {'method': 'sgd',
               'learning_rate': .001
               }
bsl = BaselineOnly(bsl_options=bsl_options)

In [28]:
bsl_train_results, bsl_test_results = run_surprise(bsl, trainset, testset,
                                                   evaluate_train=False ,verbose=True)

Training the model...
Estimating biases using sgd...
Done. time taken : 0:02:36.793990 


 we are skipping model evaluation with train data..


Evaluating for test data...
time taken : 0:03:07.972872
---------------
Test Data
---------------
RMSE : 0.02624592932029108

MAPE : 30.90283402482485

storing the test results in test dictionary...

---------------------------------------------
Total time taken to run this algorithm : 0:05:44.767861


In [29]:
# store them in models dictionary..
models_evaluation_train['bsl'] = bsl_train_results
models_evaluation_test['bsl'] = bsl_test_results

models_evaluation_test['bsl']

{'mape': 30.90283402482485,
 'predictions': array([4.04593075, 3.06951653, 3.9550621 , ..., 2.4852953 , 2.89735459,
        2.89735459]),
 'rmse': 0.02624592932029108}

In [33]:
# store the trained model in disk...
from surprise import dump

dump.dump('sample/large/bsl_algo', predictions=models_evaluation_test['bsl']['predictions'],
          algo=bsl, verbose=1)

The dump has been saved as file bsl_algo


# 2. KNN with Baseline_model 

In [None]:
from surprise import KNNBaseline

- __predicted Rating__ : ( ___ based on User-User similarity ___ )

\begin{align} \hat{r}_{ui} = b_{ui} + \frac{ \sum\limits_{v \in N^k_i(u)}
\text{sim}(u, v) \cdot (r_{vi} - b_{vi})} {\sum\limits_{v \in
N^k_i(u)} \text{sim}(u, v)} \end{align}

- $\pmb{b_{ui}}$ -  _Baseline prediction_ of (user,movie) rating


- $ \pmb {N_i^k (u)}$ - Set of __K similar__ users (neighbours) of __user (u)__ who rated __movie(i)__  


- _sim (u, v)_ - __Similarity__ between users __u and v__  
    - Generally, it will be cosine similarity or Pearson correlation coefficient. 
    - But we use __shrunk Pearson-baseline correlation coefficient__, which is based on the pearsonBaseline similarity ( we take base line predictions instead of mean rating of user/item)
        - Computation of the correlation coefficient is based only on the common user support.
        - similarities based on a greater user support are more reliable ie., Users who has more number of common movie ratings are considered as more similar than users who has few no of movies  in common which are rated.
        - \begin{align}\begin{aligned}\text{pearson_baseline_shrunk_sim}(u, v) &= \frac{|I_{uv}| - 1}{|I_{uv}| - 1 + \text{shrinkage}} \cdot \hat{\rho}_{uv}\end{aligned}\end{align}
            - $\pmb{|I_{uv}|}$ - No of common movies between users(u and v)
            - __shrinkage__ - kind of hyperparameter. The defalut value suggested is ___100___
                - __0__ : There is no shrinkage at all ( It is normal pearson correlation coefficient ) 
            - $ \pmb {\hat \rho_uv}$ - Pearson Correlation Coefficient ( between users )
                - \begin{align} \text{pearson_baseline_sim}(u, v) = \hat{\rho}_{uv} = \frac{
    \sum\limits_{i \in I_{uv}} (r_{ui} -  b_{ui}) \cdot (r_{vi} -
    b_{vi})} {\sqrt{\sum\limits_{i \in I_{uv}} (r_{ui} -  b_{ui})^2}
    \cdot \sqrt{\sum\limits_{i \in I_{uv}} (r_{vi} -  b_{vi})^2}} \end{align}
 

 ------------ or 
 --

- __ Predicted rating __ ( based on Item Item similarity ):
 \begin{align} \hat{r}_{ui} = b_{ui} + \frac{ \sum\limits_{j \in N^k_u(i)}\text{sim}(i, j) \cdot (r_{uj} - b_{uj})} {\sum\limits_{j \in N^k_u(j)} \text{sim}(i, j)} \end{align}

    -  ___Notations follows same as above (user user based predicted rating ) ___

- We can do both and blend them ( see if we can better results when combined ) 

## 2.1 KNN with User User similarities 

In [None]:
# we specify , how to compute similarities and what to consider with sim_options to our algorithm
sim_options = {'user_based' : True,
               'name': 'pearson_baseline',
               'shrinkage': 100,
               'min_support': 2
              } 

bsl_options = {'method': 'sgd'} # we keep other parameters like regularization parameter and learning_rate as default values.

In [None]:
knn_bsl_u = KNNBaseline(k=40, sim_options = sim_options, bsl_options = bsl_options)

knn_bsl_u_train_results, knn_bsl_u_test_results = run_surprise(knn_bsl_u, trainset, testset, 
                                                               evaluate_train=False, verbose=True)

In [None]:
models_evaluation_train['knn_bsl_u'] = knn_bsl_u_train_results
models_evaluation_test['knn_bsl_u']  = knn_bsl_u_test_results

models_evaluation_test['knn_bsl_u'] 

## 2.2 KNN with Item Item similarities 

In [None]:
# we specify , how to compute similarities and what to consider with sim_options to our algorithm
sim_options = {'user_based' : False,
               'name': 'pearson_baseline',
               'shrinkage': 100,
               'min_support': 2
              } 

bsl_options = {'method': 'sgd'} # we keep other parameters like regularization parameter and learning_rate as default values.

In [None]:
knn_bsl_m = KNNBaseline(k=40, sim_options = sim_options, bsl_options = bsl_options)

knn_bsl_m_train_results, knn_bsl_m_test_results = run_surprise(knn_bsl_m, trainset, testset, verbose=True)

In [None]:
models_evaluation_train['knn_bsl_m'] = knn_bsl_m_train_results
models_evaluation_test['knn_bsl_m']  = knn_bsl_m_test_results

models_evaluation_test['knn_bsl_m'] 

# 3. Matrix Factorization Techniques

## 3.1 SVD -  MF algorithm with user item interactions 

In [40]:
from surprise import SVD

- __ Predicted Rating : __
    - $ \large \hat r_{ui} = \mu + b_u + b_i + q_i^Tp_u $ 

- __Optimization problem with user item interactions and regularization (to avoid overfitting)__


- $\Large \sum_{r_{ui} \in R_{train}} \left(r_{ui} - \hat{r}_{ui} \right)^2 +
\lambda\left(b_i^2 + b_u^2 + ||q_i||^2 + ||p_u||^2\right) $

In [41]:
svd = SVD(n_factors=100, n_epochs=20, random_state=15, verbose=True)
svd_train_results, svd_test_results = run_surprise(svd, trainset, testset,
                                                   evaluate_train=False, verbose=True)

Training the model...
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Done. time taken : 1:15:35.637917 


 we are skipping model evaluation with train data..


Evaluating for test data...
time taken : 0:03:43.571852
---------------
Test Data
---------------
RMSE : 0.009300209925620563

MAPE : 29.491243711503127

storing the test results in test dictionary...

---------------------------------------------
Total time taken to run this algorithm : 1:19:19.209769


In [42]:
models_evaluation_train['svd'] = svd_train_results
models_evaluation_test['svd'] = svd_test_results

models_evaluation_test['svd']

{'mape': 29.491243711503127,
 'predictions': array([3.97526121, 2.55296855, 3.63684438, ..., 2.58194006, 2.80058386,
        2.80058386]),
 'rmse': 0.009300209925620563}

In [43]:
# store the trained svd model in disk...
from surprise import dump

dump.dump('sample/large/svd', predictions=models_evaluation_test['bsl']['predictions'],
          algo=bsl, verbose=1)

The dump has been saved as file sample/large/svd


 ### 3.2 SVD -  with Implicit feedback of Items(movies)

In [10]:
from surprise import SVDpp

- __ Predicted Rating : __ 
    - $ \large \hat{r}_{ui} = \mu + b_u + b_i + q_i^T\left(p_u +
    |I_u|^{-\frac{1}{2}} \sum_{j \in I_u}y_j\right) $ 
 


 - $ \pmb{I_u}$ --- the set of all items rated by user u

- $\pmb{y_j}$ --- Our new set of item factors that capture implicit ratings.  

- __Optimization problem with user item interactions and regularization (to avoid overfitting)__ 


   
- $ \Large \sum_{r_{ui} \in R_{train}} \left(r_{ui} - \hat{r}_{ui} \right)^2 +
\lambda\left(b_i^2 + b_u^2 + ||q_i||^2 + ||p_u||^2 + y_j^2\right) $ 

In [None]:
svdpp = SVDpp(n_factors=20, random_state=15, verbose=True)
svdpp_train_results, svdpp_test_results = run_surprise(svdpp, trainset, testset)

Training the model...
 processing epoch 0
 processing epoch 1
 processing epoch 2


In [163]:
models_evaluation_train['svdpp'] = svdpp_train_results
models_evaluation_test['svdpp'] = svdpp_test_results

models_evaluation_test['svdpp']

{'mape': 35.31675253673884,
 'predictions': array([3.91771285, 3.7912363 , 3.43484015, ..., 3.60432926, 3.51151561,
        3.4300295 ]),
 'rmse': 0.2496661363739624}

In [164]:
pd.DataFrame(models_evaluation_test)

Unnamed: 0,bsl,knn_bsl_m,knn_bsl_u,svd,svdpp
mape,34.6112,36.3129,36.3129,35.3592,35.3168
predictions,"[4.036206096101787, 3.6381296161606014, 3.5320...","[4.460806391864557, 3.4690056553542377, 3.4666...","[4.460806391864557, 3.4690056553542377, 3.4666...","[4.12340143043477, 3.719840558294905, 3.438442...","[3.9177128477107477, 3.7912363045212905, 3.434..."
rmse,0.177168,0.259568,0.259568,0.229293,0.249666
