In [1]:
!pip install scikit-surprise
!pip install scikit-optimize
!pip install pandas-profiling

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3156232 sha256=4fa057422ee0069fdeff253753d82a89da2f6b3e7e397dc2fb9c405ca2c88239
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3
Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.3/100.3 kB[0m [31m2.6 MB/s[0m eta [3

In [2]:
import pandas as pd
from surprise import accuracy, Dataset, SVD, SVDpp, NMF
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate

In [3]:
data = Dataset.load_builtin(name='ml-100k', prompt=True)

Dataset ml-100k could not be found. Do you want to download it? [Y/n] y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k


In [4]:
algorithms = [SVD(), SVDpp(), NMF()]


In [5]:
SVD_result = cross_validate(algorithms[0], data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
SVD_result = pd.DataFrame.from_dict(SVD_result).mean(axis=0)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9358  0.9435  0.9356  0.9328  0.9305  0.9356  0.0044  
MAE (testset)     0.7346  0.7452  0.7383  0.7345  0.7367  0.7379  0.0039  
Fit time          1.08    1.94    3.10    2.05    2.27    2.09    0.65    
Test time         0.11    0.30    0.21    0.40    0.16    0.23    0.10    


In [6]:
SVDpp_result = cross_validate(algorithms[1], data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
SVDpp_result = pd.DataFrame.from_dict(SVDpp_result).mean(axis=0)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9240  0.9161  0.9186  0.9169  0.9181  0.9187  0.0028  
MAE (testset)     0.7224  0.7194  0.7217  0.7182  0.7191  0.7202  0.0016  
Fit time          25.54   24.27   24.69   24.08   23.90   24.50   0.58    
Test time         3.77    3.62    3.74    3.61    3.55    3.66    0.08    


In [7]:
NMF_result = cross_validate(algorithms[2], data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
NMF_result = pd.DataFrame.from_dict(NMF_result).mean(axis=0)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9649  0.9636  0.9533  0.9724  0.9586  0.9626  0.0064  
MAE (testset)     0.7559  0.7575  0.7482  0.7645  0.7555  0.7563  0.0052  
Fit time          1.15    1.17    4.45    2.80    1.17    2.15    1.31    
Test time         0.09    0.18    0.32    0.18    0.16    0.19    0.08    


In [8]:
surprise_results = pd.DataFrame(columns=['SVD', 'SVDpp', 'NMF'])

In [9]:
surprise_results['SVD'] = SVD_result
surprise_results['SVDpp'] = SVDpp_result
surprise_results['NMF'] = NMF_result

In [10]:
surprise_results

Unnamed: 0,SVD,SVDpp,NMF
test_rmse,0.935647,0.918749,0.962561
test_mae,0.737863,0.720152,0.756315
fit_time,2.089628,24.496406,2.149446
test_time,0.234592,3.659715,0.187179


In [11]:
train_data, test_data = train_test_split(data, test_size=0.25)

In [12]:
predictions_svd = algorithms[0].fit(train_data).test(test_data)
predictions_svdpp = algorithms[1].fit(train_data).test(test_data)
predictions_nmf = algorithms[2].fit(train_data).test(test_data)

In [74]:


def print_rmse(algorithm_name, predictions):
    rmse = accuracy.rmse(predictions)
    formatted_name = f'| {algorithm_name:<15} |'
    formatted_rmse = f' RMSE: {rmse:.4f} |'

    for alg in  algorithm_name:  # Add a horizontal line after the last row
        separator = '-' * (len(formatted_name) + len(formatted_rmse) - 1)


    row = f'{formatted_name}{formatted_rmse}'
    print(f'{formatted_name}{formatted_rmse}')
    # print(row)
    print(separator)

print(f'| {"Algorithm":<15} | {"RMSE":^13} |')
print('-' * 40)

# Assuming you have predictions_svd, predictions_svdpp, and predictions_nmf

print_rmse('SVD:', predictions_svd)
print_rmse('SVDpp:', predictions_svdpp)
print_rmse('NMF:', predictions_nmf)




| Algorithm       |     RMSE      |
----------------------------------------
RMSE: 0.9351
RMSE: 0.9193
RMSE: 0.9649
