In [1]:
import os
import sys
# Put the parent of the current working directory on the import path (once),
# so that modules living next to the notebook folder can be imported.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

### Reimplementation of SVDpp

In [2]:
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

from utils import DataLoader
from matrix_factorization import SVDpp

# Load the train/test split from "../movielens100k"
# (alternative dataset location: "../../data").
loader = DataLoader("../movielens100k")
train_set, test_set = loader.load_csv2ndarray()

nfactors = 40

# Configure the re-implemented SVD++ model.
svdpp = SVDpp(
    learning_rate=0.005,
    regularization=0.02,
    n_epochs=2,
    n_factors=nfactors,
    min_rating=0.5,
    max_rating=5,
)

# The test split is also passed as the validation set; early stopping and
# shuffling are both disabled for this run.
svdpp.fit(X=train_set, X_val=test_set, early_stopping=False, shuffle=False)

# Column index 2 of the test array is used as the ground-truth rating.
pred = svdpp.predict(test_set)
y_true = test_set[:, 2]
rmse = sqrt(mean_squared_error(y_true, pred))
mae = mean_absolute_error(y_true, pred)

print(f'\nTest RMSE: {rmse:.5f}')
print(f'Test MAE: {mae:.5f}')

Start training...
Epoch 1/2  | train_loss: 0.98539 - val_loss: 0.91549 - val_rmse: 0.95681 - val_mae: 0.75593 - took 7.59 sec
Epoch 2/2  | train_loss: 0.87783 - val_loss: 0.87569 - val_rmse: 0.93578 - val_mae: 0.73238 - took 4.72 sec
Done.

Training took 12 sec

Test RMSE: 0.93578
Test MAE: 0.73238


### SVDpp from NicolasHug/Surprise

In [3]:
from surprise.prediction_algorithms import matrix_factorization
from surprise import Dataset
from surprise import Reader
from surprise import accuracy

import pandas as pd
import time


# Alternative dataset location:
#   '../../data/rating_train.csv' and '../../data/rating_test.csv'
train = pd.read_csv('../movielens100k/rating_train.csv')
test = pd.read_csv('../movielens100k/rating_test.csv')

# Surprise's load_from_df takes the user, item and rating columns in this
# order, together with a Reader describing the rating scale.
reader = Reader(rating_scale=(0.5, 5))
rating_cols = ['userId', 'movieId', 'rating']
train_set = Dataset.load_from_df(train[rating_cols], reader=reader)
test_set = Dataset.load_from_df(test[rating_cols], reader=reader)

trainset = train_set.build_full_trainset()
# The held-out ratings are wrapped in a trainset first so that
# build_testset() can turn them into the input expected by algo.test().
testset = test_set.build_full_trainset().build_testset()

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock and may jump if the system clock is
# adjusted while the cell is running.
t1 = time.perf_counter()

# NOTE(review): only n_factors and n_epochs are set here; the learning rate and
# regularization are left at Surprise's defaults. Confirm they match the
# 0.005 / 0.02 used for the re-implementation above, otherwise the two runs are
# not a like-for-like comparison.
algo = matrix_factorization.SVDpp(n_factors=40, n_epochs=2)
algo.fit(trainset)

predictions = algo.test(testset)
accuracy.rmse(predictions)
accuracy.mae(predictions)

# The measured span covers fitting, prediction and metric computation, not
# training alone.
t2 = time.perf_counter()
print(t2 - t1)

RMSE: 0.9295
MAE:  0.7246
70.79212808609009
