In [1]:
import os
import sys
# Put the parent of the current working directory on the import path.
# NOTE(review): presumably this is where the project modules imported by
# later cells (`utils`, `matrix_factorization`) live -- confirm repo layout.
nb_dir = os.path.dirname(os.getcwd())
if sys.path.count(nb_dir) == 0:
    # Append only once so re-running the cell does not grow sys.path.
    sys.path.append(nb_dir)

### Reimplementation of SVD

In [23]:
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

from utils import DataLoader
from matrix_factorization import SVD

# Load the train/test splits from ../movielens100k via the project DataLoader.
# Alternative data directory kept for reference: DataLoader("../../data")
# NOTE(review): a later cell indexes the result as `test_set[:, 2]`, so the
# splits are presumably 2-D arrays -- confirm against utils.DataLoader.
loader = DataLoader("../movielens100k")
train_set, test_set = loader.load_csv2ndarray()

In [27]:
# Train the project's SVD implementation and score it on the test split.
nfactors = 40

svd = SVD(
    n_factors=nfactors,
    n_epochs=31,
    learning_rate=0.005,
    regularization=0.02,
    min_rating=0.5,
    max_rating=5,
)

# The test split is also passed as the validation set.
# NOTE(review): with early_stopping=False it presumably only drives the
# per-epoch val_* log lines -- confirm in matrix_factorization.SVD.fit.
svd.fit(X=train_set, X_val=test_set, early_stopping=False, shuffle=False)

pred = svd.predict(test_set)

# Column 2 of the test split is used as the ground-truth rating.
actual = test_set[:, 2]
mae = mean_absolute_error(actual, pred)
rmse = sqrt(mean_squared_error(actual, pred))

print(f'\nTest RMSE: {rmse:.5f}')
print(f'Test MAE: {mae:.5f}')

Start training...
Epoch 1/31  | train_loss: 0.98507 - val_loss: 0.91726 - val_rmse: 0.95774 - val_mae: 0.75672 - took 0.01 sec
Epoch 2/31  | train_loss: 0.88181 - val_loss: 0.87803 - val_rmse: 0.93703 - val_mae: 0.73345 - took 0.01 sec
Epoch 3/31  | train_loss: 0.84095 - val_loss: 0.85936 - val_rmse: 0.92702 - val_mae: 0.72274 - took 0.01 sec
Epoch 4/31  | train_loss: 0.81497 - val_loss: 0.84818 - val_rmse: 0.92096 - val_mae: 0.71655 - took 0.01 sec
Epoch 5/31  | train_loss: 0.79547 - val_loss: 0.84062 - val_rmse: 0.91685 - val_mae: 0.71252 - took 0.01 sec
Epoch 6/31  | train_loss: 0.77945 - val_loss: 0.83513 - val_rmse: 0.91385 - val_mae: 0.70962 - took 0.01 sec
Epoch 7/31  | train_loss: 0.76546 - val_loss: 0.83092 - val_rmse: 0.91155 - val_mae: 0.70746 - took 0.01 sec
Epoch 8/31  | train_loss: 0.75271 - val_loss: 0.82758 - val_rmse: 0.90972 - val_mae: 0.70580 - took 0.01 sec
Epoch 9/31  | train_loss: 0.74068 - val_loss: 0.82486 - val_rmse: 0.90822 - val_mae: 0.70445 - took 0.01 sec
E

### SVD from NicolasHug/Surprise

In [21]:
from surprise.prediction_algorithms import matrix_factorization
from surprise import Dataset
from surprise import Reader
from surprise import accuracy

import pandas as pd
import time


# Baseline: Surprise's SVD with the same factor count and number of epochs.
# Alternative data location kept for reference:
#   '../../data/rating_train.csv' and '../../data/rating_test.csv'
train = pd.read_csv('../movielens100k/rating_train.csv')
test = pd.read_csv('../movielens100k/rating_test.csv')

# NOTE(review): `train_set` / `test_set` are rebound here to Surprise Dataset
# objects, replacing whatever the earlier data-loading cell stored under the
# same names. Re-run that cell before re-running the custom SVD cell.
rating_cols = ['userId', 'movieId', 'rating']
reader = Reader(rating_scale=(0.5, 5))
train_set = Dataset.load_from_df(train[rating_cols], reader=reader)
test_set = Dataset.load_from_df(test[rating_cols], reader=reader)

# The test frame is turned into a full trainset first, and the list passed to
# `algo.test` is then derived from it with `build_testset()`.
trainset = train_set.build_full_trainset()
testset = test_set.build_full_trainset().build_testset()

# The timed region covers fitting, prediction and both metric computations.
t1 = time.time()

algo = matrix_factorization.SVD(n_factors=40, n_epochs=31)
algo.fit(trainset)

predictions = algo.test(testset)
accuracy.rmse(predictions)
accuracy.mae(predictions)

t2 = time.time()
print(t2 - t1)

RMSE: 0.8993
MAE:  0.6965
4.811099052429199
