In [1]:
import os
import sys
# Put the parent of the current working directory on the import path so that
# modules living one level above the notebook can be imported.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    # Guarded so that re-running the cell does not add duplicate entries.
    sys.path.append(nb_dir)

### Reimplementation of SVD

In [2]:
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

from utils import DataLoader
from matrix_factorization import SVD

# Load the train/test splits from the ../movielens100k directory through the
# project DataLoader (return types presumably ndarrays, per the method name --
# TODO confirm). An alternative data location used previously: "../../data".
data_loader = DataLoader("../movielens100k")
train_set, test_set = data_loader.load_csv2ndarray()

In [3]:
nfactors = 40

# Hyper-parameters handed to the project's SVD implementation.
svd_params = dict(
    learning_rate=0.005,
    regularization=0.02,
    n_epochs=31,
    n_factors=nfactors,
    min_rating=0.5,
    max_rating=5,
)
svd = SVD(**svd_params)

# Fit on the training split; the test split is supplied as validation data,
# with early stopping and shuffling both switched off.
svd.fit(X=train_set, X_val=test_set, early_stopping=False, shuffle=False)

# Predict on the test split, then request the evaluation metrics.
svd.predict(test_set)
svd.rmse()
svd.mae()
svd.precision_recall_at_k(k=20)

Start training...
Epoch 1/31  | train_loss: 0.98532 - val_loss: 0.91740 - val_rmse: 0.95781 - val_mae: 0.75721 - took 1.44 sec
Epoch 2/31  | train_loss: 0.88176 - val_loss: 0.87834 - val_rmse: 0.93720 - val_mae: 0.73378 - took 0.01 sec
Epoch 3/31  | train_loss: 0.84073 - val_loss: 0.85982 - val_rmse: 0.92726 - val_mae: 0.72300 - took 0.01 sec
Epoch 4/31  | train_loss: 0.81460 - val_loss: 0.84875 - val_rmse: 0.92128 - val_mae: 0.71680 - took 0.01 sec
Epoch 5/31  | train_loss: 0.79496 - val_loss: 0.84129 - val_rmse: 0.91722 - val_mae: 0.71274 - took 0.01 sec
Epoch 6/31  | train_loss: 0.77879 - val_loss: 0.83586 - val_rmse: 0.91425 - val_mae: 0.70986 - took 0.01 sec
Epoch 7/31  | train_loss: 0.76464 - val_loss: 0.83171 - val_rmse: 0.91198 - val_mae: 0.70768 - took 0.01 sec
Epoch 8/31  | train_loss: 0.75169 - val_loss: 0.82842 - val_rmse: 0.91018 - val_mae: 0.70600 - took 0.01 sec
Epoch 9/31  | train_loss: 0.73944 - val_loss: 0.82573 - val_rmse: 0.90870 - val_mae: 0.70465 - took 0.01 sec
E

### SVD from NicolasHug/Surprise

In [4]:
from surprise.prediction_algorithms import matrix_factorization
from surprise import Dataset
from surprise import Reader
from surprise import accuracy

import pandas as pd
import time


# Read the rating splits used for the Surprise baseline.
# Alternative data location used previously:
#   '../../data/rating_train.csv' and '../../data/rating_test.csv'
train = pd.read_csv('../movielens100k/rating_train.csv')
test = pd.read_csv('../movielens100k/rating_test.csv')

reader = Reader(rating_scale=(0.5, 5))
rating_cols = ['userId', 'movieId', 'rating']

# Use names distinct from `train_set` / `test_set`: those hold the data loaded
# earlier for the reimplemented SVD, and rebinding them to Surprise Dataset
# objects would break a re-run of the earlier training cell.
surprise_train = Dataset.load_from_df(train[rating_cols], reader=reader)
surprise_test = Dataset.load_from_df(test[rating_cols], reader=reader)

trainset = surprise_train.build_full_trainset()
# Build a trainset from the test ratings, then turn it into a testset that
# `algo.test` accepts.
testset = surprise_test.build_full_trainset().build_testset()

# perf_counter is a monotonic clock meant for measuring elapsed time; the
# timed span covers fitting, prediction and metric computation.
t1 = time.perf_counter()

algo = matrix_factorization.SVD(n_factors=40, n_epochs=31)
algo.fit(trainset)

predictions = algo.test(testset)
accuracy.rmse(predictions)
accuracy.mae(predictions)

t2 = time.perf_counter()
print(t2 - t1)

RMSE: 0.9049
MAE:  0.6984
3.884911060333252
