In [1]:
import sys

# Make the parent directory importable (presumably where the `SVD` package
# imported further down lives — confirm against the repo layout). The membership
# check keeps the cell idempotent: re-running it no longer appends a duplicate
# "../" entry to sys.path each time.
if "../" not in sys.path:
    sys.path.append("../")

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Hard-coded dataset sizes. `users` and `items` are handed to the fit functions
# below; `genres` is not referenced again in the visible cells.
# NOTE(review): presumably these are the distinct user / item / genre counts of
# the preprocessed data — confirm against the Preprocessing step.
users, items, genres = 226570, 231637, 552

# Read the three pre-split tables (train / validation / test), one CSV each.
train_data, validation_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../Preprocessing/processed_dataframes/train.csv",
        "../Preprocessing/processed_dataframes/val.csv",
        "../Preprocessing/processed_dataframes/test.csv",
    )
)

In [4]:
# Switch from DataFrames to their underlying arrays. Downstream code (`rmse`)
# reads columns 0-1 as the model input and column 2 as the expected value
# (presumably user, item, rating — confirm against the CSV schema).
uir_train, uir_val, uir_test = (
    split_frame.values
    for split_frame in (train_data, validation_data, test_data)
)

In [5]:
def rmse(uir_val, predict_func, **kwargs):
    """Root-mean-square error of ``predict_func`` over the rows of ``uir_val``.

    Parameters
    ----------
    uir_val : 2-D array
        One sample per row. Columns 0-1 are passed to ``predict_func`` and
        column 2 holds the expected value (presumably user, item, rating —
        confirm against the data files).
    predict_func : callable
        Invoked as ``predict_func(uir_val[:, :2], **kwargs)``. It must return
        one prediction per row (trailing/leading singleton axes are tolerated)
        or a single scalar that is compared against every row.
    **kwargs
        Forwarded unchanged to ``predict_func``.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((prediction - expected) ** 2))``.

    Raises
    ------
    ValueError
        If ``uir_val`` has no rows, or if the predictions cannot be matched
        one-to-one with the rows of ``uir_val``.
    """
    if len(uir_val) == 0:
        # Previously this surfaced as a bare ZeroDivisionError from 1/len(...).
        raise ValueError("rmse() needs at least one sample, got an empty array")

    val_expected = uir_val[:, 2]

    # Drop singleton axes so that a column vector of shape (n, 1) lines up with
    # the (n,) targets. Without this, NumPy broadcasting silently expands the
    # difference to an (n, n) matrix and the reported error is wrong.
    val_preds = np.squeeze(np.asarray(predict_func(uir_val[:, :2], **kwargs)))

    if val_preds.ndim > 1 or (
        val_preds.ndim == 1 and val_preds.shape != np.shape(val_expected)
    ):
        raise ValueError(
            "predict_func returned predictions of shape "
            f"{val_preds.shape}, expected {np.shape(val_expected)}"
        )

    return np.sqrt(np.mean((val_preds - val_expected) ** 2))

## SVD

In [6]:
from SVD.train_funcs.svd import fit_svd, predict_batch_svd

In [7]:
# Settings for the SVD run, collected in one mapping and unpacked as keywords.
# NOTE(review): α* look like per-parameter learning rates and λ* like
# regularisation weights — confirm in SVD.train_funcs.svd.
svd_hparams = dict(
    k=4,
    α1=0.005,
    α2=0.005,
    α3=0.005,
    α4=0.005,
    λ1=0.01,
    λ2=0.1,
    n_iters=75,
)
fitted_params_svd = fit_svd(uir_train, uir_val, users, items, **svd_hparams)

# RMSE of the fitted parameters on the validation split.
val_rmse_svd = rmse(uir_val, predict_batch_svd, params=fitted_params_svd)
print(val_rmse_svd)

  0%|          | 0/75 [00:00<?, ?it/s]

0.8986880672273672


In [8]:
# RMSE of the fitted SVD parameters on the test split; the label is printed
# before the score is computed, as in the original cell.
print("Test SVD")
test_rmse_svd = rmse(uir_test, predict_batch_svd, params=fitted_params_svd)
print(test_rmse_svd)

Test SVD
0.8960176503177786


## SVD++

In [9]:
from SVD.train_funcs.svdpp import fit_svdpp, predict_batch_svdpp, get_items_rated_by_users

In [10]:
# Derived from the training split and the user count; the result is reused
# below both when fitting SVD++ / gSVD++ and as the `Rus` argument at prediction
# time. NOTE(review): presumably a per-user collection of rated item ids —
# confirm the exact structure in SVD.train_funcs.svdpp.
irbu = get_items_rated_by_users(uir_train, users)

In [11]:
# Settings for the SVD++ run, collected in one mapping and unpacked as keywords.
# NOTE(review): α* look like per-parameter learning rates and λ* like
# regularisation weights — confirm in SVD.train_funcs.svdpp.
svdpp_hparams = dict(
    k=4,
    α1=0.005,
    α2=0.005,
    α3=0.005,
    α4=0.005,
    α5=0.005,
    λ1=0.1,
    λ2=0.1,
    n_iters=23,
)
fitted_params_svdpp = fit_svdpp(
    uir_train, uir_val, irbu, users, items, **svdpp_hparams
)

# RMSE on the validation split; `Rus` receives the structure built from the
# training split (`irbu`).
val_rmse_svdpp = rmse(
    uir_val, predict_batch_svdpp, params=fitted_params_svdpp, Rus=irbu
)
print(val_rmse_svdpp)

  0%|          | 0/23 [00:00<?, ?it/s]

0.8988981915952432


In [12]:
# RMSE of the fitted SVD++ parameters on the test split; the label is printed
# before the score is computed, as in the original cell.
print("Test SVD++")
test_rmse_svdpp = rmse(
    uir_test, predict_batch_svdpp, params=fitted_params_svdpp, Rus=irbu
)
print(test_rmse_svdpp)

Test SVD++
0.8961480229282277


## gSVD++

In [13]:
from SVD.train_funcs.gsvdpp import fit_gsvdpp, predict_batch_gsvdpp, get_tags_item_belongs_to
import scipy.sparse as sparse

In [14]:
# Load the stored sparse tag matrix and derive the per-item tag structure that
# gSVD++ consumes (passed as `tibt` when fitting and as `Gis` when predicting).
tags_mat = sparse.load_npz("../Preprocessing/objects/tags_matrix.npz")
tibt = get_tags_item_belongs_to(tags_mat, items)

# Tag count handed to fit_gsvdpp.
# NOTE(review): same value as `genres` defined in the configuration cell —
# presumably the same quantity; confirm and keep a single constant.
n_tags = 552

In [15]:
# Settings for the gSVD++ run, collected in one mapping and unpacked as keywords.
# NOTE(review): α* look like per-parameter learning rates and λ* like
# regularisation weights — confirm in SVD.train_funcs.gsvdpp.
gsvdpp_hparams = dict(
    k=5,
    α1=0.006,
    α2=0.006,
    α3=0.005,
    α4=0.005,
    α5=0.005,
    α6=0.006,
    λ1=0.01,
    λ2=0.1,
    λ3=0.01,
    λ4=0.1,
    λ5=0.01,
    λ6=0.1,
    n_iters=23,
)
fitted_params_gsvdpp = fit_gsvdpp(
    uir_train, uir_val, irbu, tibt, users, items, n_tags, **gsvdpp_hparams
)

# RMSE on the validation split; `Rus` and `Gis` receive the structures built
# earlier (`irbu`, `tibt`).
val_rmse_gsvdpp = rmse(
    uir_val,
    predict_batch_gsvdpp,
    params=fitted_params_gsvdpp,
    Rus=irbu,
    Gis=tibt,
)
print(val_rmse_gsvdpp)

  0%|          | 0/23 [00:00<?, ?it/s]

0.8988595146270146


In [16]:
# RMSE of the fitted gSVD++ parameters on the test split; the label is printed
# before the score is computed, as in the original cell.
print("Test gSVD++")
test_rmse_gsvdpp = rmse(
    uir_test,
    predict_batch_gsvdpp,
    params=fitted_params_gsvdpp,
    Rus=irbu,
    Gis=tibt,
)
print(test_rmse_gsvdpp)

Test gSVD++
0.8963188344120626
