In [1]:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import joblib
from surprise import NMF, SVD, SVDpp
from surprise import Dataset
from surprise.accuracy import mae as MAE
from surprise.accuracy import mse as MSE
from surprise.model_selection import train_test_split

seed=12

## Hyperparameters

In [2]:
# Set for training
featureNum = 10
lr = 2e-3
epochs = 50

# Set for regularization
regRate = 1e-2

## Load and Split Data

In [3]:
data = Dataset.load_builtin("ml-100k")
trainSet, validSet = train_test_split(data, test_size=0.2, random_state=seed)

In [4]:
print(f"{trainSet.n_users} users, {trainSet.n_items} items, and {trainSet.n_ratings} ratings in train")
print(f"Missing rate of train: {(1-trainSet.n_ratings/(trainSet.n_users*trainSet.n_items))*100:.3f}%")

943 users, 1651 items, and 80000 ratings in train
Missing rate of train: 94.862%


In [5]:
numValidRates = len(validSet)
print(f"{numValidRates} ratings in valid")
print(f"Missing rate of valid: {(1-numValidRates/(trainSet.n_users*trainSet.n_items))*100:.3f}%")

20000 ratings in valid
Missing rate of valid: 98.715%


## Train and Validate FunkSVD using MSE and MAE

In [6]:
funksvd = SVD(n_factors=featureNum, n_epochs=epochs, lr_all=lr, reg_all=regRate, random_state=seed)
funksvd.fit(trainSet)

# Save model
with open("surprise/funksvd.joblib", "wb") as f:
    joblib.dump(funksvd, f)

# Evaluate
predSvd = funksvd.test(validSet)
mse = MSE(predSvd)
mae = MAE(predSvd)

MSE: 0.8645
MAE:  0.7316


## Train and Validate SVD++ using MSE and MAE

In [7]:
svdpp = SVDpp(n_factors=featureNum, n_epochs=epochs, lr_all=lr, reg_all=regRate, random_state=seed)
svdpp.fit(trainSet)

# Save model
with open("surprise/svdpp.joblib", "wb") as f:
    joblib.dump(svdpp, f)

# Evaluate
predSvdpp = svdpp.test(validSet)
mse = MSE(predSvdpp)
mae = MAE(predSvdpp)

MSE: 0.8267
MAE:  0.7121


## Train and Validate NMF using MSE and MAE

In [8]:
nmf = NMF(n_factors=featureNum, n_epochs=epochs, reg_pu=lr, reg_qi=lr, biased=True, reg_bu=regRate, reg_bi=regRate, lr_bu=regRate, lr_bi=regRate, random_state=seed)
nmf.fit(trainSet)

# Save model
with open("surprise/nmf.joblib", "wb") as f:
    joblib.dump(nmf, f)

# Evaluate
predNmf = nmf.test(validSet)
mse = MSE(predNmf)
mae = MAE(predNmf)

MSE: 0.9592
MAE:  0.7623
