In [1]:
from surprise.prediction_algorithms.matrix_factorization import SVD
from surprise.dataset import Trainset, Dataset
from surprise.reader import Reader
from surprise import accuracy
from surprise.dump import dump, load
from utils import read_training_data, read_validation_data

# Load Data

In [2]:
# Read the training and validation splits through the project-local helpers in `utils`.
# NOTE(review): both results are later handed to Dataset.load_from_df, so they are
# presumably pandas DataFrames with (user, item, rating) columns in that order —
# confirm against utils.read_training_data / utils.read_validation_data.
train_set = read_training_data()
valid_set = read_validation_data()

## Convert to Surprise datasets

In [3]:
# Reader shared by the training and validation conversions below.
# NOTE(review): `sep` looks relevant only when Surprise parses a file; the visible
# cells load from DataFrames, so it may be unused here — confirm before removing it.
reader = Reader(
    sep=",",
    rating_scale=(0.5, 5.0),  # (lowest, highest) rating value
)

In [4]:
# Wrap the training frame and build the full trainset in one step, so that
# `dataset_train` is only ever bound to the object that `svd.fit` consumes.
dataset_train = Dataset.load_from_df(train_set, reader=reader).build_full_trainset()

# The same ratings, turned back into a testset so the model can be scored on the
# data it was trained on.
dataset_train_test = dataset_train.build_testset()

In [5]:
# Same conversion for the validation split: wrap the frame, build a trainset over
# all of its ratings, then derive the testset that `svd.test` is called with.
dataset_valid = Dataset.load_from_df(valid_set, reader=reader).build_full_trainset()
dataset_valid_test = dataset_valid.build_testset()

# Collaborative Filtering Model

## Train with SVD algorithm

In [6]:
# SVD matrix-factorisation model; one hyperparameter per line so each can be tuned
# and diffed on its own. The file name used when the model is saved repeats these
# values by hand — keep the two in sync.
svd = SVD(
    n_factors=1,       # number of latent factors
    n_epochs=50,       # training epochs (one "Processing epoch" line each when verbose)
    reg_bu=0.135,      # regularisation on the user bias term
    reg_bi=0.135,      # regularisation on the item bias term
    reg_pu=0.12,       # regularisation on the user factors
    reg_qi=0.12,       # regularisation on the item factors
    random_state=20,   # fixed seed so re-running the notebook gives the same model
    verbose=True,      # print progress for every epoch during fit
)

In [7]:
%%time
# Train the model on the full training trainset. Because `verbose=True` was passed
# to the constructor, one "Processing epoch N" line is printed per epoch. The call
# is the cell's last expression, so its return value is what the cell displays.
svd.fit(dataset_train)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 20
Processing epoch 21
Processing epoch 22
Processing epoch 23
Processing epoch 24
Processing epoch 25
Processing epoch 26
Processing epoch 27
Processing epoch 28
Processing epoch 29
Processing epoch 30
Processing epoch 31
Processing epoch 32
Processing epoch 33
Processing epoch 34
Processing epoch 35
Processing epoch 36
Processing epoch 37
Processing epoch 38
Processing epoch 39
Processing epoch 40
Processing epoch 41
Processing epoch 42
Processing epoch 43
Processing epoch 44
Processing epoch 45
Processing epoch 46
Processing epoch 47
Processing epoch 48
Processing epoch 49
CPU times:

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x412313c50>

## Evaluation: MSE

In [8]:
%%time
print("Training set")
predictions = svd.test(dataset_train_test)
mse_train = accuracy.mse(predictions)

Training set
MSE: 0.7060
CPU times: user 4min 16s, sys: 28 s, total: 4min 44s
Wall time: 4min 56s


In [9]:
%%time
print("Validation set")
predictions = svd.test(dataset_valid_test)
mse_valid = accuracy.mse(predictions)

Validation set
MSE: 0.7324
CPU times: user 25.4 s, sys: 6.07 s, total: 31.5 s
Wall time: 33.1 s


## Save model

In [10]:
# Persist the fitted algorithm next to the notebook (path is relative to the
# working directory). The file name repeats the hyperparameters by hand:
# k1 <-> n_factors=1, reg_b_0135 <-> reg_bu/reg_bi=0.135, epochs_50 <-> n_epochs=50,
# and presumably reg_r_012 <-> reg_pu/reg_qi=0.12 — update it if those values change.
dump(
    "svd_k1_reg_b_0135_reg_r_012_epochs_50.pkl",
    algo=svd,
)