<a href="https://colab.research.google.com/github/robert-myers/myanimelist-recommender/blob/master/notebooks/algorithm_comparisons.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
pip install surprise

In [0]:
import numpy as np
import pandas as pd
import random

from surprise import accuracy, AlgoBase, BaselineOnly, Dataset, NormalPredictor, Reader, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate, train_test_split

my_seed = 8182868
random.seed(my_seed)
np.random.seed(my_seed)

In [0]:
train = pd.read_csv("https://s3.us-east-2.amazonaws.com/my.anime.list.sagemaker/sagemaker/train_sagemaker_one_percent.csv")[["userID", "itemID", "my_score"]]

In [0]:
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(train, reader)
trainset, testset = train_test_split(data)

In [5]:
%%time
algo = NMF()
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions), accuracy.mae(predictions)

RMSE: 2.4896
MAE:  2.1012
CPU times: user 12.9 s, sys: 14.9 ms, total: 12.9 s
Wall time: 13 s


In [6]:
%%time
algo = NormalPredictor()
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions), accuracy.mae(predictions)

RMSE: 2.3192
MAE:  1.8377
CPU times: user 431 ms, sys: 11.9 ms, total: 443 ms
Wall time: 446 ms


In [7]:
%%time
class MeanAlgo(AlgoBase):

    def __init__(self):
        AlgoBase.__init__(self)

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)
        self.the_mean = trainset.global_mean
        return self
        
    def estimate(self, u, i):
        return self.the_mean

algo = MeanAlgo()
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions), accuracy.mae(predictions)

RMSE: 1.6962
MAE:  1.3343
CPU times: user 353 ms, sys: 7.04 ms, total: 360 ms
Wall time: 362 ms


In [9]:
%%time
algo = SVDpp()
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions), accuracy.mae(predictions)

RMSE: 1.5240
MAE:  1.1696
CPU times: user 17.2 s, sys: 9.52 ms, total: 17.2 s
Wall time: 17.4 s


In [10]:
%%time
algo = BaselineOnly()
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions), accuracy.mae(predictions)

Estimating biases using als...
RMSE: 1.5221
MAE:  1.1578
CPU times: user 1.05 s, sys: 13.9 ms, total: 1.06 s
Wall time: 1.07 s


In [8]:
%%time
algo = SVD()
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions), accuracy.mae(predictions)

RMSE: 1.5129
MAE:  1.1503
CPU times: user 8.57 s, sys: 8.48 ms, total: 8.58 s
Wall time: 8.65 s
