In [1]:
from surprise import accuracy, Dataset, Reader, SVD, SVDpp, SlopeOne, NMF, NormalPredictor, KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore, BaselineOnly, CoClustering
from surprise.model_selection import train_test_split
import pandas as pd

In [2]:
# Load the movie catalogue and the user ratings from their CSV exports.
movies, ratings = (
    pd.read_csv(csv_path)
    for csv_path in ('../../Desktop/movies.csv', '../../Desktop/ratings.csv')
)

In [3]:
# Left-join the ratings onto the movie rows by movieId: every movie is kept,
# and a movie with no ratings gets NaN in the rating columns. Merging on the
# column yields a fresh 0..n-1 index directly.
df = movies.merge(ratings, how='left', on='movieId')

In [4]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [5]:
# Reduce the joined frame to the (user, item, rating) triple, renamed to the
# uid / iid / rating column names used from here on.
dataset = (
    df[['userId', 'movieId', 'rating']]
    .rename(columns={'userId': 'uid', 'movieId': 'iid'})
)

In [6]:
# Preview the first five rows to sanity-check the column layout.
dataset.head(n=5)

Unnamed: 0,uid,iid,rating
0,1.0,1,4.0
1,5.0,1,4.0
2,7.0,1,4.5
3,15.0,1,2.5
4,17.0,1,4.5


In [7]:
# Take the rating scale from the observed extremes of the data rather than
# hard-coding it, then wrap the frame as a Surprise dataset.
min_rating = dataset['rating'].min()
max_rating = dataset['rating'].max()
reader = Reader(rating_scale=(min_rating, max_rating))
data = Dataset.load_from_df(dataset, reader)

In [8]:
# Hold out 20% of the ratings for evaluation. A fixed random_state pins the
# shuffle so the same train/test partition is produced on every run; without
# it, each Restart & Run All would score the models on a different split.
trainset, testset = train_test_split(data, test_size=.2, random_state=42)

In [9]:
# One default-configured instance of each algorithm to benchmark, created in
# the order in which they will be evaluated.
algorithm_list = [
    algo_cls()
    for algo_cls in (
        SVD, SVDpp, SlopeOne, NMF, NormalPredictor, KNNBaseline, KNNBasic,
        KNNWithMeans, KNNWithZScore, BaselineOnly, CoClustering,
    )
]

In [12]:
def res(algorithm):
    """Fit one algorithm on the train split and report its RMSE on the test split.

    Uses the notebook-level ``trainset`` and ``testset`` variables.

    Parameters
    ----------
    algorithm : object
        An instantiated prediction algorithm exposing ``fit(trainset)`` and
        ``test(testset)``.

    Returns
    -------
    The value returned by ``accuracy.rmse`` for the test predictions, so
    callers can collect scores instead of reading them off the printed output.
    """
    algorithm.fit(trainset)
    predictions = algorithm.test(testset)

    # accuracy.rmse prints its own "RMSE: ..." line. Call it explicitly, before
    # the summary line, instead of smuggling it in as an unused .format()
    # argument; the printed output keeps the same order as before.
    rmse = accuracy.rmse(predictions)

    # Read the class name directly rather than slicing the object's repr,
    # which only worked for classes nested exactly three modules deep.
    print('Algorithm: {}\n-----\n'.format(type(algorithm).__name__))
    return rmse

In [13]:
# Fit and score every candidate in turn; res() prints each model's report.
for algorithm in algorithm_list:
    res(algorithm)

RMSE: 0.8635
Algorithm: SVD
-----

RMSE: 0.8523
Algorithm: SVDpp
-----

RMSE: 0.8927
Algorithm: SlopeOne
-----

RMSE: 0.9180
Algorithm: NMF
-----

RMSE: 1.4286
Algorithm: NormalPredictor
-----

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.8680
Algorithm: KNNBaseline
-----

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9388
Algorithm: KNNBasic
-----

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.8898
Algorithm: KNNWithMeans
-----

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.8897
Algorithm: KNNWithZScore
-----

Estimating biases using als...
RMSE: 0.8617
Algorithm: BaselineOnly
-----

RMSE: 0.9381
Algorithm: CoClustering
-----

