In [None]:
# Turn off IPython's Jedi-backed tab completion for this kernel session.
# NOTE(review): presumably a workaround for slow or broken completion — confirm it is still needed.
%config Completer.use_jedi = False

In [None]:
from surprise import SVD
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split

# Load the built-in dataset

In [None]:
# Load the 'ml-100k' dataset that ships as a Surprise built-in.
data = Dataset.load_builtin('ml-100k')

# Hold out 25% of the ratings for evaluation; the fixed seed keeps the split repeatable.
train_set, test_set = train_test_split(
    data=data,
    random_state=0,
    test_size=0.25,
)

# Train an algorithm

In [None]:
# Fit an SVD model on the training split.
# The seed is pinned (like the seeded train/test split and the SVD models
# trained later in this notebook) so that Restart & Run All reproduces the
# same predictions and RMSE instead of drifting from run to run.
algo = SVD(random_state=0)
algo.fit(train_set)

# Inspect result

In [None]:
# Score every held-out rating with the fitted model.
predictions = algo.test(test_set)

# Report what kind of container came back and how many predictions it holds.
print(
    'prediction type: {}, size: {}'.format(
        type(predictions),
        len(predictions),
    )
)

In [None]:
# Label the output, then show the first five prediction records as the cell result.
print('Initial top-5 prediction')
predictions[0:5]

In [None]:
# Compact view of the first three predictions: (user id, item id, estimated rating).
[
    (p.uid, p.iid, p.est)
    for p in predictions[0:3]
]

In [None]:
# Raw user/item ids are passed to the model as strings.
uid = '196'
iid = '302'

# Estimate the rating for this one (user, item) pair; verbose=True prints the prediction.
pred = algo.predict(uid=uid, iid=iid, verbose=True)

# Accuracy

In [None]:
# Root-mean-squared error over the held-out predictions (printed, and returned as the cell value).
accuracy.rmse(predictions, verbose=True)

In [None]:
import pandas as pd

In [None]:
# Read the ratings table from the working directory.
ratings = pd.read_csv(filepath_or_buffer='./ratings.csv')

# Write a copy without the header row or the index column, so the file
# contains data rows only for the file-based loader used below.
ratings.to_csv(path_or_buf='./ratings_noh.csv', header=False, index=False)

# Summary statistics of the numeric columns, shown as the cell result.
ratings.describe()

# Load dataset from a file

In [None]:
from surprise import Reader

In [None]:
# Describe the layout of the headerless CSV: four comma-separated fields per
# line, with ratings on a 0.5 to 5 scale.
reader = Reader(
    rating_scale=(0.5, 5),
    sep=',',
    line_format='user item rating timestamp',
)

# Parse the headerless ratings file into a Surprise dataset.
data = Dataset.load_from_file('./ratings_noh.csv', reader=reader)

In [None]:
# Same 75/25 seeded split as before, now applied to the file-based dataset.
train_set, test_set = train_test_split(
    data=data,
    random_state=0,
    test_size=0.25,
)

In [None]:
# Seeded SVD with 50 latent factors; verbose=True asks Surprise to report training progress.
algo = SVD(
    verbose=True,
    random_state=0,
    n_factors=50,
)
algo.fit(train_set)

# Score the held-out ratings and report the RMSE as the cell result.
predictions = algo.test(test_set)
accuracy.rmse(predictions, verbose=True)

# Load dataset with pandas

In [None]:
# Re-read the ratings table so this section does not depend on the earlier cell.
ratings = pd.read_csv(filepath_or_buffer='./ratings.csv')

# Only the rating scale is needed here; the column layout comes from the DataFrame itself.
reader = Reader(rating_scale=(0.5, 5))

In [None]:
# Build the dataset from the user, item and rating columns, in that order.
data = Dataset.load_from_df(
    ratings.loc[:, ['userId', 'movieId', 'rating']],
    reader,
)

# Seeded 75/25 train/test split.
train_set, test_set = train_test_split(data, random_state=0, test_size=0.25)

In [None]:
# Seeded SVD with 50 latent factors, trained on the DataFrame-based split;
# verbose=True asks Surprise to report training progress.
algo = SVD(
    verbose=True,
    random_state=0,
    n_factors=50,
)
algo.fit(train_set)

In [None]:
# Score the held-out ratings from the DataFrame-based split.
predictions = algo.test(testset=test_set)

# RMSE over those predictions, shown as the cell result.
accuracy.rmse(predictions, verbose=True)