In [1]:
import os
import pandas as pd
import numpy as np
import surprise

In [None]:
#!pip install scikit-surprise

In [2]:
# Location of the MovieLens "latest-small" dataset, relative to the notebook's start directory.
data_dir = './ml-latest-small'

In [3]:
# Work from inside the dataset directory so later cells can open files by bare name.
# data_dir is made absolute first: with the original relative path, running this
# cell a second time failed, because the relative path no longer resolves once the
# working directory has already been switched. abspath() on an already-absolute
# path is a no-op, so the cell is now safe to re-run.
data_dir = os.path.abspath(data_dir)
os.chdir(data_dir)

In [22]:
# Load the ratings table; the bare file name is resolved against the current
# working directory. Then preview the first five rows.
mr = pd.read_csv(filepath_or_buffer="ratings.csv")
mr.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [23]:
# Keep only the three columns Surprise needs, renamed and in the
# (user, item, rating) order that Dataset.load_from_df expects.
# `mr` is rebound to a new frame instead of being mutated in place so the cell
# can be re-run safely: the previous drop('timestamp', inplace=True) raised
# KeyError on a second execution, whereas rename() ignores labels that are
# already renamed and the column selection is a no-op on an already-clean frame.
mr = mr.rename(columns={'userId': 'user', 'movieId': 'item'})[['user', 'item', 'rating']]

In [24]:
# Columns are user, item, rating. The ratings use half-star steps (e.g. the 2.5
# shown in the ratings preview), and MovieLens documents the scale as 0.5-5.0
# stars, so the lower bound must be 0.5 rather than 1. Surprise clips predicted
# ratings to rating_scale, so a lower bound of 1 would never allow an estimate
# below one star.
# line_format is kept for compatibility; per the Surprise docs only rating_scale
# is needed when the data is loaded from a DataFrame.
reader = surprise.dataset.Reader(line_format='user item rating', rating_scale=(0.5, 5))

In [25]:
# Wrap the ratings frame in a Surprise Dataset using the reader's rating scale ...
mr_train = surprise.dataset.Dataset.load_from_df(df=mr, reader=reader)
# ... and build a trainset from every rating (no hold-out split).
mr_trainset = mr_train.build_full_trainset()

In [8]:
## Create a matrix-factorization (SVD) collaborative filtering model
from surprise import SVD

In [26]:
# SVD matrix-factorization model with 20 latent factors; other hyperparameters use library defaults.
model = SVD(n_factors=20)

In [27]:
# Train the SVD model on the full trainset; fit() returns the fitted model, which is displayed.
model.fit(trainset=mr_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x22f9443dd48>

In [28]:
# Preview the first five rows to pick a known (user, item, rating) triple for a prediction check.
mr.head(n=5)

Unnamed: 0,user,item,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [29]:
# Estimate the rating of raw user id 1 for raw item id 31. r_ui is the known true
# rating (2.5 in the ratings preview), passed so it is carried in the returned Prediction.
model.predict(
    uid=1,
    iid=31,
    r_ui=2.5,
)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.344355955862535, details={'was_impossible': False})

In [14]:
from surprise import NMF

In [30]:
# Non-negative matrix factorization with 20 factors and bias terms enabled.
# NOTE(review): Surprise's documentation warns that the biased NMF variant is prone
# to overfitting — confirm this configuration is intended.
model1 = NMF(n_factors=20, biased=True)

In [31]:
# Train the NMF model on the same full trainset used for SVD; the fitted model is displayed.
model1.fit(trainset=mr_trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x22f94440808>

In [32]:
# Same (user 1, item 31) check as for the SVD model, so the two estimates can be compared
# against the known rating of 2.5.
model1.predict(
    uid=1,
    iid=31,
    r_ui=2.5,
)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.9604410474603307, details={'was_impossible': False})

In [37]:
## Evaluate model performance at the current hyperparameter values

from surprise.model_selection import cross_validate

# Fresh, unfitted SVD instance: cross_validate fits it on each of the 5 folds.
algo = SVD(n_factors=20)

# RMSE and MAE are collected per fold; training-set scores are recorded as well.
results = cross_validate(
    algo=algo,
    data=mr_train,
    measures=['RMSE', 'MAE'],
    cv=5,
    return_train_measures=True,
)

In [38]:
# Mean test RMSE of the SVD model across the cross-validation folds.
print(np.mean(results['test_rmse']))

0.8910633121180643


In [40]:
# Mean test MAE of the SVD model across the cross-validation folds.
print(np.mean(results['test_mae']))

0.6865755741984885


In [41]:
# Same 5-fold evaluation for the biased NMF model, for comparison with SVD.
algo1 = NMF(n_factors=20, biased=True)

results1 = cross_validate(
    algo=algo1,
    data=mr_train,
    measures=['RMSE', 'MAE'],
    cv=5,
    return_train_measures=True,
)

In [42]:
# Mean test RMSE of the NMF model across the cross-validation folds.
print(np.mean(results1['test_rmse']))

1.3370452357579312


In [43]:
# Mean test MAE of the NMF model across the cross-validation folds.
print(np.mean(results1['test_mae']))

1.023444935215483


In [44]:
## Grid search over the number of latent factors for the SVD model
from surprise.model_selection import GridSearchCV

# GridSearchCV is given the algorithm class (not an instance), so `algo` is
# rebound here to the SVD class itself.
algo = SVD
# Candidate values for the number of latent factors.
param_grid = {"n_factors": [15, 20, 25, 30]}

In [45]:
# Evaluate every n_factors candidate with 5-fold cross-validation on RMSE and MAE.
grid_search = GridSearchCV(
    algo_class=algo,
    param_grid=param_grid,
    measures=['RMSE', 'MAE'],
    cv=5,
)
grid_search.fit(data=mr_train)

In [47]:
# Best parameter combination per metric, one per line (RMSE first, then MAE).
print(grid_search.best_params['rmse'], grid_search.best_params['mae'], sep='\n')

{'n_factors': 20}
{'n_factors': 15}


In [48]:
# Best mean cross-validated score per metric, one per line (RMSE first, then MAE).
print(*(grid_search.best_score[metric] for metric in ('rmse', 'mae')), sep='\n')

0.8907199100663318
0.6860485785820549
