In [1]:
# Install the scikit-surprise package (imported below as `surprise`) from the
# conda-forge channel.
import sys

# `!` runs the line as a shell command; IPython substitutes {sys.prefix} with the
# prefix of the interpreter running this kernel, so the package is installed into
# the kernel's own environment. `--yes` skips conda's confirmation prompt, which
# could not be answered from inside a notebook cell.
!conda install --yes --prefix {sys.prefix} -c conda-forge scikit-surprise

Solving environment: done


  current version: 4.4.9
  latest version: 4.6.4

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.



# Lab 9: Recommendation System

In this lab, we will use [Surprise](http://surpriselib.com/), an easy-to-use Python scikit for building recommender systems. It includes several commonly used algorithms, including [collaborative filtering](https://surprise.readthedocs.io/en/stable/knn_inspired.html) and [matrix factorization-based algorithms](https://surprise.readthedocs.io/en/stable/matrix_factorization.html).

In [2]:
from surprise.prediction_algorithms.matrix_factorization import SVD
from surprise.prediction_algorithms.knns import KNNBasic
from surprise.prediction_algorithms.knns import KNNWithMeans
from surprise.prediction_algorithms.knns import KNNBaseline
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV

## Load data from the Surprise package

First, we download the MovieLens 100k dataset that ships with Surprise. The data will be saved in the `.surprise_data` folder in your home directory.

In [3]:
# Load the movielens-100k dataset (download it if needed).
data = Dataset.load_builtin('ml-100k')

# Hold out a random 20% of the ratings as the test set and train on the rest.
# random_state pins the shuffle so the split -- and therefore every metric
# reported below -- is reproducible under Restart Kernel -> Run All.
trainset, testset = train_test_split(data, test_size=.20, random_state=42)

## Collaborative Filtering

First, we will apply three different flavors of collaborative filtering to this data.

### The basic collaborative filtering algorithm

In [4]:
# Basic k-NN collaborative filtering (KNNBasic), trained on the training split.
# Reference: https://surprise.readthedocs.io/en/stable/knn_inspired.html
# fit() returns the fitted algorithm, so construction and training are chained.
cf = KNNBasic().fit(trainset)

# Predict ratings for the held-out test set.
predictions = cf.test(testset)

# Report both error metrics: each call prints its value, and the MAE (the last
# expression) is additionally shown as the cell's output.
accuracy.rmse(predictions)
accuracy.mae(predictions)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9830
MAE:  0.7780


0.7780061513073939

### The basic collaborative filtering algorithm with user mean ratings

In [5]:
# k-NN collaborative filtering that takes each user's mean rating into account
# (KNNWithMeans), trained on the training split.
# Reference: https://surprise.readthedocs.io/en/stable/knn_inspired.html
# fit() returns the fitted algorithm, so construction and training are chained.
cf_mean = KNNWithMeans().fit(trainset)

# Predict ratings for the held-out test set.
predictions = cf_mean.test(testset)

# Report both error metrics: each call prints its value, and the MAE (the last
# expression) is additionally shown as the cell's output.
accuracy.rmse(predictions)
accuracy.mae(predictions)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9531
MAE:  0.7523


0.7523261759504346

### The basic collaborative filtering algorithm with baseline ratings

In [None]:
# k-NN collaborative filtering that takes a baseline rating into account
# (KNNBaseline), trained on the training split.
# k=50 is the (maximum) number of neighbours used when aggregating ratings.
# Reference: https://surprise.readthedocs.io/en/stable/knn_inspired.html
# fit() returns the fitted algorithm, so construction and training are chained.
cf_baseline = KNNBaseline(k=50).fit(trainset)

# Predict ratings for the held-out test set.
predictions = cf_baseline.test(testset)

# Report both error metrics: each call prints its value, and the MAE (the last
# expression) is additionally shown as the cell's output.
accuracy.rmse(predictions)
accuracy.mae(predictions)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9323
MAE:  0.7351


0.7350675933541603

## Matrix Factorization

Next, we will apply matrix factorization to the same data.

In [None]:
# We'll use the famous SVD algorithm, here with 4 latent factors.
# See https://surprise.readthedocs.io/en/stable/matrix_factorization.html for more details.
# SVD starts from a random initialisation, so random_state is pinned to make the
# reported metrics reproducible from one run to the next.
svd = SVD(n_factors=4, random_state=42)

# Train the algorithm on the trainset, and predict ratings for the testset
svd.fit(trainset)
predictions = svd.test(testset)

# Then compute RMSE and MAE (the MAE, as the last expression, is also the
# cell's displayed output).
accuracy.rmse(predictions)
accuracy.mae(predictions)

# End of Lab 9: Recommendation System