In [1]:
# Import required libraries
import numpy as np
import pandas as pd
from tqdm import tqdm
from surprise import Dataset, SVD
from surprise.prediction_algorithms import KNNBasic
from surprise.model_selection import cross_validate
from surprise.reader import Reader
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read csv
ratings = pd.read_csv("../data/movies/ratings_small.csv")
print("Number of records:", ratings.shape)
ratings.head()

Number of records: (100004, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [3]:
# Reader object for surprise
reader = Reader(rating_scale = (1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

In [4]:
# Compute results for PMF (or equivalently SVD)
pmf_results = cross_validate(SVD(), data, measures = ['RMSE', 'MAE'], cv = 5, verbose = False)

# Compute and store mean of test rmse and mae
pmf_test_rmse = pmf_results['test_rmse'].mean()
pmf_test_mae = pmf_results['test_mae'].mean()

# Display results
print("Test RMSE for PMF:", pmf_test_rmse)
print("Test MAE for PMF:", pmf_test_mae)

Test RMSE for PMF: 0.8971645376887551
Test MAE for PMF: 0.6906904651027698


In [5]:
# Initialize the model (user based collaborative filtering (ubcf))
ubcf = KNNBasic(sim_options = {'name': 'cosine',
                             'user_based': True,
                             'min_support': True,
                             'min_k': 2})

# Compute results for user based collaborative filtering
ubcf_results = cross_validate(ubcf, data, measures = ['RMSE', 'MAE'], cv = 5, verbose = False)

# Compute and store mean of test rmse and mae
ubcf_rmse = ubcf_results['test_rmse'].mean()
ubcf_mae = ubcf_results['test_mae'].mean()

# Display results
print("Test RMSE for User Based Collaborative Filtering:", ubcf_rmse)
print("Test MAE for User Based Collaborative Filtering:", ubcf_mae)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Test RMSE for User Based Collaborative Filtering: 0.9929204474162363
Test MAE for User Based Collaborative Filtering: 0.766993666636894


In [6]:
# Initialize the model (item based collaborative filtering (ibcf))
ibcf = KNNBasic(sim_options = {'name': 'cosine',
                               'item_based': True,
                               'min_support': True,
                               'min_k': 2})

# Compute results for item based collaborative filtering
ibcf_results = cross_validate(ibcf, data, measures = ['RMSE', 'MAE'], cv = 5, verbose = False)

# Compute and store mean of test rmse and mae
ibcf_rmse = ibcf_results['test_rmse'].mean()
ibcf_mae = ibcf_results['test_mae'].mean()

# Display results
print("Test RMSE for User Based Collaborative Filtering:", ibcf_rmse)
print("Test MAE for User Based Collaborative Filtering:", ibcf_mae)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Test RMSE for User Based Collaborative Filtering: 0.9932244481930528
Test MAE for User Based Collaborative Filtering: 0.7674259819742985
