# Movie Recommendation System with Collaborative Filtering
<hr>

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from surprise import SVD, Reader, Dataset
from surprise.model_selection import cross_validate

In [3]:
df_rating = pd.read_csv('../movie-recommender-system/data/ratings_small.csv')
df_rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [4]:
# using svd (singular value decomposition)
svd = SVD()

In [5]:
reader = Reader()

In [7]:
# convert pd dataframe to surprise dataset
data = Dataset.load_from_df(df_rating[['userId', 'movieId', 'rating']], reader)

In [8]:
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8918  0.8989  0.8949  0.8976  0.8988  0.8964  0.0027  
MAE (testset)     0.6871  0.6929  0.6884  0.6898  0.6936  0.6904  0.0025  
Fit time          2.75    2.28    2.38    1.89    1.90    2.24    0.32    
Test time         0.23    0.39    0.17    0.33    0.16    0.26    0.09    


{'test_rmse': array([0.89176246, 0.89885846, 0.89493819, 0.89755543, 0.89882337]),
 'test_mae': array([0.68708373, 0.69291014, 0.68836901, 0.6897906 , 0.69364672]),
 'fit_time': (2.7454888820648193,
  2.2809855937957764,
  2.3806421756744385,
  1.8921527862548828,
  1.8959414958953857),
 'test_time': (0.22939562797546387,
  0.39391088485717773,
  0.16712069511413574,
  0.3329141139984131,
  0.16470718383789062)}

In [9]:
train_set = data.build_full_trainset()
svd.fit(train_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f6b93cde560>

In [11]:
# ratings given by userId 1 

df_rating[df_rating['userId'] == 1]

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
5,1,1263,2.0,1260759151
6,1,1287,2.0,1260759187
7,1,1293,2.0,1260759148
8,1,1339,3.5,1260759125
9,1,1343,2.0,1260759131


In [12]:
# use svd to predict his score for movie_id 302
svd.predict(uid=1, iid=302, r_ui=None)

Prediction(uid=1, iid=302, r_ui=None, est=2.755377775420814, details={'was_impossible': False})