# 

# Collaborative Based Data Filtering

## Loading the Data

In [1]:
import pandas as pd

In [19]:
ratings = pd.read_csv("ratings.csv")[["userId", "movieId", "rating"]]
ratings.head(10)

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0
5,1,1263,2.0
6,1,1287,2.0
7,1,1293,2.0
8,1,1339,3.5
9,1,1343,2.0


## Create the dataset

In [22]:
from surprise import Dataset, Reader

reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(ratings, reader)
dataset

<surprise.dataset.DatasetAutoFolds at 0x7f56b8ccf100>

## Build the train set

In [31]:
trainset = dataset.build_full_trainset()
list(trainset.all_ratings())

[(0, 0, 2.5),
 (0, 1, 3.0),
 (0, 2, 3.0),
 (0, 3, 2.0),
 (0, 4, 4.0),
 (0, 5, 2.0),
 (0, 6, 2.0),
 (0, 7, 2.0),
 (0, 8, 3.5),
 (0, 9, 2.0),
 (0, 10, 2.5),
 (0, 11, 1.0),
 (0, 12, 4.0),
 (0, 13, 4.0),
 (0, 14, 3.0),
 (0, 15, 2.0),
 (0, 16, 2.0),
 (0, 17, 2.5),
 (0, 18, 1.0),
 (0, 19, 3.0),
 (1, 20, 4.0),
 (1, 21, 5.0),
 (1, 22, 5.0),
 (1, 23, 4.0),
 (1, 24, 4.0),
 (1, 25, 3.0),
 (1, 26, 3.0),
 (1, 27, 4.0),
 (1, 28, 3.0),
 (1, 29, 5.0),
 (1, 30, 4.0),
 (1, 31, 3.0),
 (1, 32, 3.0),
 (1, 33, 3.0),
 (1, 34, 3.0),
 (1, 35, 3.0),
 (1, 36, 3.0),
 (1, 37, 5.0),
 (1, 38, 1.0),
 (1, 39, 3.0),
 (1, 40, 3.0),
 (1, 41, 3.0),
 (1, 42, 4.0),
 (1, 43, 4.0),
 (1, 44, 5.0),
 (1, 45, 5.0),
 (1, 46, 3.0),
 (1, 47, 4.0),
 (1, 48, 3.0),
 (1, 49, 4.0),
 (1, 50, 3.0),
 (1, 51, 4.0),
 (1, 52, 2.0),
 (1, 53, 1.0),
 (1, 54, 3.0),
 (1, 55, 4.0),
 (1, 56, 4.0),
 (1, 57, 3.0),
 (1, 58, 3.0),
 (1, 59, 3.0),
 (1, 60, 3.0),
 (1, 61, 2.0),
 (1, 62, 3.0),
 (1, 63, 3.0),
 (1, 64, 3.0),
 (1, 65, 3.0),
 (1, 66, 2.0),
 (1, 

## Training the ML Model

In [37]:
from surprise import SVD
svd = SVD()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f56e857aaa0>

In [46]:
svd.predict(15, 1956)

Prediction(uid=15, iid=1956, r_ui=None, est=3.1296820974085002, details={'was_impossible': False})

## Validation

In [52]:
from surprise import model_selection
model_selection.cross_validate(svd, dataset, measures=["RMSE", "MAE"])

{'test_rmse': array([0.89205751, 0.90233284, 0.89565107, 0.89907563, 0.8971807 ]),
 'test_mae': array([0.68807994, 0.69594269, 0.68957925, 0.69122961, 0.68988115]),
 'fit_time': (0.9144501686096191,
  0.9778425693511963,
  0.9343883991241455,
  0.8997642993927002,
  0.9351685047149658),
 'test_time': (0.09957480430603027,
  0.08634543418884277,
  0.09081459045410156,
  0.10336542129516602,
  0.08677816390991211)}

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=d73c100a-4691-424c-8593-2a77917e5676' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>