# Collaborative Filtering

### Load the data

In [6]:
import pandas as pd

# Read the ratings table; the path is resolved relative to the working directory.
ratings = pd.read_csv(filepath_or_buffer="ratings.csv")
# Preview the first six rows.
ratings.head(n=6)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
5,1,1263,2.0,1260759151


### Create the dataset

In [7]:
from surprise import Dataset, Reader

# Derive the rating scale from the data rather than hard-coding (1, 5): the
# sample rows include half-star values such as 2.5, and Surprise clips every
# prediction to this range, so a lower bound that is too high would make any
# rating below it impossible to predict.
rating_scale = (float(ratings["rating"].min()), float(ratings["rating"].max()))
reader = Reader(rating_scale=rating_scale)

# load_from_df expects exactly three columns, in the order user, item, rating.
dataset = Dataset.load_from_df(ratings[["userId", "movieId", "rating"]], reader)
dataset

<surprise.dataset.DatasetAutoFolds at 0x167fea9d310>

### Build the trainset

In [8]:
# Build a trainset from the whole dataset (no hold-out split at this stage).
trainset = dataset.build_full_trainset()

In [13]:
# Peek at the first five (inner user id, inner item id, rating) triples;
# zip() with range(5) stops the generator after five items.
[triple for _, triple in zip(range(5), trainset.all_ratings())]

[(0, 0, 2.5), (0, 1, 3.0), (0, 2, 3.0), (0, 3, 2.0), (0, 4, 4.0)]

### Train the model

In [14]:
from surprise import SVD

# SVD initialises its factors randomly; fixing the seed makes the fitted model
# (and the predictions shown below) reproducible across Restart & Run All.
svd = SVD(random_state=42)

In [15]:
# Train on the full trainset; fit() returns the algorithm object, hence the repr shown as output.
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x16782705a90>

#### Predict a rating with `svd.predict(user_id, movie_id)`

In [16]:
# Estimate the rating that user 15 would give movie 1956.
svd.predict(uid=15, iid=1956)

Prediction(uid=15, iid=1956, r_ui=None, est=3.2925457862493426, details={'was_impossible': False})

In [17]:
# Keep only the estimated rating from the returned Prediction.
prediction = svd.predict(uid=15, iid=1956)
prediction.est

3.2925457862493426

### Validation

In [18]:
from surprise import model_selection

# Cross-validate a fresh estimator: cross_validate() refits the algorithm it is
# given on each fold, so passing `svd` would leave it trained on the last
# fold's subset instead of the full trainset. Seeding both the model and the
# 5-fold split keeps the reported RMSE/MAE reproducible between runs.
model_selection.cross_validate(
    SVD(random_state=42),
    dataset,
    measures=["RMSE", "MAE"],
    cv=model_selection.KFold(n_splits=5, random_state=42),
)

{'test_rmse': array([0.89691721, 0.89639786, 0.89868908, 0.8930655 , 0.89992917]),
 'test_mae': array([0.69141483, 0.69083726, 0.69270901, 0.6876949 , 0.68661366]),
 'fit_time': (0.9087185859680176,
  0.9764759540557861,
  1.0361089706420898,
  1.055572509765625,
  1.0274276733398438),
 'test_time': (0.11502528190612793,
  0.12401032447814941,
  0.2189922332763672,
  0.10871767997741699,
  0.257343053817749)}