In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the cleaned ratings table from disk and preview its first five rows.
ratings = pd.read_csv(filepath_or_buffer="data/cleaned_ratings.csv")
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating
0,1,110,1.0
1,1,147,4.5
2,1,858,5.0
3,1,1221,5.0
4,1,1246,5.0


In [3]:
# Print the column dtypes, row count and memory footprint of the ratings frame.
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26024289 entries, 0 to 26024288
Data columns (total 3 columns):
 #   Column   Dtype  
---  ------   -----  
 0   userId   int64  
 1   movieId  int64  
 2   rating   float64
dtypes: float64(1), int64(2)
memory usage: 595.6 MB


In [4]:
# Count missing values per column.
ratings.isna().sum()

userId     0
movieId    0
rating     0
dtype: int64

## Cross Validation

In [9]:
from surprise import Reader, Dataset
from surprise.model_selection import cross_validate

In [7]:
from surprise import SVD

In [8]:
# Surprise's Reader defaults to a 1-5 rating scale, but this dataset contains
# half-star ratings (e.g. 4.5), so the true bounds may differ from the default.
# Take the scale from the data itself so predictions are clipped to the range
# the ratings actually span.
rating_scale = (ratings["rating"].min(), ratings["rating"].max())
reader = Reader(rating_scale=rating_scale)

# load_from_df expects exactly three columns in the order user, item, rating;
# select them explicitly so extra or reordered columns cannot slip in.
data = Dataset.load_from_df(ratings[["userId", "movieId", "rating"]], reader)

# Fix the seed used to initialise the latent factors so repeated runs are
# comparable. The fold assignment is still shuffled by cross_validate's default
# splitter, so scores may vary slightly between runs.
svd = SVD(random_state=42)

# 5-fold cross-validation, reporting RMSE on each held-out fold.
cross_validate(svd, data, measures=["RMSE"], cv=5)

{'test_rmse': array([0.79580141, 0.79660654, 0.79689153, 0.7960048 , 0.79610702]),
 'fit_time': (187.3638198375702,
  197.99534845352173,
  198.27649307250977,
  197.86805891990662,
  198.19916653633118),
 'test_time': (68.26660251617432,
  64.60137152671814,
  69.76208901405334,
  70.14395999908447,
  54.73418974876404)}

## SVD collaborative filtering model

In [13]:
# Surprise's Reader defaults to a 1-5 rating scale, but this dataset contains
# half-star ratings (e.g. 4.5), so the true bounds may differ from the default.
# Take the scale from the data itself so predictions are clipped to the range
# the ratings actually span.
rating_scale = (ratings["rating"].min(), ratings["rating"].max())
reader = Reader(rating_scale=rating_scale)

# load_from_df expects exactly three columns in the order user, item, rating;
# select them explicitly so extra or reordered columns cannot slip in.
data = Dataset.load_from_df(ratings[["userId", "movieId", "rating"]], reader)

# Seed the factor initialisation so retraining reproduces the same model.
svd = SVD(random_state=42)

# Use every rating for training: this is the final model, not an evaluation run.
trainset = data.build_full_trainset()

In [14]:
# Train the SVD model on the training set built from all ratings.
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7a9972ea0790>

In [20]:
# Sanity check: estimate the rating user 1 would give movie 858.
svd.predict(uid=1, iid=858)

Prediction(uid=1, iid=858, r_ui=None, est=4.581086731562962, details={'was_impossible': False})

In [2]:
# Persist the fitted SVD model to disk so it can be reloaded without retraining.
from surprise import dump

model_filename = "svd_model.pkl"
dump.dump(file_name=model_filename, algo=svd)

NameError: name 'svd' is not defined

## Load the model

In [1]:
# Restore the saved model and confirm it reproduces a known prediction.
# NOTE: Surprise's dump module is a pickle wrapper, so only load files you trust.
from surprise import dump

model_filename = "svd_model.pkl"
# dump.load returns a (predictions, algo) pair; index 1 is the algorithm.
loaded_model = dump.load(file_name=model_filename)[1]
loaded_model.predict(uid=1, iid=858)

Prediction(uid=1, iid=858, r_ui=None, est=4.581086731562962, details={'was_impossible': False})

In [6]:
# Estimate user 1's rating for movie 862 with the reloaded model and show it.
prediction = loaded_model.predict(uid=1, iid=862)
prediction

Prediction(uid=1, iid=862, r_ui=None, est=4.283547149553264, details={'was_impossible': False})

In [9]:
# Inspect the Python type of the user id carried by the prediction.
type(prediction.uid)

int

In [10]:
# Copy the prediction fields of interest into a plain dictionary
# (r_ui is deliberately left out).
result = {
    field: getattr(prediction, field)
    for field in ("uid", "iid", "est", "details")
}

In [11]:
# Display the dictionary built from the prediction fields.
result

{'uid': 1,
 'iid': 862,
 'est': 4.283547149553264,
 'details': {'was_impossible': False}}