# **CUHK-STAT3009** Notebook - MF-based Models


In [19]:
# Load and pro-processed dataset
import numpy as np
import pandas as pd

## Upload Netflix dataset in CUHK-STAT3009 Github repo

train_url = "https://raw.githubusercontent.com/statmlben/CUHK-STAT3009/main/dataset/train.csv"
test_url = "https://raw.githubusercontent.com/statmlben/CUHK-STAT3009/main/dataset/test.csv"

dtrain = pd.read_csv(train_url)
dtest = pd.read_csv(test_url)

train_rating = dtrain['rating'].values
train_rating = np.array(train_rating, dtype=float)
train_pair = dtrain[['user_id', 'movie_id']].values

test_rating = dtest['rating'].values
test_rating = np.array(test_rating, dtype=float)
test_pair = dtest[['user_id', 'movie_id']].values

## Take a subset for illustration: item_id < 200
train_rating = train_rating[train_pair[:,1] < 200]
train_pair = train_pair[train_pair[:,1] < 200]
test_rating = test_rating[test_pair[:,1] < 200]
test_pair = test_pair[test_pair[:,1] < 200]

n_user = max( max(train_pair[:,0]), max(test_pair[:,0]) ) + 1
n_item = max( max(train_pair[:,1]), max(test_pair[:,1]) ) + 1

def rmse(test_rating, pred_rating):
    return np.sqrt( np.mean( (pred_rating - test_rating)**2) )

In [20]:
print('number of ratings in the subset: %d' %len(train_pair))

number of ratings in the subset: 2400


## **N-SVD**

- Model:
	$$
  \widehat{r}_{ui} = \mathbf{q}_i^\intercal \big( \tau_u \sum_{i' \in \mathcal{I}_u} \mathbf{w}_{i'} \big)
	$$

- Formulation:
  $$
  \min_{\mathbf{Q}, \mathbf{W}} \frac{1}{|\Omega|} \sum_{(u,i) \in \Omega} \Big( r_{ui} - \mathbf{q}_i^\intercal \big( \tau_u \sum_{i' \in \mathcal{I}_u} \mathbf{w}_{i'} \big) \Big)^2 + \lambda \sum_{i=1}^m \big( \|\mathbf{q}_i\|_2^2 + \|\mathbf{w}_i\|_2^2 \big)
  $$

- ALGO: BCD

  - (Q-update)
  $$\mathbf{p}^{(l)}_u = \tau_u \sum_{i' \in \mathcal{I}_u} \mathbf{w}^{(l)}_{i'}, \quad \text{for } u = 1, \cdots, n$$
  - For $i = 1, \cdots, m$
$$ \mathbf{q}^{(l+1)}_i = ( \sum_{u \in \mathcal{U}_i} \mathbf{p}^{(l)}_u (\mathbf{p}^{(l)}_u)^T + \lambda |\Omega| \mathbf{I})^{-1} \sum_{u \in \mathcal{U}_i} r_{ui} \mathbf{p}^{(l)}_u $$
  - (W-update)
  - For $i = 1, \cdots, m$
$$\tilde{r}_{ui} = r_{ui} - \tau_u (\mathbf{q}^{(l)}_i)^\intercal \big( \sum_{j' \in \mathcal{I}_u; j' \neq j} \mathbf{w}^{(l)}_{j'} \big)$$
$$\mathbf{w}^{(l+1)}_{i} = \big( \sum_{ u: j \in \mathcal{I}_u } \frac{1}{|\mathcal{I}_u|} \sum_{i \in \mathcal{I}_u} \mathbf{q}^{(l+1)}_i {(\mathbf{q}^{(l+1)}_i)^\intercal} + \lambda |\Omega| \mathbf{I} \big)^{-1} \Big( \sum_{u: j \in \mathcal{I}_u } \tau_u \sum_{i \in \mathcal{I}_u} \tilde{r}_{ui} \mathbf{q}^{(l+1)}_i \Big)
	$$

### Make a `skikit-learn` compatible class


- **Parameter**:
    - #Users: `n`
    - #Items: `m`
    - latent factors for item2user: `W`
    - latent factors for items: `Q`
    - l2-weight (*hp*): `lam`
    - #Latent factors (*hp*): `K`

- **Method**:

  - `fit`: 
    - *input*: `train_pair`, `train_rating`
    - *output*: fitted P and W
  - `predict`: 
    - *input*: `test_pair`
    - *output*: predicted ratings
  - `rmse`: 
    - *input*: `test_pair`, `test_rating`
    - *output*: RMSE for the predicted ratings

  - `obj`: 
    - *input*: `test_pair`, `test_rating`
    - *output*: objective function for the MF method

  - `GenP`:
    - Generate fake P from current W
  
  - `update_W` and `update_Q`

In [21]:
class NSVD(object):

    def __init__(self, n_user, n_item, lam=.001, K=10, iterNum=50, tol=1e-4, verbose=1, rd_ratio=.5):
        # self.P = np.random.randn(n_user, K)
        self.Q = np.random.randn(n_item, K)
        self.W = np.random.randn(n_item, K)
        # self.index_item = []
        # self.index_user = []
        self.n_user = n_user
        self.n_item = n_item
        self.lam = lam
        self.K = K
        self.iterNum = iterNum
        self.tol = tol
        self.verbose = verbose
        self.fake_P = np.random.randn(n_user, K)
        self.rd_ratio=rd_ratio

    def fit_Q(self, train_pair, train_rating):
        n_user, n_item, n_obs = self.n_user, self.n_item, len(train_pair)
        K, iterNum, lam = self.K, self.iterNum, self.lam
        for item_id in range(n_item):
            index_item_tmp = self.index_item[item_id]
            if len(index_item_tmp) == 0:
                self.Q[item_id,:] = 0.
                continue
            ## compute `sum_pu` and `sum_matrix`
            sum_pu, sum_matrix = np.zeros((K)), np.zeros((K, K))
            for record_ind in index_item_tmp:
                ## double-check
                if item_id != train_pair[record_ind][1]:
                    raise ValueError('the item_id is worning in updating Q!')
                user_id, rating_tmp = train_pair[record_ind][0], train_rating[record_ind]
                sum_matrix = sum_matrix + np.outer(self.fake_P[user_id,:], self.fake_P[user_id,:])
                sum_pu = sum_pu + rating_tmp * self.fake_P[user_id,:]                    
            self.Q[item_id,:] = np.dot(np.linalg.inv(sum_matrix + lam*n_obs*np.identity(K)), sum_pu)
        return self.obj(test_pair=train_pair, test_rating=train_rating)

    def fit_W(self, train_pair, train_rating):
        K, lam, n_obs = self.K, self.lam, len(train_pair)
        Q_out = np.array([np.outer(self.Q[j,:], self.Q[j,:]) for j in range(self.n_item)])
        for item_id in range(n_item):
            # print('Fitting Reg-NSVD: Block W-%d' %item_id)
            ## Users give rating to `item_id`
            U_j = self.index_item[item_id]
            if len(U_j) == 0:
              self.W[item_id,:] = np.zeros(self.K)
              continue
            pred_tmp = self.Q @ self.W[item_id]
            train_rating_res = train_rating - self.predict(train_pair) + pred_tmp[train_pair[:,1]] /np.sqrt(self.vol_I_u[train_pair[:,0]])
            ## compute `sum_pu` and `sum_matrix`
            sum_qi, sum_matrix = np.zeros((K)), np.zeros((K, K))
            
            for idx1 in np.random.choice(U_j, min(int(self.rd_ratio*len(U_j)), len(U_j)), replace=False):
                user_id = train_pair[idx1, 0]
                I_u = self.index_user[user_id]

                for idx2 in I_u:
                    item_j = train_pair[idx2, 1]
                    sum_matrix += Q_out[item_j] / self.vol_I_u[user_id]
                    sum_qi += train_rating_res[idx2] * self.Q[item_j,:] / np.sqrt(self.vol_I_u[user_id])
            self.W[item_id,:] = np.dot(np.linalg.inv(sum_matrix + lam*n_obs*np.identity(K)), sum_qi)
            self.GenP(train_pair)
        return self.obj(test_pair=train_pair, test_rating=train_rating)


    def fit(self, train_pair, train_rating):
        diff, tol = 1., self.tol
        n_user, n_item, n_obs = self.n_user, self.n_item, len(train_pair)
        K, iterNum, lam = self.K, self.iterNum, self.lam
        ## store user/item index set
        self.index_item = [np.where(train_pair[:,1] == i)[0] for i in range(n_item)]
        self.index_user = [np.where(train_pair[:,0] == u)[0] for u in range(n_user)]
        ## compute the volumne of user index
        self.vol_I_u = np.array([len(self.index_user[u]) for u in range(self.n_user)]) + 1

        obj = self.obj(test_pair=train_pair, test_rating=train_rating)

        if self.verbose:
            print('Fitting Reg-NSVD: K: %d, lam: %.5f' %(K, lam))
        
        for i in range(iterNum):
            ## Q update
            obj_old = obj
            obj = self.fit_Q(train_pair, train_rating)
            # print("Reg-NSVD: Q update ite: %d; Obj: %.3f" %(i, obj))
            ## W update
            obj = self.fit_W(train_pair, train_rating)
            
            diff = abs(obj_old - obj) / obj_old
            if self.verbose:
                print("Reg-NSVD: ite: %d; diff: %.3f Obj: %.3f" %(i, diff, obj))
            if(diff < tol):
                break

    def GenP(self, train_pair):
        for u in range(self.n_user):
          I_u_tmp = self.index_user[u]
          if len(I_u_tmp) == 0:
            self.fake_P[u] = 0
          else:
            self.fake_P[u,:] = np.sum(self.W[train_pair[I_u_tmp, 1]], 0) / np.sqrt(self.vol_I_u[u])

    def predict(self, test_pair):
        # predict ratings for user-item pairs
        pred_rating = [np.dot(self.fake_P[line[0]], self.Q[line[1]]) for line in test_pair]
        return np.array(pred_rating)
    
    def rmse(self, test_pair, test_rating):
        # report the rmse for the fitted `MF`
        pred_rating = self.predict(test_pair=test_pair)
        return np.sqrt( np.mean( (pred_rating - test_rating)**2) )
      
    def obj(self, test_pair, test_rating):
        return (self.rmse(test_pair, test_rating))**2 + self.lam*np.sum(self.W**2) + self.lam*np.sum(self.Q**2)

In [22]:
# fitting
cue = NSVD(n_user, n_item, K=3, lam=.0001, rd_ratio=1.)
cue.fit(train_pair=train_pair, train_rating=train_rating)

Fitting Reg-NSVD: K: 3, lam: 0.00010
Reg-NSVD: ite: 0; diff: 0.460 Obj: 9.882
Reg-NSVD: ite: 1; diff: 0.727 Obj: 2.696
Reg-NSVD: ite: 2; diff: 0.294 Obj: 1.905
Reg-NSVD: ite: 3; diff: 0.142 Obj: 1.635
Reg-NSVD: ite: 4; diff: 0.080 Obj: 1.505
Reg-NSVD: ite: 5; diff: 0.044 Obj: 1.438
Reg-NSVD: ite: 6; diff: 0.030 Obj: 1.396
Reg-NSVD: ite: 7; diff: 0.023 Obj: 1.364
Reg-NSVD: ite: 8; diff: 0.020 Obj: 1.336
Reg-NSVD: ite: 9; diff: 0.017 Obj: 1.314
Reg-NSVD: ite: 10; diff: 0.013 Obj: 1.297
Reg-NSVD: ite: 11; diff: 0.009 Obj: 1.285
Reg-NSVD: ite: 12; diff: 0.008 Obj: 1.275
Reg-NSVD: ite: 13; diff: 0.007 Obj: 1.266
Reg-NSVD: ite: 14; diff: 0.007 Obj: 1.257
Reg-NSVD: ite: 15; diff: 0.007 Obj: 1.248
Reg-NSVD: ite: 16; diff: 0.007 Obj: 1.239
Reg-NSVD: ite: 17; diff: 0.007 Obj: 1.230
Reg-NSVD: ite: 18; diff: 0.007 Obj: 1.221
Reg-NSVD: ite: 19; diff: 0.006 Obj: 1.214
Reg-NSVD: ite: 20; diff: 0.006 Obj: 1.207
Reg-NSVD: ite: 21; diff: 0.005 Obj: 1.202
Reg-NSVD: ite: 22; diff: 0.004 Obj: 1.197
Reg-NSV

In [23]:
# pediction
pred_rating = cue.predict(test_pair)
pred_train_rating = cue.predict(train_pair)
print('train rmse: %.3f; test rmse: %.3f' %(rmse(train_rating, pred_train_rating), rmse(test_rating, pred_rating)))

train rmse: 1.000; test rmse: 2.729


## **SVD++**

- Model
  $$
  \widehat{r}_{ui} = \mathbf{q}_i^\intercal \big( \mathbf{p}_u + \tau_u \sum_{j \in \mathcal{I}_u} \mathbf{w}_{j} \big).
  $$

- Formulation
$$
\min_{\mathbf{P}, \mathbf{Q}, \mathbf{W}} \frac{1}{|\Omega|} \sum_{(u,i) \in \Omega} \Big( r_{ui} - \mathbf{q}_i^\intercal \big( \mathbf{p}_u + \tau_u \sum_{j \in \mathcal{I}_u} \mathbf{w}_{j} \big) \Big)^2 + \lambda \Big( \sum_{i=1}^m \big( \|\mathbf{q}_i\|_2^2 + \|\mathbf{w}_i\|_2^2 \big) + \sum_{u=1}^n \|\mathbf{p}_u\|_2^2 \Big)
$$

- ALGO (BCD)

  - (P update)
  $$ \tilde{r}_{ui} - r_{ui} - \tau_u (\mathbf{q}^{(l)}_i)^\intercal \sum_{j \in \mathcal{I}_u} \mathbf{w}^{(l)}_j $$

    - For $u = 1, \cdots, n$

  $$ \hat{\mathbf{p}}^{(l+1)}_u = ( \sum_{i \in \mathcal{I}_u} \mathbf{q}^{(l)}_i (\mathbf{q}^{(l)}_i)^\intercal + \lambda |\Omega| \mathbf{I})^{-1} \sum_{i \in \mathcal{I}_u} r_{ui} \mathbf{q}^{(l)}_i $$

  - (Q update)
  $$\tilde{\mathbf{p}}^{(l)}_u = \mathbf{p}^{(l+1)}_u + \tau_u \sum_{i' \in \mathcal{I}_u} \mathbf{w}^{(l)}_{i'}, \quad \text{for } u = 1, \cdots, n$$

    - For $i = 1, \cdots, m$

  $$ \mathbf{q}^{(l+1)}_i = ( \sum_{u \in \mathcal{U}_i} \mathbf{p}^{(l)}_u (\mathbf{p}^{(l)}_u)^T + \lambda |\Omega| \mathbf{I})^{-1} \sum_{u \in \mathcal{U}_i} r_{ui} \mathbf{p}^{(l)}_u $$

  - (W update)

  $$ \tilde{r}_{ui} = r_{ui} - (\mathbf{q}^{(l+1)}_i)^\intercal \mathbf{p}_u^{(l+1)} - \tau_u (\mathbf{q}^{(l)}_i)^\intercal \big( \sum_{j' \in \mathcal{I}_u; j' \neq j} \mathbf{w}^{(l)}_{j'} \big)$$

    - For $j = 1, \cdots, m$

  $$ \mathbf{w}^{(l+1)}_{i} = \big( \sum_{ u \in \mathcal{U}_j  } \frac{1}{|\mathcal{I}_u|} \sum_{i \in \mathcal{I}_u} \mathbf{q}^{(l+1)}_i {(\mathbf{q}^{(l+1)}_i)^\intercal} + \lambda |\Omega| \mathbf{I} \big)^{-1} \Big( \sum_{u \in \mathcal{U}_j  } \tau_u \sum_{i \in \mathcal{I}_u} \tilde{r}_{ui} \mathbf{q}^{(l+1)}_i \Big) $$

### Make a `skikit-learn` compatible class


- **Parameter**:
    - #Users: `n`
    - #Items: `m`
    - latent factors for item2user: `W`
    - latent factors for items: `Q`
    - l2-weight (*hp*): `lam`
    - #Latent factors (*hp*): `K`

- **Method**:

  - `fit`: 
    - *input*: `train_pair`, `train_rating`
    - *output*: fitted P and W
  - `predict`: 
    - *input*: `test_pair`
    - *output*: predicted ratings
  - `rmse`: 
    - *input*: `test_pair`, `test_rating`
    - *output*: RMSE for the predicted ratings

  - `obj`: 
    - *input*: `test_pair`, `test_rating`
    - *output*: objective function for the MF method

  - `GenP`:
    - Generate fake P from current W
  
  - `update_W` and `update_Q` and `update_P`

In [24]:
class SVDpp(object):

    def __init__(self, n_user, n_item, lam=.001, K=10, iterNum=50, tol=1e-4, verbose=1, rd_ratio=.5):
        self.P = np.random.randn(n_user, K)
        self.Q = np.random.randn(n_item, K)
        self.W = np.zeros((n_item, K))
        # self.index_item = []
        # self.index_user = []
        self.n_user = n_user
        self.n_item = n_item
        self.lam = lam
        self.K = K
        self.iterNum = iterNum
        self.tol = tol
        self.verbose = verbose
        self.fake_P = np.random.randn(n_user, K)
        self.rd_ratio=rd_ratio

    def fit_P(self, train_pair, train_rating):
        n_user, n_item, n_obs = self.n_user, self.n_item, len(train_pair)
        K, iterNum, lam = self.K, self.iterNum, self.lam
        train_rating_res = train_rating - self.predict(train_pair, option='fake_P')
        
        for user_id in range(n_user):
            index_user_tmp = self.index_user[user_id]
            if len(index_user_tmp) == 0:
                self.P[user_id,:] = 0.
                continue
            ## compute `sum_qi` and `sum_matrix`
            sum_qi, sum_matrix = np.zeros((K)), np.zeros((K, K))
            for record_ind in index_user_tmp:
                ## double-check
                if user_id != train_pair[record_ind][0]:
                    raise ValueError('the user_id is worning in updating P!')
                item_id, rating_tmp = train_pair[record_ind][1], train_rating_res[record_ind]
                sum_matrix = sum_matrix + np.outer(self.Q[item_id,:], self.Q[item_id,:])
                sum_qi = sum_qi + rating_tmp * self.Q[item_id,:]                    
            self.P[user_id,:] = np.dot(np.linalg.inv(sum_matrix + lam*n_obs*np.identity(K)), sum_qi)
        return self.obj(test_pair=train_pair, test_rating=train_rating)


    def fit_Q(self, train_pair, train_rating):
        n_user, n_item, n_obs = self.n_user, self.n_item, len(train_pair)
        K, iterNum, lam = self.K, self.iterNum, self.lam
        for item_id in range(n_item):
            index_item_tmp = self.index_item[item_id]
            if len(index_item_tmp) == 0:
                self.Q[item_id,:] = 0.
                continue
            ## compute `sum_pu` and `sum_matrix`
            sum_pu, sum_matrix = np.zeros((K)), np.zeros((K, K))
            for record_ind in index_item_tmp:
                ## double-check
                if item_id != train_pair[record_ind][1]:
                    raise ValueError('the item_id is worning in updating Q!')
                user_id, rating_tmp = train_pair[record_ind][0], train_rating[record_ind]
                sum_matrix = sum_matrix + np.outer(self.fake_P[user_id,:] + self.P[user_id,:], self.fake_P[user_id,:] + self.P[user_id,:])
                sum_pu = sum_pu + rating_tmp * (self.fake_P[user_id,:] + self.P[user_id,:])                    
            self.Q[item_id,:] = np.dot(np.linalg.inv(sum_matrix + lam*n_obs*np.identity(K)), sum_pu)
        return self.obj(test_pair=train_pair, test_rating=train_rating)

    def fit_W(self, train_pair, train_rating):
        K, lam, n_obs = self.K, self.lam, len(train_pair)
        Q_out = np.array([np.outer(self.Q[j,:], self.Q[j,:]) for j in range(self.n_item)])
        for item_id in range(n_item):
            # print('Fitting Reg-SVDpp: Block W-%d' %item_id)
            ## Users give rating to `item_id`
            U_j = self.index_item[item_id]
            if len(U_j) == 0:
              self.W[item_id,:] = np.zeros(self.K)
              continue
            pred_tmp = self.Q @ self.W[item_id]
            train_rating_res = train_rating - self.predict(train_pair) + pred_tmp[train_pair[:,1]] / np.sqrt(self.vol_I_u[train_pair[:,0]])
            ## compute `sum_pu` and `sum_matrix`
            sum_qi, sum_matrix = np.zeros((K)), np.zeros((K, K))

            for idx1 in np.random.choice(U_j, min(int(self.rd_ratio*len(U_j)), len(U_j)), replace=False):  
                user_id = train_pair[idx1, 0]
                I_u = self.index_user[user_id]

                for idx2 in I_u:
                    item_j = train_pair[idx2, 1]
                    sum_matrix += Q_out[item_j] / self.vol_I_u[user_id]
                    sum_qi += train_rating_res[idx2] * self.Q[item_j,:] / np.sqrt(self.vol_I_u[user_id])
            self.W[item_id,:] = np.dot(np.linalg.inv(sum_matrix + lam*n_obs*np.identity(K)), sum_qi)
            self.GenP(train_pair)
        return self.obj(test_pair=train_pair, test_rating=train_rating)


    def fit(self, train_pair, train_rating):
        diff, tol = 1., self.tol
        n_user, n_item, n_obs = self.n_user, self.n_item, len(train_pair)
        K, iterNum, lam = self.K, self.iterNum, self.lam
        ## store user/item index set
        self.index_item = [np.where(train_pair[:,1] == i)[0] for i in range(n_item)]
        self.index_user = [np.where(train_pair[:,0] == u)[0] for u in range(n_user)]
        ## compute the volumne of user index
        self.vol_I_u = np.array([len(self.index_user[u]) for u in range(self.n_user)]) + 1

        obj = self.obj(test_pair=train_pair, test_rating=train_rating)

        if self.verbose:
            print('Fitting Reg-SVDpp: K: %d, lam: %.5f' %(K, lam))
        
        for i in range(iterNum):
            obj_old = obj
            ## P update
            obj = self.fit_P(train_pair, train_rating)     
            ## Q update
            obj = self.fit_Q(train_pair, train_rating)            
            ## W update
            obj = self.fit_W(train_pair, train_rating)
            
            diff = abs(obj_old - obj) / obj_old
            if self.verbose:
                print("Reg-SVDpp: ite: %d; diff: %.3f Obj: %.3f" %(i, diff, obj))
            if(diff < tol):
                break

    def GenP(self, train_pair):
        for u in range(self.n_user):
          I_u_tmp = self.index_user[u]
          if len(I_u_tmp) == 0:
            self.fake_P[u] = 0
          else:
            self.fake_P[u,:] = np.sum(self.W[train_pair[I_u_tmp, 1]], 0) / np.sqrt(self.vol_I_u[u])

    def predict(self, test_pair, option='all_P'):
        # predict ratings for user-item pairs
        # P has two parts
        if option == 'all_P':
            pred_rating = [np.dot(self.fake_P[line[0]] + self.P[line[0]], self.Q[line[1]]) for line in test_pair]
        elif option == 'fake_P':
            pred_rating = [np.dot(self.fake_P[line[0]], self.Q[line[1]]) for line in test_pair]
        else:
            pred_rating = [np.dot(self.P[line[0]], self.Q[line[1]]) for line in test_pair]
        return np.array(pred_rating)
    
    def rmse(self, test_pair, test_rating):
        # report the rmse for the fitted `MF`
        pred_rating = self.predict(test_pair=test_pair)
        return np.sqrt( np.mean( (pred_rating - test_rating)**2) )
      
    def obj(self, test_pair, test_rating):
        return (self.rmse(test_pair, test_rating))**2 + self.lam*np.sum(self.W**2) + self.lam*np.sum(self.Q**2) + self.lam*np.sum(self.P**2)

In [26]:
# fitting
cue = SVDpp(n_user, n_item, K=3, lam=.0001, rd_ratio=1.)
cue.fit(train_pair=train_pair, train_rating=train_rating)

Fitting Reg-SVDpp: K: 3, lam: 0.00010
Reg-SVDpp: ite: 0; diff: 0.661 Obj: 6.972
Reg-SVDpp: ite: 1; diff: 0.518 Obj: 3.363
Reg-SVDpp: ite: 2; diff: 0.194 Obj: 2.712
Reg-SVDpp: ite: 3; diff: 0.185 Obj: 2.211
Reg-SVDpp: ite: 4; diff: 0.172 Obj: 1.831
Reg-SVDpp: ite: 5; diff: 0.186 Obj: 1.491
Reg-SVDpp: ite: 6; diff: 0.284 Obj: 1.067
Reg-SVDpp: ite: 7; diff: 0.248 Obj: 0.803
Reg-SVDpp: ite: 8; diff: 0.160 Obj: 0.674
Reg-SVDpp: ite: 9; diff: 0.089 Obj: 0.614
Reg-SVDpp: ite: 10; diff: 0.064 Obj: 0.575
Reg-SVDpp: ite: 11; diff: 0.052 Obj: 0.545
Reg-SVDpp: ite: 12; diff: 0.045 Obj: 0.521
Reg-SVDpp: ite: 13; diff: 0.040 Obj: 0.500
Reg-SVDpp: ite: 14; diff: 0.035 Obj: 0.483
Reg-SVDpp: ite: 15; diff: 0.032 Obj: 0.467
Reg-SVDpp: ite: 16; diff: 0.031 Obj: 0.452
Reg-SVDpp: ite: 17; diff: 0.029 Obj: 0.439
Reg-SVDpp: ite: 18; diff: 0.026 Obj: 0.428
Reg-SVDpp: ite: 19; diff: 0.024 Obj: 0.417
Reg-SVDpp: ite: 20; diff: 0.022 Obj: 0.408
Reg-SVDpp: ite: 21; diff: 0.019 Obj: 0.400
Reg-SVDpp: ite: 22; diff: 

In [27]:
# pediction
pred_rating = cue.predict(test_pair)
pred_train_rating = cue.predict(train_pair)
print('train rmse: %.3f; test rmse: %.3f' %(rmse(train_rating, pred_train_rating), rmse(test_rating, pred_rating)))

train rmse: 0.314; test rmse: 2.664
