In [1]:
import numpy as np

In [4]:
#원본 행렬 R 생성
R = np.array([[4, np.NaN, np.NaN, 2, np.NaN],
            [np.NaN, 5, np.NaN,3, 1],
            [np.NaN, np.NaN,3, 4, 4],
            [5, 2, 1, 2, np.NaN]])

num_users, num_items = R.shape
R

array([[ 4., nan, nan,  2., nan],
       [nan,  5., nan,  3.,  1.],
       [nan, nan,  3.,  4.,  4.],
       [ 5.,  2.,  1.,  2., nan]])

In [5]:
#잠재 요인 차원 K는 3
K = 3

In [9]:
#P, Q를 임의의 값으로
np.random.seed(1)
P = np.random.normal(scale = 1/K, size = (num_users, K))
Q = np.random.normal(scale = 1/K, size = (num_items, K))

In [10]:
from sklearn.metrics import mean_squared_error

In [14]:
def get_rmse(R,P,Q, non_zeros) :
    error = 0
    
    full_pred_matrix = np.dot(P, Q.T)
    
    x_non_zero_ind = [non_zero[0] for non_zero in non_zeros]
    y_non_zero_ind = [non_zero[1] for non_zero in non_zeros]
    R_non_zeros = R[x_non_zero_ind, y_non_zero_ind]
    
    #new predict Rating Matrix
    full_pred_matrix_non_zeros = full_pred_matrix[x_non_zero_ind, y_non_zero_ind]
    rmse = np.sqrt(mean_squared_error(R_non_zeros, full_pred_matrix_non_zeros))
    
    return rmse

array([[ 4., nan, nan,  2., nan],
       [nan,  5., nan,  3.,  1.],
       [nan, nan,  3.,  4.,  4.],
       [ 5.,  2.,  1.,  2., nan]])

In [11]:
#R이 0보다 큰 값들의 행,열 index와 그 값 저장
non_zeros = [(i,j,R[i,j]) for i in range(num_users) for j in range(num_items) if R[i,j] > 0 ]
non_zeros

[(0, 0, 4.0),
 (0, 3, 2.0),
 (1, 1, 5.0),
 (1, 3, 3.0),
 (1, 4, 1.0),
 (2, 2, 3.0),
 (2, 3, 4.0),
 (2, 4, 4.0),
 (3, 0, 5.0),
 (3, 1, 2.0),
 (3, 2, 1.0),
 (3, 3, 2.0)]

$$(p_u ) ́= p_u+ η(e_((u,i) )* q_i- λ* p_u )$$
$$(q_i ) ́= q_i+ η(e_((u,i) )* p_u- λ* q_i )$$

In [15]:
steps = 1000
learning_rate = 0.01 #SGD의 학습률
r_lambda = 0.01 #L2 정규화 계수

In [16]:
for step in range(steps) :
    for i,j,r in non_zeros :
        
        #error
        eij = r - np.dot(P[i, :], Q[j, :].T)
        
        P[i, :] = P[i, :] + learning_rate * (eij * Q[j, :] - r_lambda * P[i, :])
        Q[j, :] = Q[j, :] + learning_rate * (eij * Q[j, :] - r_lambda * P[i, :])
        
        rmse = get_rmse(R, P, Q, non_zeros)
        
        if(step % 50) == 0 :
            print('### iteration step :', step, 'rmse : ', rmse)

### iteration step : 0 rmse :  3.2648266557429473
### iteration step : 0 rmse :  3.264487072423471
### iteration step : 0 rmse :  3.261812305059051
### iteration step : 0 rmse :  3.2616702512504885
### iteration step : 0 rmse :  3.2615018540154637
### iteration step : 0 rmse :  3.2611626448664617
### iteration step : 0 rmse :  3.2595886638235
### iteration step : 0 rmse :  3.2582810865068277
### iteration step : 0 rmse :  3.2601465327057806
### iteration step : 0 rmse :  3.2593863368770504
### iteration step : 0 rmse :  3.2595001564903145
### iteration step : 0 rmse :  3.2592719522959612
### iteration step : 50 rmse :  0.8923423481752016
### iteration step : 50 rmse :  0.8921865800732791
### iteration step : 50 rmse :  0.8921447037901181
### iteration step : 50 rmse :  0.8905138693524585
### iteration step : 50 rmse :  0.8809740483028858
### iteration step : 50 rmse :  0.8786550460276334
### iteration step : 50 rmse :  0.8780892463658095
### iteration step : 50 rmse :  0.87819923108731

### iteration step : 700 rmse :  0.017826296786941986
### iteration step : 700 rmse :  0.017823056645620478
### iteration step : 700 rmse :  0.017694364812240765
### iteration step : 700 rmse :  0.01757936674776322
### iteration step : 700 rmse :  0.01777265866922281
### iteration step : 700 rmse :  0.017780960389443285
### iteration step : 700 rmse :  0.017799005861404732
### iteration step : 700 rmse :  0.01782570799882617
### iteration step : 700 rmse :  0.017356525959111403
### iteration step : 700 rmse :  0.018104037717286508
### iteration step : 700 rmse :  0.017635120324631638
### iteration step : 700 rmse :  0.017836251742625785
### iteration step : 750 rmse :  0.017971349925203214
### iteration step : 750 rmse :  0.01796838047236166
### iteration step : 750 rmse :  0.01783819685940298
### iteration step : 750 rmse :  0.017715104290272055
### iteration step : 750 rmse :  0.017916433837706962
### iteration step : 750 rmse :  0.0179255801201518
### iteration step : 750 rmse :  0.

In [18]:
pred_matrix = np.dot(P, Q.T)
np.round(pred_matrix, 3)

array([[  4.   ,  -3.35 ,  -5.152,   2.   ,   1.99 ],
       [-40.075,   4.955,  36.291,   2.989,   0.975],
       [ -6.38 ,  -3.928,   3.   ,   3.999,   3.999],
       [  4.973,   2.026,   1.014,   2.003,  -8.542]])