In [11]:
import numpy as np

## Define Variables

In [12]:
# Rating matrix: each entry is a rating, 0 marks a missing rating.
R = np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])

N, M = R.shape  # N: number of items (rows of R), M: number of users (columns of R)
K = 2           # number of latent features


# Seed before each draw so the initial factors are identical on every run;
# without a fixed seed P and Q would change each time this cell executes.
np.random.seed(1)
P = np.random.rand(N, K)  # initial N x K factor matrix (one row per row of R)
# Single pre-formatted string: prints the same bytes on Python 2 and Python 3.
print("P : \n%s" % (P.shape,))

np.random.seed(2)
Q = np.random.rand(M, K)  # initial M x K factor matrix (one row per column of R)
print("Q : \n%s" % (Q.shape,))

P : 
(5, 2)
Q : 
(4, 2)


## Implementing Matrix Factorization 
steps : the maximum number of steps to perform the optimisation

alpha : the learning rate

beta  : the regularization parameter

In [13]:
def matrix_factorization(R, P, Q, K, alpha, steps=5000, beta=0.02, verbose=True):
    """Factorize R into two low-rank matrices with regularized SGD.

    Only entries of R that are greater than 0 are treated as observed; zeros
    are skipped. The inputs P and Q are never modified: the function trains
    float copies, so calling it twice with the same arguments gives the same
    result.

    Parameters
    ----------
    R : array-like, shape (N, M)
        Rating matrix; 0 means "no rating".
    P : array-like, shape (N, K)
        Initial factors for the rows of R.
    Q : array-like, shape (M, K)
        Initial factors for the columns of R.
    K : int
        Number of latent features to update (normally the column count of
        P and Q).
    alpha : float
        Learning rate.
    steps : int
        Maximum number of full sweeps over the observed entries.
    beta : float
        Regularization parameter.
    verbose : bool
        When True (default) print the current approximation every 100 steps
        and the final error if the early-stop threshold is reached.

    Returns
    -------
    (P, Q) : tuple of ndarrays with shapes (N, K) and (M, K), such that
        np.dot(P, Q.T) approximates R on its observed entries.
    """
    R = np.asarray(R)
    # Train on private float copies so the caller's arrays stay untouched
    # (and integer inputs are not truncated by the in-place updates).
    P = np.array(P, dtype=float)
    Q = np.array(Q, dtype=float).T  # K x M, so Q[:, j] is the factor of column j

    # The set of observed entries never changes, so find it once.
    # np.nonzero yields indices in row-major order, the same order as
    # scanning i then j.
    rows, cols = np.nonzero(R > 0)
    observed = list(zip(rows, cols))

    for step in range(steps):
        # Stochastic gradient descent with weighted (beta) regularization.
        for i, j in observed:
            eij = R[i, j] - np.dot(P[i, :], Q[:, j])  # prediction error
            # Update all K features at once. Q is updated with the freshly
            # updated P row, which is the same element-wise order as
            # updating feature by feature.
            P[i, :K] += alpha * (2 * eij * Q[:K, j] - beta * P[i, :K])
            Q[:K, j] += alpha * (2 * eij * P[i, :K] - beta * Q[:K, j])

        if verbose and step % 100 == 0:
            # np.dot(P, Q) is the current estimate of R. One pre-formatted
            # string prints identically on Python 2 and Python 3.
            print("step :  %d \n%s" % (step, np.dot(P, Q)))

        # Regularized squared error over the observed entries.
        e = 0
        for i, j in observed:
            e += (R[i, j] - np.dot(P[i, :], Q[:, j])) ** 2
            # 2.0 keeps this a true division even for an integer beta.
            e += (beta / 2.0) * (np.sum(P[i, :K] ** 2) + np.sum(Q[:K, j] ** 2))
        if e < 0.001:
            if verbose:
                print(e)
            break
    return P, Q.T

In [14]:
alpha = 0.0002  # learning rate for the gradient-descent updates
# Train with the default number of steps and regularization; nP and nQ are the
# learned factor matrices returned by the function.
nP, nQ = matrix_factorization(R, P, Q, K, alpha=alpha)


step :  0 
[[0.20296966 0.54466713 0.41506942 0.5338031 ]
 [0.00884124 0.13245921 0.10062453 0.1880709 ]
 [0.06707004 0.12204015 0.09317115 0.08856389]
 [0.0912679  0.25387071 0.19343875 0.25360412]
 [0.18897971 0.45467355 0.34664197 0.41750375]]
step :  100 
[[0.50068175 0.76266639 0.64093035 0.84560645]
 [0.1292764  0.23260791 0.19668255 0.31058172]
 [0.18889986 0.27485604 0.23054928 0.2857244 ]
 [0.24133153 0.38566153 0.3247113  0.45424876]
 [0.45991712 0.69599235 0.58474436 0.76492239]]
step :  200 
[[0.92034999 1.03492597 0.96457379 1.31324414]
 [0.32535044 0.38148313 0.35870019 0.53548513]
 [0.4446348  0.50118141 0.46735257 0.63988709]
 [0.49197359 0.56998718 0.53461937 0.77842511]
 [0.892709   1.01018991 0.94279805 1.30273071]]
step :  300 
[[1.45089044 1.32208772 1.37614749 1.89061858]
 [0.61798348 0.57062739 0.59968643 0.87007707]
 [0.84823181 0.77857313 0.81471414 1.15402933]
 [0.84984266 0.78738167 0.82948347 1.21950514]
 [1.4837409  1.35998067 1.42166205 2.00213823]]
step :

step :  4000 
[[5.04552835 2.80512757 4.7937381  0.99428598]
 [3.92930336 2.18728391 3.88814834 0.99777677]
 [1.11731019 0.67922755 4.34814235 4.96041762]
 [0.94351594 0.57093406 3.52222442 3.97310453]
 [2.5140215  1.44117092 4.84979616 4.04524863]]
step :  4100 
[[5.04181879 2.81452636 4.81520664 0.99455838]
 [3.93155168 2.19768562 3.91010052 0.99749487]
 [1.11574327 0.6858688  4.38158576 4.96047372]
 [0.94399383 0.57731536 3.55053813 3.97305794]
 [2.47616639 1.42956739 4.8527116  4.04494877]]
step :  4200 
[[5.03830891 2.82339754 4.83595786 0.99481894]
 [3.93369048 2.20755483 3.93132607 0.99725773]
 [1.11411004 0.69246818 4.41440078 4.960539  ]
 [0.94454011 0.58373094 3.5784457  3.9730103 ]
 [2.43949245 1.41826768 4.85559314 4.04460843]]
step :  4300 
[[5.03498833 2.83177007 4.85601128 0.9950689 ]
 [3.93572914 2.21691944 3.95184543 0.99705945]
 [1.1124189  0.6990137  4.44659112 4.96061608]
 [0.94514515 0.59016243 3.60593749 3.97296414]
 [2.4039613  1.40726846 4.85844033 4.04422676]]


## Predicted rating

In [15]:
# Reconstruct the full rating matrix from the learned factors; entries that
# were 0 (unrated) in R now hold predicted ratings.
nR = np.dot(nP, nQ.T)
print(nR)  # function-call form works on both Python 2 and Python 3

[[5.01629275 2.87852691 4.97827651 0.99659317]
 [3.94768814 2.27031369 4.07714135 0.99643295]
 [1.09961811 0.74256771 4.65450497 4.96151174]
 [0.95043828 0.63440334 3.78595081 3.9728005 ]
 [2.18446742 1.33835856 4.87730323 4.04049655]]


In [16]:
# Show the original ratings for comparison with the prediction.
print(R)  # function-call form works on both Python 2 and Python 3

[[5 3 0 1]
 [4 0 0 1]
 [1 1 0 5]
 [1 0 0 4]
 [0 1 5 4]]
