In [1]:
import numpy as np 

Matrix Factorization

In [2]:
# Create rating table
R = np.array([
    [5, 3, 0, 1, 4],
    [4, 0, 0, 2, 5],
    [0, 5, 4, 3, 0],
    [2, 3, 1, 3, 5]
])

In [3]:
R

array([[5, 3, 0, 1, 4],
       [4, 0, 0, 2, 5],
       [0, 5, 4, 3, 0],
       [2, 3, 1, 3, 5]])

In [4]:
# Number of latent features
k=2 

# Number of users and movies
num_users, num_movies = R.shape
num_users, num_movies

(4, 5)

In [40]:
# Randomly initialize user and movie matrix
U = np.random.rand(num_users, k)
M = np.random.rand(k, num_movies)
U, M 

(array([[0.60905227, 0.32722576],
        [0.88661103, 0.45602686],
        [0.34791848, 0.78991653],
        [0.13710843, 0.49376367]]),
 array([[0.23486086, 0.86349094, 0.2370529 , 0.74341572, 0.89243882],
        [0.41817907, 0.73733112, 0.75519976, 0.32530611, 0.77062237]]))

In [41]:
# Hyperparameters
num_iterations = 10000
learning_rate = 0.01 

In [42]:
# perform matrix factorization using SGD
for i in range(num_iterations):
    for user in range(num_users):
        for movie in range(num_movies):
            if R[user,movie] > 0:
                error = R[user,movie] - np.dot(U[user,:], M[:,movie])
                # print(error)
                U[user,:] +=  learning_rate * (2 * error * M[:,movie])
                M[:,movie] += learning_rate * (2 * error * U[user,:])

In [43]:
U

array([[ 4.07318025, -2.3350948 ],
       [ 2.65211244, -0.78611889],
       [ 0.41613359,  1.69171931],
       [ 0.56818769,  1.01589755]])

In [44]:
M

array([[ 1.79048241,  2.03012842, -4.33900824,  1.24570748,  2.87045913],
       [ 0.97640514,  2.26889684,  3.42511305,  1.71180164,  3.30000813]])

In [45]:
error

0.028065165381844004

In [46]:
# Reconstruct the Matrix
R_approx = np.dot(U, M)
R_approx

array([[  5.01295903,   2.97098978, -25.67152632,   1.07677199,
          3.98606563],
       [  3.98099015,   3.60050618, -14.20008377,   1.9580767 ,
          5.01858165],
       [  2.39688331,   4.68314122,   3.98872277,   3.41426862,
          6.77718194],
       [  2.00925765,   3.4584607 ,   1.01419288,   2.44681074,
          4.9834297 ]])

In [47]:
# Display Results comparing to the original
print("Original Matrix: ")
print(R)
print("Predicted Matrix: ")
print(R_approx)
print("Scaled Predicted Matrix: ")
R_approx_scaled = np.clip(np.round(R_approx), 1, 5).astype(int)
print(R_approx_scaled)

Original Matrix: 
[[5 3 0 1 4]
 [4 0 0 2 5]
 [0 5 4 3 0]
 [2 3 1 3 5]]
Predicted Matrix: 
[[  5.01295903   2.97098978 -25.67152632   1.07677199   3.98606563]
 [  3.98099015   3.60050618 -14.20008377   1.9580767    5.01858165]
 [  2.39688331   4.68314122   3.98872277   3.41426862   6.77718194]
 [  2.00925765   3.4584607    1.01419288   2.44681074   4.9834297 ]]
Scaled Predicted Matrix: 
[[5 3 1 1 4]
 [4 4 1 2 5]
 [2 5 4 3 5]
 [2 3 1 2 5]]
