In [1]:
import numpy as np

In [2]:
# Create rating table
R = np.array([
    [5, 3, 0, 1, 4],
    [4, 0, 0, 2, 2],
    [4, 5, 4, 3, 0],
    [0, 3, 1, 0, 5]
])

R

array([[5, 3, 0, 1, 4],
       [4, 0, 0, 2, 2],
       [4, 5, 4, 3, 0],
       [0, 3, 1, 0, 5]])

In [3]:
#latent features
k = 2

# Number of users & movies
num_users, num_movies = R.shape
num_users, num_movies

(4, 5)

In [4]:
# Randomly initialize Users and Movie Matrix
U = np.random.rand(num_users, k)
M = np.random.rand(num_movies, k)
U, M

(array([[0.263423  , 0.70426661],
        [0.83940214, 0.48501468],
        [0.01955906, 0.11263134],
        [0.90758998, 0.27090096]]),
 array([[0.56888766, 0.35091158],
        [0.23974734, 0.48114909],
        [0.32850445, 0.00068152],
        [0.64762952, 0.28462607],
        [0.23290315, 0.51899164]]))

In [5]:
V = M.transpose()
V

array([[0.56888766, 0.23974734, 0.32850445, 0.64762952, 0.23290315],
       [0.35091158, 0.48114909, 0.00068152, 0.28462607, 0.51899164]])

In [6]:
np.dot(U, V)

array([[0.3969934 , 0.4020122 , 0.0870156 , 0.37105315, 0.42686053],
       [0.64772278, 0.43460879, 0.27607788, 0.68166942, 0.44721796],
       [0.05065055, 0.0588817 , 0.006502  , 0.04472484, 0.06301009],
       [0.61137902, 0.34793603, 0.29833197, 0.66488754, 0.3519759 ]])

In [7]:
# Hyperparameters
num_iterations = 10000
learning_rate = 0.01

In [8]:
# Perform matrix factorization with SGD
for i in range(num_iterations):
  for u in range(num_users):
    for m in range(num_movies):
      if R[u, m] > 0:
        error = R[u, m] - np.dot(U[u, :], V[:, m])
        U[u, :] += learning_rate * (2 * error * V[:, m])
        V[:, m] += learning_rate * (2 * error * U[u, :])

U, V

(array([[0.76933968, 1.78189008],
        [1.61329715, 0.76515892],
        [2.44481831, 0.21486463],
        [0.56417754, 2.26805811]]),
 array([[1.4443097 , 1.97200475, 1.63306533, 1.22421642, 0.22009329],
        [2.18242151, 0.83218292, 0.03468219, 0.03264047, 2.14978104]]))

In [9]:
# Reconstruct Matrix R from new U & V
R_hat = np.dot(U, V)
R_hat

array([[5.        , 3.        , 1.31818182, 1.        , 4.        ],
       [4.        , 3.81818182, 2.66115702, 2.        , 2.        ],
       [4.        , 5.        , 4.        , 3.        , 1.        ],
       [5.76470588, 3.        , 1.        , 0.76470588, 5.        ]])

In [10]:
# Display the results
print("Original Rating Table:")
print(R)
print("\nReconstructed Rating Table:")
R_hat_rounded1 = np.clip(np.round(R_hat), 1, 5).astype(int)
print(R_hat_rounded1)

Original Rating Table:
[[5 3 0 1 4]
 [4 0 0 2 2]
 [4 5 4 3 0]
 [0 3 1 0 5]]

Reconstructed Rating Table:
[[5 3 1 1 4]
 [4 4 3 2 2]
 [4 5 4 3 1]
 [5 3 1 1 5]]


In [11]:
# Print original matrix with highlighted 0 values
print("Original Matrix: ")
for row_idx, row in enumerate(R):
    for col_idx, val in enumerate(row):
        if val == 0:
            print('\033[91m' + str(val) + '\033[0m', end=' ')  # Print in red color
        else:
            print(val, end=' ')
    print()  # Newline for next row

# Print predicted matrix with highlighted predicted values
print("Predicted Matrix: ")
for row_idx, row in enumerate(R_hat_rounded1):
    for col_idx, val in enumerate(row):
        if R[row_idx, col_idx] == 0:  # If original value is 0
            print('\033[92m' + str(val) + '\033[0m', end=' ')  # Print predicted value in green color
        else:
            print(val, end=' ')  # Print predicted value
    print()  # Newline for next row


Original Matrix: 
5 3 [91m0[0m 1 4 
4 [91m0[0m [91m0[0m 2 2 
4 5 4 3 [91m0[0m 
[91m0[0m 3 1 [91m0[0m 5 
Predicted Matrix: 
5 3 [92m1[0m 1 4 
4 [92m4[0m [92m3[0m 2 2 
4 5 4 3 [92m1[0m 
[92m5[0m 3 1 [92m1[0m 5 


### Scikit-Learn - Non-Negative Matrix Factorization

In [12]:
from sklearn.decomposition import NMF

In [13]:
# Initialize NMF Model.
model = NMF(n_components = 2, init = 'random', random_state = 0)
model.fit(R)

In [14]:
# Reconstruct the rating matrix
R_hat = model.transform(R) @ model.components_
R_hat

array([[3.71042775, 3.33876191, 1.22397216, 1.31418345, 4.39659625],
       [2.28188978, 2.02592476, 0.959089  , 0.98900915, 1.9468325 ],
       [4.99018785, 4.2763752 , 3.25788743, 3.1795794 , 0.        ],
       [1.9402299 , 1.83084328, 0.        , 0.12644757, 4.64710542]])

In [15]:
R_hat_rounded2 = np.clip(np.round(R_hat), 1, 5).astype(int)
R_hat_rounded2

array([[4, 3, 1, 1, 4],
       [2, 2, 1, 1, 2],
       [5, 4, 3, 3, 1],
       [2, 2, 1, 1, 5]])

In [16]:
# Print original matrix with highlighted 0 values
print("Original Matrix: ")
for row_idx, row in enumerate(R):
    for col_idx, val in enumerate(row):
        if val == 0:
            print('\033[91m' + str(val) + '\033[0m', end=' ')  # Print in red color
        else:
            print(val, end=' ')
    print()  # Newline for next row

# Print predicted matrix with highlighted predicted values
print("Predicted Matrix: ")
for row_idx, row in enumerate(R_hat_rounded2):
    for col_idx, val in enumerate(row):
        if R[row_idx, col_idx] == 0:  # If original value is 0
            print('\033[92m' + str(val) + '\033[0m', end=' ')  # Print predicted value in green color
        else:
            print(val, end=' ')  # Print predicted value
    print()  # Newline for next row

Original Matrix: 
5 3 [91m0[0m 1 4 
4 [91m0[0m [91m0[0m 2 2 
4 5 4 3 [91m0[0m 
[91m0[0m 3 1 [91m0[0m 5 
Predicted Matrix: 
4 3 [92m1[0m 1 4 
2 [92m2[0m [92m1[0m 1 2 
5 4 3 3 [92m1[0m 
[92m2[0m 2 1 [92m1[0m 5 


In [17]:
# Print predicted matrix with highlighted predicted values
print("Predicted Matrix: ")
for row_idx, row in enumerate(R_hat_rounded1):
    for col_idx, val in enumerate(row):
        if R[row_idx, col_idx] == 0:  # If original value is 0
            print('\033[92m' + str(val) + '\033[0m', end=' ')  # Print predicted value in green color
        else:
            print(val, end=' ')  # Print predicted value
    print()  # Newline for next row

# Print predicted matrix with highlighted predicted values
print("Predicted Matrix: ")
for row_idx, row in enumerate(R_hat_rounded2):
    for col_idx, val in enumerate(row):
        if R[row_idx, col_idx] == 0:  # If original value is 0
            print('\033[92m' + str(val) + '\033[0m', end=' ')  # Print predicted value in green color
        else:
            print(val, end=' ')  # Print predicted value
    print()  # Newline for next row

Predicted Matrix: 
5 3 [92m1[0m 1 4 
4 [92m4[0m [92m3[0m 2 2 
4 5 4 3 [92m1[0m 
[92m5[0m 3 1 [92m1[0m 5 
Predicted Matrix: 
4 3 [92m1[0m 1 4 
2 [92m2[0m [92m1[0m 1 2 
5 4 3 3 [92m1[0m 
[92m2[0m 2 1 [92m1[0m 5 
