In [24]:
def generate_ratings_matrix(n_users=100, n_items=100, rank=5, noise=0.1, sparsity=0.8):
    U = np.random.randn(n_users, rank)
    V = np.random.randn(n_items, rank)
    R_true = U @ V.T
    R_noisy = R_true + noise * np.random.randn(*R_true.shape)
    mask = np.random.rand(*R_noisy.shape) > sparsity
    R_sparse = R_noisy * mask
    return R_sparse, mask


In [26]:
def matrix_factorization(R, mask, k=5, alpha=0.01, epochs=50):
    n_users, n_items = R.shape
    U = np.random.randn(n_users, k)
    V = np.random.randn(n_items, k)

    for epoch in range(epochs):
        for i in range(n_users):
            for j in range(n_items):
                if mask[i, j]:
                    prediction = U[i, :] @ V[j, :].T
                    error = R[i, j] - prediction
                    U[i, :] += alpha * error * V[j, :]
                    V[j, :] += alpha * error * U[i, :]
        pred_matrix = U @ V.T
        train_mse = np.mean(((R - pred_matrix)[mask]) ** 2)
        print(f"Epoch {epoch+1}, Training MSE: {train_mse:.4f}")

    return U, V


In [28]:
def predict_full_matrix(U, V):
    return U @ V.T

def evaluate_mse(R_true, R_pred, mask):
    return np.mean(((R_true - R_pred)[mask]) ** 2)


In [30]:
# Generate data
R, mask = generate_ratings_matrix(n_users=100, n_items=100, rank=5, noise=0.1, sparsity=0.8)

# Train model
U, V = matrix_factorization(R, mask, k=5, alpha=0.01, epochs=20)

# Predict
R_pred = predict_full_matrix(U, V)

# Evaluate
mse = evaluate_mse(R, R_pred, mask)
print("Final MSE on observed entries:", mse)


Epoch 1, Training MSE: 5.8180
Epoch 2, Training MSE: 4.7182
Epoch 3, Training MSE: 4.1290
Epoch 4, Training MSE: 3.7290
Epoch 5, Training MSE: 3.4134
Epoch 6, Training MSE: 3.1414
Epoch 7, Training MSE: 2.8961
Epoch 8, Training MSE: 2.6713
Epoch 9, Training MSE: 2.4658
Epoch 10, Training MSE: 2.2797
Epoch 11, Training MSE: 2.1131
Epoch 12, Training MSE: 1.9653
Epoch 13, Training MSE: 1.8349
Epoch 14, Training MSE: 1.7200
Epoch 15, Training MSE: 1.6187
Epoch 16, Training MSE: 1.5290
Epoch 17, Training MSE: 1.4494
Epoch 18, Training MSE: 1.3786
Epoch 19, Training MSE: 1.3152
Epoch 20, Training MSE: 1.2583
Final MSE on observed entries: 1.2583209416715506


In [32]:
# Compute regular MSE on observed entries
mse = np.mean(((R - R_pred)[mask]) ** 2)

# Compute normalized MSE (divide by variance of observed ratings)
variance = np.var(R[mask])
nmse = mse / variance

print(f"MSE: {mse:.4f}")
print(f"Normalized MSE: {nmse:.4f}")


MSE: 1.2583
Normalized MSE: 0.2855
