In [14]:
# Notebook: Matrix Factorization for Recommendation Systems

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Toy ratings in long format: one (user, item, rating) triple per position.
# Three users each rate the same three items, so the lists are laid out
# user by user.
ratings_dict = {
    "user_id": [1] * 3 + [2] * 3 + [3] * 3,
    "item_id": [1, 2, 3] * 3,
    "rating": [5, 3, 1] + [4, 2, 1] + [1, 5, 4],
}

# One column per key: user_id, item_id, rating.
ratings_df = pd.DataFrame(data=ratings_dict)

In [15]:
# Reshape the long ratings table into a dense user x item array:
# rows follow user_id, columns follow item_id, and any (user, item)
# pair without a rating is filled with 0.
R = (
    ratings_df
    .pivot(index="user_id", columns="item_id", values="rating")
    .fillna(0)
    .values
)

# 4. Matrix factorization
def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    """Factorize a rating matrix as ``R ~ P @ Q.T`` with regularized SGD.

    Only entries of ``R`` that are strictly greater than 0 are treated as
    observed ratings; all other entries are skipped both when updating the
    factors and when measuring the error.

    Parameters
    ----------
    R : array-like, 2-D (users x items)
        Rating matrix. Entries ``<= 0`` are treated as "not rated".
    P : array-like, 2-D (users x latent features)
        Initial user factors. Not modified; a float copy is trained.
    Q : array-like, 2-D (items x latent features)
        Initial item factors. Not modified; a float copy is trained.
    K : int
        Number of latent features to update (the leading ``K`` columns of
        ``P`` and ``Q``).
    steps : int, default 5000
        Maximum number of passes over the observed ratings.
    alpha : float, default 0.0002
        Learning rate.
    beta : float, default 0.02
        Regularization strength.

    Returns
    -------
    (P, Q) : tuple of numpy.ndarray
        Trained user factors (users x features) and item factors
        (items x features), so that ``P @ Q.T`` approximates ``R`` on the
        observed entries.
    """
    R = np.asarray(R, dtype=float)
    # Train on float copies: the caller's initial matrices stay untouched and
    # integer-typed inputs cannot silently truncate the gradient updates.
    P = np.array(P, dtype=float)
    Q = np.array(Q, dtype=float).T  # features x items, convenient for P @ Q

    # Coordinates of the observed ratings, in row-major order.
    rows, cols = np.nonzero(R > 0)

    for _ in range(steps):
        for i, j in zip(rows, cols):
            # Prediction error for this rating with the current factors.
            eij = R[i, j] - np.dot(P[i, :], Q[:, j])

            # Snapshot both factor vectors so the two updates below use the
            # same values the error was computed from (simultaneous update).
            p_old = P[i, :K].copy()
            q_old = Q[:K, j].copy()
            P[i, :K] = p_old + alpha * (2 * eij * q_old - beta * p_old)
            Q[:K, j] = q_old + alpha * (2 * eij * p_old - beta * q_old)

        # Regularized squared error over the observed ratings. Indexing with
        # rows/cols repeats each factor vector once per rating it takes part
        # in, matching a per-rating accumulation of the penalty.
        residual = (R - np.dot(P, Q))[rows, cols]
        e = np.sum(residual ** 2)
        e += (beta / 2) * (np.sum(P[rows, :K] ** 2) + np.sum(Q[:K, cols] ** 2))

        if e < 0.001:  # Good enough: stop early
            break

    return P, Q.T  # Return item factors in the caller's items x features layout


In [16]:
R

array([[5, 3, 1],
       [4, 2, 1],
       [1, 5, 4]])

In [17]:
# Problem dimensions: N users, M items, K latent features.
N, M = R.shape
K = 2

# Seed the random initialisation so that a fresh "Restart & Run All"
# reproduces the same factors and the same RMSE.
SEED = 42
rng = np.random.default_rng(SEED)
P = rng.random((N, K))  # initial user factors, uniform in [0, 1)
Q = rng.random((M, K))  # initial item factors, uniform in [0, 1)

nP, nQ = matrix_factorization(R, P, Q, K)
nR = np.dot(nP, nQ.T)  # reconstructed rating matrix

print("Orijinal Matris:")
print(R)
print("\nFaktörize Edilmiş Kullanıcı Matris (P):")
print(nP)
print("\nFaktörize Edilmiş Öğe Matris (Q):")
print(nQ)
print("\nTahmin Edilen Matris:")
print(nR)

# Reuse the reconstruction computed above rather than recomputing the same
# product, and score only the entries that were actually rated (> 0).
predicted_ratings = nR
observed = R > 0
rmse = np.sqrt(mean_squared_error(R[observed], predicted_ratings[observed]))
print("\nRMSE:", rmse)

Orijinal Matris:
[[5 3 1]
 [4 2 1]
 [1 5 4]]

Faktörize Edilmiş Kullanıcı Matris (P):
[[0.25520581 2.08290601]
 [0.14809115 1.63115699]
 [2.38341103 0.54968169]]

Faktörize Edilmiş Öğe Matris (Q):
[[-0.13767476  2.42461011]
 [ 1.82438371  1.15376998]
 [ 1.58578104  0.35619943]]

Tahmin Edilen Matris:
[[5.01509957 2.86878776 1.14663046]
 [3.9345313  2.15215506 0.81585733]
 [1.00462823 4.9824625  3.97536434]]

RMSE: 0.10605390590471613
