In [1]:
import numpy as np
import pandas as pd

In [2]:
def replace_zero_to_nan(m):
    m = m.astype(float)
    m[m==0] = np.nan
    return m

def calc_rmse(P, Q, R):
    R_pred = np.dot(P, Q.T)
    mse = np.nanmean((R - R_pred)**2)
    rmse = np.sqrt(mse)
    return rmse

In [3]:
df = pd.DataFrame({"user":np.array([[u]*5 for u in [0,1,2,3]]).reshape(-1), 
                   'item':[0,1,2,3,4]*4,
                   'ratings':[2,1,5,4,5, 
                              5, 4, 1, np.nan, 2,
                              1,1,5,2,2,
                              1, np.nan, np.nan, 4, 3]
                  })
R = df.pivot(index='item', columns='user', values='ratings').values

In [4]:
n_user, n_item = R.shape
print(f'# users = {n_user}')
print(f'# item = {n_item}')
np.random.seed(1)

# users = 5
# item = 4


In [None]:
# embedding dimension
K = 3

# train test split, train takes 70% elements at random, other 6 become np.nan
train_ratio = 0.7

# Note since this is not deterministic train set may not consist exactly 70% of the data.
bool_matrix = (np.random.rand(n_user, n_item) < train_ratio)
train_R = replace_zero_to_nan(R * bool_matrix)
test_R = replace_zero_to_nan(R * ~bool_matrix)

In [None]:
# intializing embedding vector from normal distribution with mean=0, std_dev=1/k
P = np.random.normal(loc=0, scale=1.0/K, size=(n_user, K))
Q = np.random.normal(loc=0, scale=1.0/K, size=(n_item, K))

In [None]:
n_steps = 100
lr = 0.01
_lambda = 0.01
regularization = True
N = np.sum(bool_matrix)
for n_step in range(1, n_steps):
    for i in range(n_user):
        for j in range(n_item):
            r_ui = train_R[i, j]
            if np.isnan(r_ui): # skip non-zero element
                continue   
            e_ui = r_ui - np.dot(P[i, :], Q[j, :])
            if not regularization:
                P[i, :] = P[i, :] + lr*e_ui*Q[j, :]
                Q[j, :] = Q[j, :] + lr*e_ui*P[i, :]
                continue

            # Done for user_i with all item embedding vectors that had interactions
            # then move on(all j is done, next i) to next user, repeat the process.
            P[i, :] = P[i, :] + lr * (e_ui*Q[j, :] - _lambda*P[i, :]) # updates user emb vector(P_i) using item emb vector Q_j
            Q[j, :] = Q[j, :] + lr * (e_ui*P[i, :] - _lambda*Q[j, :]) # use updated user emb vector to update Q_j
            
    if n_step % 10 == 0:
        print(f"{n_step} : RMSE =  ", np.round(calc_rmse(P, Q, train_R), 4))
        
print(f"Train RMSE =  ", np.round(calc_rmse(P, Q, train_R), 4))
print(f"Test RMSE =  ", np.round(calc_rmse(P, Q, test_R), 4))

In [None]:
pred_R = np.dot(P, Q.T)

In [None]:
item_list = ["vacuum", 'sweeper', 'laptop', 'mouse', 'keyboard']
user_list = df['user'].unique().tolist()

In [None]:
def preferenece_rec(user):
    """
    Given user output item that user will prefer.
    
    parameters
    ---------
    user : int, user_id
    """
    return item_list[np.argmax(pred_R[:, user])]

In [None]:
for user in user_list:
    rec_item = preferenece_rec(user)
    print(f"user_{user} will like = {rec_item}")