In [106]:
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import scipy.sparse as sparse

def biased_als(data, iterations, lmbda, features):
    user_size, item_size = data.shape
    
    
    X = sparse.csr_matrix(np.random.normal(size = (user_size, features)))
    Y = sparse.csr_matrix(np.random.normal(size = (item_size, features)))
    
    beta = np.zeros((1, user_size))
    gamma = np.zeros((item_size, 1))
    
    for _ in tqdm(range(iterations)):
        for i in range(item_size):
            r_i = data[:, i].T.toarray()
            r_i_beta = r_i - beta
            
            X = (sparse.hstack([np.ones((user_size, 1)), X]))
            Y = sparse.csr_matrix(sparse.hstack([gamma, Y]))
            
            Y[i] = np.linalg.inv(X.T @ X + lmbda * np.eye(features+1)) @ X.T @ r_i_beta.T
            
        for u in range(user_size):
            
            r_u = data[u, :].toarray()
            
            r_u_gamma = r_u - gamma
            
            Y = sparse.hstack([np.ones((1, item_size)), Y])
            
            X = (sparse.hstack([beta, X.T])).T
            
            X[u] = (Y.T @ Y + lmbda*I)**-1 @ Y.T @ r_u_gamma
            
    return X, Y

In [76]:
df_ratings = pd.read_csv('ml-1m/ratings.dat', sep="::", header=None)
df_ratings.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

In [77]:
df_ratings.drop('timestamp', axis='columns', inplace=True)
df_ratings.dropna(inplace=True)

In [78]:
df_ratings['movie_num'] = df_ratings['movie_id'].astype("category").cat.codes
item_lookup = df_ratings[['movie_num', 'movie_id']].drop_duplicates()
item_lookup['movie_num'] = item_lookup.movie_num.astype(str)

In [79]:
users = list(np.sort(df_ratings.user_id.unique()))
movies = list(np.sort(df_ratings.movie_num.unique()))
ratings = list(df_ratings.rating)

In [80]:
rows = df_ratings.user_id.astype(int)
cols = df_ratings.movie_num.astype(int)

In [81]:
data_sparse = sparse.csr_matrix(
    (ratings, (rows-1, cols)),
    shape=(len(users), len(movies)))

In [107]:
user_vecs, item_vecs = biased_als(data_sparse, iterations=20, features=20, lmbda=10)

  0%|          | 0/20 [00:00<?, ?it/s]

X.shape (6040, 21)
r_i_beta.shape (1, 6040)
X.T@X shape (21, 21)
X.shape (6040, 22)
r_i_beta.shape (1, 6040)
X.T@X shape (22, 22)


  self._set_arrayXarray(i, j, x)


ValueError: operands could not be broadcast together with remapped shapes [original->remapped]: (21,21)  and requested shape (22,22)

numpy.ndarray