In [61]:
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import scipy.sparse as sparse
import scipy.sparse.linalg as splg

def biased_als(data, iterations, lmbda, features):
    """Factorise a rating matrix with bias-augmented alternating least squares.

    The model is ``r[u, i] ~ beta[u] + gamma[i] + x[u] . y[i]`` where ``beta``
    is a per-user bias, ``gamma`` a per-item bias and ``x[u]``, ``y[i]`` are
    latent vectors of length ``features``.  Each half-step is a ridge
    regression with a closed-form solution:

    * item step: user factors are fixed, their bias column is replaced by
      ones, and the regression of ``r[:, i] - beta`` yields
      ``(gamma[i], y[i])`` for every item;
    * user step: item factors are fixed, their bias row is replaced by ones,
      and the regression of ``r[u, :] - gamma`` yields ``(beta[u], x[u])``
      for every user.

    The biases solved in one half-step are carried into the next one.  The
    normal-equation matrix is the same for every item (resp. every user), so
    it is built once per half-step and all columns are solved together.

    NOTE(review): every cell of ``data`` enters the regression, so entries that
    are not stored in the sparse matrix count as ratings of 0.  Confirm that
    this is the intended objective.

    Parameters
    ----------
    data : scipy sparse matrix or array-like, shape (user_size, item_size)
        Rating matrix.
    iterations : int
        Number of full sweeps (item step followed by user step).
    lmbda : float
        Ridge penalty added to the diagonal of the normal equations; it is
        applied to the bias component as well as to the latent factors.
    features : int
        Number of latent factors (excluding the bias component).

    Returns
    -------
    X : scipy.sparse.csr_matrix, shape (user_size, features + 1)
        Column 0 holds ``beta``; the remaining columns hold the user factors.
    Y : scipy.sparse.csr_matrix, shape (features + 1, item_size)
        Row 0 holds ``gamma``; the remaining rows hold the item factors.

    A prediction is ``X[u, 0] + Y[0, i] + X[u, 1:] @ Y[1:, i]``.
    """
    user_size, item_size = data.shape
    dim = features + 1

    # The factors are fully dense, so they are kept as ndarrays while iterating
    # and only wrapped as CSR on return to preserve the original return type.
    X = np.random.normal(size=(user_size, dim))
    Y = np.random.normal(size=(dim, item_size))

    beta = np.zeros(user_size)
    gamma = np.zeros(item_size)

    ratings = sparse.csr_matrix(data)
    ratings_t = ratings.T.tocsr()
    ridge = lmbda * np.eye(dim)

    for _ in tqdm(range(iterations)):
        # ---- item step: solve (gamma_i, y_i) for all items at once ----
        X_wave = X.copy()
        X_wave[:, 0] = 1.0  # coefficient of gamma_i is 1 for every user
        # X_wave.T @ (R - beta 1^T), computed without densifying R.
        rhs_items = np.asarray(ratings_t @ X_wave).T - (X_wave.T @ beta)[:, None]
        Y = np.linalg.solve(X_wave.T @ X_wave + ridge, rhs_items)
        gamma = Y[0, :].copy()

        # ---- user step: solve (beta_u, x_u) for all users at once ----
        Y_wave = Y.copy()
        Y_wave[0, :] = 1.0  # coefficient of beta_u is 1 for every item
        # Y_wave @ (R - 1 gamma^T)^T, computed without densifying R.
        rhs_users = np.asarray(ratings @ Y_wave.T).T - (Y_wave @ gamma)[:, None]
        X = np.linalg.solve(Y_wave @ Y_wave.T + ridge, rhs_users).T
        beta = X[:, 0].copy()

    return sparse.csr_matrix(X), sparse.csr_matrix(Y)

In [42]:
# Load the ratings file (presumably MovieLens-1M, judging by the path): it has
# no header row and uses "::" as the field separator.  A multi-character
# separator is only handled by pandas' Python parsing engine, so the engine is
# requested explicitly instead of relying on the implicit fallback.
df_ratings = pd.read_csv(
    'Final project/ml-1m/ratings.dat',
    sep="::",
    header=None,
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
    engine='python',
)

In [43]:
# Remove the unused timestamp column and any incomplete rows.
# errors='ignore' keeps the cell re-runnable: a second execution no longer
# raises KeyError because 'timestamp' has already been dropped.
df_ratings.drop(columns='timestamp', errors='ignore', inplace=True)
df_ratings.dropna(inplace=True)

In [44]:
# Map each movie_id to a dense integer code so it can be used as a column
# index of the rating matrix.
df_ratings['movie_num'] = df_ratings['movie_id'].astype("category").cat.codes

# Lookup table code -> original movie_id, with the code stored as a string.
# .assign builds a new frame instead of writing a column into the result of
# [[...]].drop_duplicates(), which can trigger SettingWithCopyWarning.
item_lookup = (
    df_ratings[['movie_num', 'movie_id']]
    .drop_duplicates()
    .assign(movie_num=lambda frame: frame['movie_num'].astype(str))
)

In [45]:
# Sorted distinct user ids and movie codes (their counts size the rating
# matrix), plus the rating values in the row order of df_ratings.
users = list(np.unique(df_ratings['user_id']))
movies = list(np.unique(df_ratings['movie_num']))
ratings = df_ratings['rating'].tolist()

In [46]:
# Integer coordinates for the sparse rating matrix: one (row, col) pair per rating.
# NOTE(review): rows carries raw user_id values, not remapped codes, so the
# matrix construction assumes ids are contiguous and start at 1 - confirm.
rows = df_ratings['user_id'].astype(int)
cols = df_ratings['movie_num'].astype(int)

In [47]:
# User x movie rating matrix in CSR form.  Row index is user_id - 1 and
# column index is the movie code; the shape comes from the distinct counts.
data_sparse = sparse.csr_matrix(
    (ratings, (rows.sub(1), cols)),
    shape=(len(users), len(movies)),
)

In [62]:
# Fit the biased ALS model: 20 sweeps, 20 latent features, ridge penalty 10.
user_vecs, item_vecs = biased_als(
    data_sparse,
    iterations=20,
    lmbda=10,
    features=20,
)

  0%|          | 0/20 [00:00<?, ?it/s]

KeyboardInterrupt: 