In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the data files
# below can be opened with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')
# Input files on the mounted drive. `artists_file` is later read by
# read_artists and `user_artists_small_file` by read_data;
# `user_artists_file` is not referenced by the cells shown here
# (presumably the full-size interaction file -- TODO confirm).
artists_file = '/content/drive/MyDrive/data/artists.txt'
user_artists_file = '/content/drive/MyDrive/data/user_artists.txt'
user_artists_small_file = '/content/drive/MyDrive/data/user_artists_small.txt'

Mounted at /content/drive


In [None]:
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import spsolve

def read_data(file_path):
    """Load (user, item, count) interaction triples from a tab-separated file.

    The first line is treated as a header and skipped. Blank or
    whitespace-only lines are ignored, and an empty file yields an empty list.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of ``(int user, int item, float count)`` tuples in file order.

    Raises:
        ValueError: If a non-blank line does not hold exactly three
            tab-separated fields, or a field cannot be converted.
    """
    data = []
    with open(file_path, 'r') as file:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            line = line.strip()
            if not line:
                # A stray blank line (e.g. at end of file) is not a record.
                continue
            user, item, count = line.split('\t')
            data.append((int(user), int(item), float(count)))
    return data

def read_artists(file_path):
    """Load an artist-id -> artist-name lookup from a tab-separated file.

    The first line is treated as a header and skipped. Blank or
    whitespace-only lines are ignored, and an empty file yields an empty dict.
    If an id appears more than once, the last name read wins.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping ``int`` artist id to the artist name string.

    Raises:
        ValueError: If a non-blank line does not hold exactly two
            tab-separated fields, or the id is not an integer.
    """
    artist_dict = {}
    with open(file_path, 'r') as file:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            line = line.strip()
            if not line:
                # A stray blank line (e.g. at end of file) is not a record.
                continue
            artist_id, artist_name = line.split('\t')
            artist_dict[int(artist_id)] = artist_name
    return artist_dict

def calculate_sparsity_ratio(data, num_users, num_items):
    """Return the ratio of recorded interactions to empty user-item cells.

    Args:
        data: Sequence of interaction records; only its length is used.
        num_users: Number of rows of the user-item grid.
        num_items: Number of columns of the user-item grid.

    Returns:
        ``len(data) / (num_users * num_items - len(data))``.

    Raises:
        ZeroDivisionError: If every cell of the grid has a record.
    """
    observed = len(data)
    unobserved = num_users * num_items - observed
    return observed / unobserved

def initialize_matrices(num_users, num_items, f):
    """Create the starting user and item factor matrices.

    Reseeds numpy's global random generator with 45 on every call, so the
    result is the same each time (and the global random stream is reset as a
    side effect).

    Args:
        num_users: Number of rows of the user factor matrix.
        num_items: Number of rows of the item factor matrix.
        f: Number of latent factors (columns of both matrices).

    Returns:
        ``(X, Y)`` with shapes ``(num_users, f)`` and ``(num_items, f)``,
        filled with uniform draws from [0, 1) scaled by 0.01.
    """
    np.random.seed(45)
    scale = 0.01
    # The user matrix is drawn first; the order fixes which values each gets.
    user_factors = scale * np.random.rand(num_users, f)
    item_factors = scale * np.random.rand(num_items, f)
    return user_factors, item_factors

def _resolve_alpha(alpha):
    """Return the confidence scale to use, defaulting to the module-level ``alpha``.

    The update functions originally read a global named ``alpha``; callers
    that still omit the argument keep that behaviour.
    """
    if alpha is None:
        alpha = globals().get("alpha")
        if alpha is None:
            raise NameError(
                "alpha was not passed and no module-level 'alpha' is defined"
            )
    return alpha


def _group_interactions(data, key_pos, num_keys):
    """Bucket records by user (key_pos=0) or item (key_pos=1).

    Returns ``{key: {other_index: rating}}`` for keys in ``range(num_keys)``.
    Records whose key is out of range are skipped; a later record for the same
    (user, item) pair overrides an earlier one.
    """
    grouped = {}
    for record in data:
        key = record[key_pos]
        if 0 <= key < num_keys:
            grouped.setdefault(key, {})[record[1 - key_pos]] = record[2]
    return grouped


def _solve_factors(fixed, grouped, num_rows, f, reg_lambda, alpha):
    """Solve one ALS half-step: every row of one factor matrix, the other held fixed.

    For each row with interactions this solves
    ``(F^T C F + reg_lambda * I) x = F^T C p`` where ``F`` is ``fixed``,
    ``C`` is diagonal with ``1 + alpha * r`` on observed entries and 1
    elsewhere, and ``p`` is 1 where ``r > 0`` and 0 elsewhere. Rows without
    interactions have ``p = 0`` and therefore stay at zero.
    """
    # F^T F + lambda*I is shared by all rows; only the observed entries add
    # the (C - I) correction on top of it.
    gram = fixed.T @ fixed + reg_lambda * np.eye(f)
    solved = np.zeros((num_rows, f))
    for row, ratings in grouped.items():
        cols = np.fromiter(ratings.keys(), dtype=np.intp, count=len(ratings))
        r = np.fromiter(ratings.values(), dtype=float, count=len(ratings))
        observed = fixed[cols]                # factor rows of the observed entries
        extra_conf = alpha * r                # diagonal of (C - I)
        pref = (r > 0).astype(float)          # binary preference p
        A = gram + observed.T @ (extra_conf[:, None] * observed)
        # The right-hand side uses the full confidence C = I + (C - I).
        b = observed.T @ ((1.0 + extra_conf) * pref)
        solved[row] = np.linalg.solve(A, b)
    return solved


def update_user_matrix(Y, data, num_users, num_items, f, reg_lambda, alpha=None):
    """Recompute every user factor vector with the item factors held fixed.

    Args:
        Y: Item factor matrix, one row per item and ``f`` columns.
        data: Iterable of ``(user, item, rating)`` records.
        num_users: Number of rows of the returned matrix.
        num_items: Number of items; ``Y`` is expected to have this many rows.
        f: Number of latent factors.
        reg_lambda: L2 regularisation weight.
        alpha: Confidence scale in ``c = 1 + alpha * rating``. When omitted,
            the module-level ``alpha`` is used (backward compatibility).

    Returns:
        Array of shape ``(num_users, f)``; users without records get zeros.

    Raises:
        NameError: If ``alpha`` is omitted and no module-level ``alpha`` exists.
        IndexError: If a record of an in-range user names an item outside ``Y``.
        numpy.linalg.LinAlgError: If a user's linear system is singular.
    """
    alpha = _resolve_alpha(alpha)
    by_user = _group_interactions(data, 0, num_users)
    return _solve_factors(Y, by_user, num_users, f, reg_lambda, alpha)


def update_item_matrix(X, data, num_users, num_items, f, reg_lambda, alpha=None):
    """Recompute every item factor vector with the user factors held fixed.

    Args:
        X: User factor matrix, one row per user and ``f`` columns.
        data: Iterable of ``(user, item, rating)`` records.
        num_users: Number of users; ``X`` is expected to have this many rows.
        num_items: Number of rows of the returned matrix.
        f: Number of latent factors.
        reg_lambda: L2 regularisation weight.
        alpha: Confidence scale in ``c = 1 + alpha * rating``. When omitted,
            the module-level ``alpha`` is used (backward compatibility).

    Returns:
        Array of shape ``(num_items, f)``; items without records get zeros.

    Raises:
        NameError: If ``alpha`` is omitted and no module-level ``alpha`` exists.
        IndexError: If a record of an in-range item names a user outside ``X``.
        numpy.linalg.LinAlgError: If an item's linear system is singular.
    """
    alpha = _resolve_alpha(alpha)
    by_item = _group_interactions(data, 1, num_items)
    return _solve_factors(X, by_item, num_items, f, reg_lambda, alpha)


def calculate_objective(X, Y, data, alpha, reg_lambda):
    """Evaluate the regularised implicit-feedback cost over ALL user-item pairs.

    The cost is ``sum_{u,i} c_ui * (p_ui - x_u . y_i)^2`` plus
    ``reg_lambda * (||X||^2 + ||Y||^2)``, with ``c_ui = 1 + alpha * r_ui`` and
    ``p_ui = 1`` if ``r_ui > 0`` else 0. Pairs absent from ``data`` count with
    ``r_ui = 0`` (so ``c_ui = 1``, ``p_ui = 0``), which is the objective the
    update functions minimise.

    Args:
        X: User factor matrix.
        Y: Item factor matrix.
        data: Iterable of ``(user, item, rating)`` records; a later record for
            the same pair overrides an earlier one.
        alpha: Confidence scale.
        reg_lambda: L2 regularisation weight.

    Returns:
        The scalar cost.
    """
    observed = {(u, i): r for u, i, r in data}
    # sum over every (u, i) of (x_u . y_i)^2 equals sum((X^T X) * (Y^T Y)),
    # which avoids materialising the full prediction matrix.
    cost = np.sum((X.T @ X) * (Y.T @ Y))
    for (u, i), r in observed.items():
        pred = X[u] @ Y[i]
        cui = 1 + alpha * r
        pui = 1 if r > 0 else 0
        # Swap the unobserved-pair term already counted above for the
        # confidence-weighted term of this observed pair.
        cost += cui * (pui - pred) ** 2 - pred ** 2
    cost += reg_lambda * (np.sum(np.square(X)) + np.sum(np.square(Y)))
    return cost


def ALS(data, num_users, num_items, f, alpha, reg_lambda, max_iter, artist_dict,
        user_id=1, top_k=5):
    """Fit user/item factors by alternating least squares and print a top-k list.

    Prints the objective after every iteration, then the names of the
    ``top_k`` highest-scoring item indices for ``user_id``.

    Args:
        data: Iterable of ``(user, item, rating)`` records.
        num_users: Number of users (rows of ``X``).
        num_items: Number of items (rows of ``Y``).
        f: Number of latent factors.
        alpha: Confidence scale, used by both the updates and the objective.
        reg_lambda: L2 regularisation weight.
        max_iter: Number of full (user + item) update rounds.
        artist_dict: Mapping from item index to display name.
        user_id: Row of ``X`` to print recommendations for (default 1).
        top_k: How many recommendations to print (default 5).

    Returns:
        ``(X, Y)``, the fitted user and item factor matrices.

    Raises:
        IndexError: If ``user_id`` is not a valid row index of ``X``.
    """
    # Fail before training rather than after it.
    if not -num_users <= user_id < num_users:
        raise IndexError(
            f"user_id {user_id} is out of range for {num_users} users"
        )

    X, Y = initialize_matrices(num_users, num_items, f)
    for iteration in range(1, max_iter + 1):
        X = update_user_matrix(Y, data, num_users, num_items, f, reg_lambda, alpha)
        Y = update_item_matrix(X, data, num_users, num_items, f, reg_lambda, alpha)
        objective = calculate_objective(X, Y, data, alpha, reg_lambda)
        print(f"Iteration {iteration}: Objective (Cimplicit) = {objective:.4f}")

    user_preferences = X[user_id] @ Y.T
    top_artist_ids = np.argsort(-user_preferences)[:top_k]
    print(f"\nTop {top_k} artists for user {user_id} after {max_iter} iterations:")
    for artist_id in top_artist_ids:
        # A missing name must not discard the trained factors, so fall back
        # to a placeholder instead of raising KeyError.
        print(artist_dict.get(int(artist_id), f"<unknown artist {artist_id}>"))
    return X, Y

# Problem size and ALS hyperparameters
num_users, num_items = 100, 100
f = 3
reg_lambda = 0.01
max_iter = 100

# Load the interaction triples and the artist-id -> name lookup
data = read_data(user_artists_small_file)
artist_dict = read_artists(artists_file)

# The value reported as "Sparsity ratio" is stored as `alpha` and passed to ALS
alpha = calculate_sparsity_ratio(data, num_users, num_items)
print(f"Sparsity ratio: {alpha:.4f}\n")

# Fit the factor matrices; ALS prints its own progress and recommendations
X, Y = ALS(
    data=data,
    num_users=num_users,
    num_items=num_items,
    f=f,
    alpha=alpha,
    reg_lambda=reg_lambda,
    max_iter=max_iter,
    artist_dict=artist_dict,
)

Sparsity ratio: 0.0272

Iteration 1: Objective (Cimplicit) = 1542.0289
Iteration 2: Objective (Cimplicit) = 887.3567
Iteration 3: Objective (Cimplicit) = 772.4329
Iteration 4: Objective (Cimplicit) = 717.5634
Iteration 5: Objective (Cimplicit) = 677.5370
Iteration 6: Objective (Cimplicit) = 645.1720
Iteration 7: Objective (Cimplicit) = 618.1496
Iteration 8: Objective (Cimplicit) = 595.2172
Iteration 9: Objective (Cimplicit) = 575.4415
Iteration 10: Objective (Cimplicit) = 558.1262
Iteration 11: Objective (Cimplicit) = 542.7853
Iteration 12: Objective (Cimplicit) = 529.0692
Iteration 13: Objective (Cimplicit) = 516.6841
Iteration 14: Objective (Cimplicit) = 505.2837
Iteration 15: Objective (Cimplicit) = 494.3811
Iteration 16: Objective (Cimplicit) = 483.8022
Iteration 17: Objective (Cimplicit) = 474.0700
Iteration 18: Objective (Cimplicit) = 465.4517
Iteration 19: Objective (Cimplicit) = 457.7525
Iteration 20: Objective (Cimplicit) = 450.7145
Iteration 21: Objective (Cimplicit) = 444.15