In [None]:
from utils import *

In [None]:
%load_ext Cython

In [None]:
# Load the raw dataset and the user list.
# NOTE(review): load_data / load_users are not defined in this notebook; presumably
# they come from the star-import of `utils` — verify.
data = load_data()
users = load_users()

In [None]:
# Replace `data` with its preprocessed form and keep the user count, item count
# and user-id mapping that preprocess_data returns alongside it.
# NOTE(review): `data` is rebound from itself, so re-running this cell feeds the
# already-preprocessed value back into preprocess_data (not idempotent).
data, num_users, num_items, mapping_user_id = preprocess_data(data)

In [None]:
# Split the preprocessed data into train / validation / test parts, requesting
# validation_percentage=0.1 and testing_percentage=0.20.
# NOTE(review): no random seed is passed here; whether the split is reproducible
# depends on split_data's implementation — confirm.
data_train, data_validation, data_test = split_data(
    data,
    num_users=num_users,
    num_items=num_items,
    validation_percentage=0.1,
    testing_percentage=0.20
)

In [None]:
%%cython

import numpy as np
import time

from libc.stdlib cimport rand, srand, RAND_MAX

def train_multiple_epochs(URM_train, learning_rate_input, regularization_2_input, n_epochs):
    """Learn a dense item-item weight matrix with SGD on a squared-error loss.

    Every epoch draws ``URM_train.nnz`` interactions at random (with
    replacement). For each drawn (user, item, rating) the rating is predicted
    as the sum, over the user's profile, of ``S[profile_item, item] *
    profile_rating``; the column ``S[:, item]`` is then updated along the
    user's profile using the prediction error and an L2 penalty. The diagonal
    entry of the sampled item is reset to zero after every update.

    Parameters
    ----------
    URM_train : scipy sparse matrix
        User x item interaction matrix. It is converted to CSR and its arrays
        are coerced to C ``int`` / ``double`` so the typed memoryviews bind.
    learning_rate_input : float
        SGD step size.
    regularization_2_input : float
        L2 regularization coefficient.
    n_epochs : int
        Number of passes; each pass performs ``URM_train.nnz`` updates.

    Returns
    -------
    numpy.ndarray
        ``(n_items, n_items)`` float array of learned weights. All zeros when
        the matrix holds no interactions.
    """
    # Accept any sparse format: the profile lookups below need CSR arrays.
    URM_train = URM_train.tocsr()
    URM_train_coo = URM_train.tocoo()

    cdef int n_items = URM_train.shape[1]
    cdef int n_interactions = URM_train.nnz

    # np.asarray returns the input untouched when the dtype already matches,
    # and otherwise converts it so the memoryview dtype check cannot fail.
    cdef int[:] URM_train_coo_row = np.asarray(URM_train_coo.row, dtype=np.intc)
    cdef int[:] URM_train_coo_col = np.asarray(URM_train_coo.col, dtype=np.intc)
    cdef double[:] URM_train_coo_data = np.asarray(URM_train_coo.data, dtype=np.double)
    cdef int[:] URM_train_indices = np.asarray(URM_train.indices, dtype=np.intc)
    cdef int[:] URM_train_indptr = np.asarray(URM_train.indptr, dtype=np.intc)
    cdef double[:] URM_train_data = np.asarray(URM_train.data, dtype=np.double)

    cdef double[:,:] item_item_S = np.zeros((n_items, n_items), dtype = float)
    cdef double learning_rate = learning_rate_input
    cdef double regularization_2 = regularization_2_input
    cdef double loss = 0.0
    # time.time() yields a float; keep timestamps as doubles so the sub-second
    # part is not lost.
    cdef double start_time, elapsed_time, samples_per_second
    cdef double true_rating, predicted_rating, prediction_error, profile_rating
    cdef int start_profile, end_profile
    cdef int index
    cdef int profile_index
    cdef int sample_num
    cdef int n_epoch
    cdef int user_id
    cdef int item_id
    cdef int profile_item_id

    # A single rand() call only covers [0, RAND_MAX]. When there are more
    # interactions than that, two draws are combined so every interaction can
    # be sampled; otherwise the single-draw path is kept as-is.
    cdef bint needs_wide_draw = n_interactions > RAND_MAX
    cdef long long rand_span = <long long>RAND_MAX + 1

    # Nothing to learn from (and nothing to divide by) without interactions.
    if n_interactions == 0:
        return np.array(item_item_S)

    for n_epoch in range(n_epochs):

        loss = 0.0
        start_time = time.time()

        for sample_num in range(n_interactions):

            # Randomly pick sample
            if needs_wide_draw:
                index = <int>((<long long>rand() * rand_span + rand()) % n_interactions)
            else:
                index = rand() % n_interactions

            user_id = URM_train_coo_row[index]
            item_id = URM_train_coo_col[index]
            true_rating = URM_train_coo_data[index]

            # Compute prediction from the user's CSR profile slice
            start_profile = URM_train_indptr[user_id]
            end_profile = URM_train_indptr[user_id+1]
            predicted_rating = 0.0

            for profile_index in range(start_profile, end_profile):
                profile_item_id = URM_train_indices[profile_index]
                profile_rating = URM_train_data[profile_index]
                predicted_rating += item_item_S[profile_item_id,item_id] * profile_rating

            # Compute prediction error, or gradient
            prediction_error = true_rating - predicted_rating
            loss += prediction_error**2

            # Update model, in this case the similarity
            for profile_index in range(start_profile, end_profile):
                profile_item_id = URM_train_indices[profile_index]
                profile_rating = URM_train_data[profile_index]
                item_item_S[profile_item_id,item_id] += learning_rate * (prediction_error * profile_rating -
                                                                         regularization_2 * item_item_S[profile_item_id,item_id])

            # Ensure diagonal is always zero
            item_item_S[item_id,item_id] = 0.0

        elapsed_time = time.time() - start_time
        # A coarse clock can report zero elapsed time for a tiny epoch.
        if elapsed_time > 0.0:
            samples_per_second = n_interactions / elapsed_time
        else:
            samples_per_second = float("inf")
        print("\tEpoch {} complete in {:.2f} seconds, loss is {:.3E}. Samples per second {:.2f}".format(n_epoch+1, elapsed_time, loss/n_interactions, samples_per_second))

    return np.array(item_item_S)

In [None]:
from typing import Optional

class SLIMMSE(object):
    """Recommender that scores items as ``user_profile . weights``.

    The weight matrix is produced by ``train_multiple_epochs`` when ``fit`` is
    called; until then ``weights`` is ``None``.
    """

    def __init__(self, learning_rate: float, regularization: float):
        """Remember the training hyperparameters; no training happens here."""
        self.weights = None
        self.regularization = regularization
        self.learning_rate = learning_rate

    def fit(self, urm_train: sp.csr_matrix, n_epochs: int):
        """Train on ``urm_train`` for ``n_epochs`` epochs and store the weights."""
        self.weights = train_multiple_epochs(
            urm_train,
            self.learning_rate,
            self.regularization,
            n_epochs,
        )

    def recommend(self, user_id: int, urm_train: sp.csr_matrix, at: Optional[int] = None, remove_seen: bool = True):
        """Return item indices for ``user_id`` ordered from highest to lowest score.

        Scores are the product of the user's row of ``urm_train`` with the
        learned weights. With ``remove_seen`` the items stored in that row get
        a score of ``-inf`` before sorting. ``at`` truncates the ranking;
        ``None`` keeps every item.
        """
        scores = urm_train[user_id].dot(self.weights).flatten()

        if remove_seen:
            # CSR row slice holding the column indices of this user's items.
            row_slice = slice(urm_train.indptr[user_id], urm_train.indptr[user_id + 1])
            scores[urm_train.indices[row_slice]] = -np.inf

        best_first = np.flip(np.argsort(scores))
        return best_first[:at]

In [None]:
def hyperparameter_tuning(learning_rates=(4e-5,),
                          regularizations=(4e-5, 8e-5, 4e-4, 8e-4, 4e-3, 8e-3),
                          epochs=(1000,)):
    """Grid-search SLIMMSE over learning rate, regularization and epoch count.

    Each combination is fit on the notebook-level ``data_train`` and scored
    with ``evaluator(recommender, data_train, data_validation)``. The first
    three values returned by ``evaluator`` are kept (named precision, recall
    and MAP here); the remaining two are discarded.

    Parameters
    ----------
    learning_rates, regularizations, epochs : iterable
        Values to try for each hyperparameter. The defaults are the grid this
        notebook originally hard-coded, so calling with no arguments runs the
        same search as before.

    Returns
    -------
    list
        One ``((lr, reg, eps), (precision, recall, map))`` tuple per
        combination, in iteration order (learning rate outermost, epochs
        innermost).
    """
    results = []
    for lr in learning_rates:
        for reg in regularizations:
            for eps in epochs:
                print(f'lr={lr}, reg={reg}, eps={eps}')

                slimmse_recommender = SLIMMSE(lr, reg)
                slimmse_recommender.fit(data_train, eps)

                ev_precision, ev_recall, ev_map, _, _ = evaluator(slimmse_recommender, data_train, data_validation)

                results.append(((lr, reg, eps), (ev_precision, ev_recall, ev_map)))

                # Drop the model before the next one is built; only the
                # metrics are kept.
                del slimmse_recommender

    return results

In [None]:
# Run the grid search and keep its list of
# ((lr, reg, eps), (precision, recall, map)) entries for inspection below.
hyperparameter_results = hyperparameter_tuning()

In [None]:
# Print one comma-separated line per configuration:
# lr,reg,eps,precision,recall,map
for (lr, reg, eps), (ev_precision, ev_recall, ev_map) in hyperparameter_results:
    print(f'{lr},{reg},{eps},{ev_precision},{ev_recall},{ev_map}')

In [None]:
# Hyperparameters used for the final model.
# NOTE(review): none of these values (lr 7e-05, reg 7e-03, 50 epochs) is among
# the grid points listed in hyperparameter_tuning — confirm where they come from.
best_learning_rate = 7e-05
best_regularization = 7e-03
best_epochs = 50
# Sum of the train and validation splits, used to fit the final model on both
# (assumes the two splits hold disjoint interactions — verify against split_data).
data_train_validation = data_train + data_validation

In [None]:
# Fit the final recommender on train+validation with the chosen hyperparameters.
best_recommender = SLIMMSE(best_learning_rate, best_regularization)
best_recommender.fit(data_train_validation, best_epochs)

In [None]:
# Build the submission with the final recommender and write it out.
# NOTE(review): best_recommender was fit on data_train_validation, but data_train
# is what gets passed here. If prepare_submission uses that argument as the user
# profile / seen-item filter, the validation interactions are ignored when
# recommending — confirm this is intended.
write_submission(prepare_submission(data, users, data_train, best_recommender))