In [1]:
import sys
import pickle
import pandas as pd
from pathlib import Path
import optuna
sys.path.append('../app/utils')

from get_test import get_test
from metrics import precision_at_k
from scipy.sparse import load_npz, save_npz
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares

In [2]:
# Directory with the raw datasets: a sibling `datasets` folder next to the current working directory.
datasets_path = Path.cwd().parent.joinpath('datasets')

In [3]:
# Directory with the application's data artifacts: `<parent of cwd>/app/data`.
data_path = Path.cwd().parent.joinpath('app', 'data')

In [4]:
# Read the zipped test interactions, then collect the distinct songs of every
# user into one row: columns `user_id` and `actual`.
test = pd.read_csv(datasets_path / 'test.csv.zip', compression='zip')
result = (
    test
    .groupby('user_id')['song']
    .unique()
    .rename('actual')
    .reset_index()
)

In [21]:
# Load the saved training matrix and convert it to a float-valued CSR matrix in one step.
user_item_matrix = (
    load_npz(data_path / 'train_user_item_matrix.npz')
    .astype('float')
    .tocsr()
)

In [22]:
# Inspect the matrix dimensions. Rows are looked up by user index in
# get_recommendations, so this is presumably (n_users, n_items) — TODO confirm.
user_item_matrix.shape

(13607, 10000)

In [23]:
# Restore the pickled id sequences stored in the app's data directory.
# NOTE(review): pickle.load executes arbitrary code from the file — only use on trusted artifacts.
with open(data_path / 'userids.pkl', mode='rb') as f:
    userids = pickle.load(f)

with open(data_path / 'itemids.pkl', mode='rb') as f:
    itemids = pickle.load(f)

# External user id -> position in `userids`, and the inverse lookup.
userid_to_id = {uid: position for position, uid in enumerate(userids)}
id_to_userid = dict(zip(userid_to_id.values(), userid_to_id.keys()))

# External item id -> position in `itemids`, and the inverse lookup.
item_to_id = {iid: position for position, iid in enumerate(itemids)}
id_to_item = dict(zip(item_to_id.values(), item_to_id.keys()))

In [24]:
def get_recommendations(model, user_id, user_items_matrix=user_item_matrix, n=5, *args, **kwargs):
    """Return the top-``n`` recommended items for ``user_id`` as original item ids.

    Parameters
    ----------
    model
        Object exposing ``recommend(userid=..., user_items=..., N=..., ...)``;
        the first element of its return value is read as the recommended
        item indices.
    user_id
        External user id; translated to a matrix row through ``userid_to_id``
        (an unknown id raises ``KeyError``).
    user_items_matrix
        Matrix indexed by user row. The default is the ``user_item_matrix``
        object that exists at the moment this function is defined.
    n : int
        Number of recommendations to request.
    *args
        Accepted but not used.
    **kwargs
        Forwarded unchanged to ``model.recommend``.

    Returns
    -------
    list
        Item ids obtained by mapping each recommended index through ``id_to_item``.
    """
    row = userid_to_id[user_id]
    recommend_options = dict(
        userid=row,
        user_items=user_items_matrix[row],
        N=n,  # number of recommendations
        filter_already_liked_items=False,  # do not ask the model to drop already-liked items
        filter_items=None,
        recalculate_user=False,
    )
    recommended = model.recommend(**recommend_options, **kwargs)

    top_item_indices = recommended[0]
    return [id_to_item[item_index] for item_index in top_item_indices]

In [9]:
# Load a previously pickled model (presumably the ALS baseline) from the app's models directory.
# NOTE(review): the name `als` is rebound further down when the model is refit
# with the tuned parameters.
with open('../app/models/als_100.pkl', mode='rb') as f:
    als = pickle.load(f)

In [36]:
# 1. Define an objective function to be maximized.
def objective(trial):
    """Train an ALS model with trial-suggested hyperparameters and score it.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``factors`` (int in [50, 150]) and
        ``regularization`` (float in [0.001, 0.1]).

    Returns
    -------
    float
        Mean precision@20 over the users in ``result``.

    Notes
    -----
    The notebook-level ``result`` frame is not modified: recommendations are
    attached to a copy, so repeated trials leave no hidden state behind.
    """
    top_k = 20  # one cutoff shared by the recommendation list length and the metric

    # 2. Suggest values for the hyperparameters using a trial object.
    factors = trial.suggest_int('factors', 50, 150)
    regularization = trial.suggest_float('regularization', 0.001, 0.1)

    # A fixed seed keeps the random initialisation identical across trials, so
    # score differences reflect the hyperparameters rather than init noise.
    model = AlternatingLeastSquares(
        factors=factors,
        regularization=regularization,
        iterations=15,
        random_state=42,
    )
    model.fit(user_item_matrix, show_progress=False)

    # DataFrame.assign returns a new frame; the shared `result` stays untouched.
    scored = result.assign(
        als=result['user_id'].apply(
            lambda user_id: get_recommendations(model, user_id, n=top_k)
        )
    )
    precision_at_20 = scored.apply(
        lambda row: precision_at_k(row['actual'], row['als'], k=top_k),
        axis=1,
    ).mean()

    # NOTE(review): trials are scored on the frame built from test.csv.zip, the
    # same data used for the final numbers — a separate validation split would
    # avoid an optimistic estimate.
    return precision_at_20

In [37]:
%%time
# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

[32m[I 2023-03-04 13:44:17,766][0m A new study created in memory with name: no-name-b60dbeb9-50eb-45bb-86e2-57ffe30c6725[0m
[32m[I 2023-03-04 13:44:38,186][0m Trial 0 finished with value: 0.08985816124053796 and parameters: {'factors': 105, 'regularization': 0.00745494172750023}. Best is trial 0 with value: 0.08985816124053796.[0m
[32m[I 2023-03-04 13:45:00,738][0m Trial 1 finished with value: 0.08901668258984347 and parameters: {'factors': 95, 'regularization': 0.09901910758006442}. Best is trial 0 with value: 0.08985816124053796.[0m
[32m[I 2023-03-04 13:45:17,652][0m Trial 2 finished with value: 0.08661350775336225 and parameters: {'factors': 75, 'regularization': 0.054581462545222045}. Best is trial 0 with value: 0.08985816124053796.[0m
[32m[I 2023-03-04 13:45:37,795][0m Trial 3 finished with value: 0.08958256779598736 and parameters: {'factors': 102, 'regularization': 0.09166240422707347}. Best is trial 0 with value: 0.08985816124053796.[0m
[32m[I 2023-03-04 13:45:5

CPU times: user 1h 26min 1s, sys: 1h 1min 26s, total: 2h 27min 27s
Wall time: 40min 27s


In [38]:
# Show the hyperparameters of the best trial.
# NOTE(review): in the recorded run the best `factors` equals 150, the upper
# bound of the range sampled in `objective` — the range may be worth widening.
study.best_params

{'factors': 150, 'regularization': 0.050983849624629776}

In [39]:
# Refit the final model with the best hyperparameters found by the study.
# `iterations=15` is spelled out to mirror the configuration used during tuning,
# and a fixed `random_state` makes the model that is pickled below repeatable.
als = AlternatingLeastSquares(
    **study.best_params,
    iterations=15,
    random_state=42,
)
als.fit(user_item_matrix, show_progress=False)

In [40]:
# Recommend 20 items per test user with the refit model, then average precision@20.
result['als'] = result['user_id'].map(
    lambda uid: get_recommendations(als, uid, n=20)
)
precision_at_20 = (
    result
    .apply(lambda rec: precision_at_k(rec['actual'], rec['als'], k=20), axis=1)
    .mean()
)

In [41]:
# Mean precision@20 of the refit model over the users in `result`.
precision_at_20

0.09083927390313809

In [42]:
# Recommend 5 items per test user (n=5 is also get_recommendations' default) and
# average the metric. `k` is left at precision_at_k's own default —
# presumably 5; TODO confirm against metrics.py.
result['als_5'] = result['user_id'].map(
    lambda uid: get_recommendations(als, uid, n=5)
)
precision_at_5 = (
    result
    .apply(lambda rec: precision_at_k(rec['actual'], rec['als_5']), axis=1)
    .mean()
)

In [43]:
# Mean of precision_at_k (called with its default k) over the 5-item lists from the refit model.
precision_at_5

0.09999265084147864

In [34]:
# NOTE(review): this cell's execution count (34) is lower than the cells above,
# so its saved output likely predates the refit (possibly from the earlier
# pickled model) — re-run or remove; TODO confirm.
precision_at_5

0.10282942603071947

In [44]:
# Persist the refit model next to the application's other pickled models.
with open('../app/models/als_best_params.pkl', mode='wb') as f:
    pickle.dump(als, f)