In [1]:
import pandas as pd
import numpy as np
import optuna


from implicit.als import AlternatingLeastSquares

# Not compatible with rectools wrapper
# from implicit.approximate_als import (
#     AnnoyAlternatingLeastSquares,
#     FaissAlternatingLeastSquares,
# )

from rectools.metrics import MAP, calc_metrics
from rectools import Columns
from rectools.dataset import Dataset
from rectools.models import ImplicitALSWrapperModel
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DATA_PATH = Path("../data")
# Only the first N_INTERACTIONS rows of interactions.csv are used in this experiment.
N_INTERACTIONS = 100_000

users = pd.read_csv(DATA_PATH / "users.csv")
items = pd.read_csv(DATA_PATH / "items.csv")
# Rename the raw "last_watch_dt" column to the name rectools already uses
# (`Columns.Datetime`) rather than reassigning the library's class attribute:
# patching `Columns` changes it for every rectools user in the process and is
# not seen by anything that captured the original name beforehand.
# `nrows` stops parsing after the needed rows instead of loading the whole
# file and slicing it afterwards.
interactions = pd.read_csv(
    DATA_PATH / "interactions.csv",
    nrows=N_INTERACTIONS,
).rename(columns={"last_watch_dt": Columns.Datetime})

In [3]:
# Parse the day-precision timestamp strings into pandas datetimes.
interactions[Columns.Datetime] = pd.to_datetime(interactions[Columns.Datetime], format="%Y-%m-%d")
# Interactions with watched_pct above 10 get weight 3; everything else gets weight 1.
interactions[Columns.Weight] = np.where(interactions["watched_pct"].gt(10), 3, 1)

# Time-based split: rows dated on or after (latest date - 7 days) form the test
# set, strictly earlier rows form the training set.
max_date = interactions[Columns.Datetime].max()
split_date = max_date - pd.Timedelta(days=7)
is_train_row = interactions[Columns.Datetime] < split_date
is_test_row = interactions[Columns.Datetime] >= split_date
train = interactions.loc[is_train_row].copy()
test = interactions.loc[is_test_row].copy()

print(f"train: {train.shape}")
print(f"test: {test.shape}")

train: (91015, 6)
test: (8985, 6)


In [4]:
# Minimum total_dur an interaction needs to stay in the training set
# (presumably seconds -- confirm against the dataset description).
MIN_TOTAL_DUR = 300

# Build new filtered frames instead of dropping rows in place, so the cell does
# not mutate objects created by an earlier cell.
# `~(... < MIN_TOTAL_DUR)` keeps rows whose total_dur is missing, exactly like
# the previous `query("total_dur < 300")`-based drop did.
train = train[~train["total_dur"].lt(MIN_TOTAL_DUR)]

# Users that appear in test but have no remaining training interactions cannot
# be scored by the model, so their test rows are removed.
cold_users = set(test[Columns.User]) - set(train[Columns.User])
test = test[~test[Columns.User].isin(cold_users)]


In [5]:
# Build the rectools Dataset from the training interactions only.
dataset = Dataset.construct(interactions_df=train)

In [6]:
# Number of recommendations requested per user; also used as the metric cutoff k.
K_RECOS = 10

In [7]:
# Metric classes to evaluate, keyed by a short display name.
metrics_name = {"MAP": MAP}

# One metric instance per class at the K_RECOS cutoff, keyed as "<name>@<k>".
metrics = {
    f"{label}@{K_RECOS}": metric_cls(k=K_RECOS)
    for label, metric_cls in metrics_name.items()
}

In [8]:
def objective(trial, dataset, train, test):
    """Optuna objective: fit ALS with sampled hyperparameters and score it on `test`.

    Args:
        trial: Optuna trial used to sample `n_factors`, `regularization`
            and `iterations` from fixed categorical grids.
        dataset: rectools `Dataset` the model is fitted on and recommends from.
        train: Training interactions, passed to `calc_metrics` as the
            previous-interactions frame.
        test: Held-out interactions; its unique users are the ones scored.

    Returns:
        The MAP@K_RECOS value computed by `calc_metrics` for this trial.
    """
    n_factors = trial.suggest_categorical("n_factors", (5, 10, 25, 50, 100, 200))
    regularization = trial.suggest_categorical("regularization", (0.01, 0.05, 0.1))
    iterations = trial.suggest_categorical("iterations", (5, 10, 25, 100))

    als_model = AlternatingLeastSquares(
        factors=n_factors,
        regularization=regularization,
        random_state=42,  # fixed seed so trials differ only by hyperparameters
        iterations=iterations,
    )
    model = ImplicitALSWrapperModel(model=als_model)
    model.fit(dataset)

    recs = model.recommend(
        users=test[Columns.User].unique(),
        dataset=dataset,
        k=K_RECOS,
        filter_viewed=True,
    )
    metrics_vals = calc_metrics(metrics, recs, test, train)
    # Derive the key from K_RECOS, matching how the `metrics` dict keys are
    # built; a hard-coded "MAP@10" raises KeyError as soon as K_RECOS changes.
    return metrics_vals[f"MAP@{K_RECOS}"]


# Seed the sampler so the sequence of sampled hyperparameters (and therefore
# the reported best trial) is reproducible on a fresh kernel. TPESampler is the
# sampler Optuna uses by default; only the seed is added here.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)



[I 2023-12-12 18:21:44,577] A new study created in memory with name: no-name-fe6c15c7-68ea-4366-914e-bc236b83354e


In [9]:
def _run_trial(trial):
    """Evaluate one Optuna trial against the notebook's dataset and train/test split."""
    return objective(trial, dataset, train, test)


# Run 50 trials of the hyperparameter search.
study.optimize(_run_trial, n_trials=50)

  check_blas_config()
[I 2023-12-12 18:21:57,528] Trial 0 finished with value: 0.003258327451875839 and parameters: {'n_factors': 100, 'regularization': 0.05, 'iterations': 10}. Best is trial 0 with value: 0.003258327451875839.
[I 2023-12-12 18:22:07,883] Trial 1 finished with value: 0.011019083599728762 and parameters: {'n_factors': 10, 'regularization': 0.1, 'iterations': 25}. Best is trial 1 with value: 0.011019083599728762.
[I 2023-12-12 18:22:36,113] Trial 2 finished with value: 0.017357218970122195 and parameters: {'n_factors': 5, 'regularization': 0.05, 'iterations': 100}. Best is trial 2 with value: 0.017357218970122195.
[I 2023-12-12 18:23:30,374] Trial 3 finished with value: 0.00766751774816291 and parameters: {'n_factors': 25, 'regularization': 0.1, 'iterations': 100}. Best is trial 2 with value: 0.017357218970122195.
[I 2023-12-12 18:24:36,749] Trial 4 finished with value: 0.0066969042775494375 and parameters: {'n_factors': 50, 'regularization': 0.1, 'iterations': 100}. Bes

In [10]:
# Best hyperparameter set found by the study and the objective value it reached.
best_params, best_value = study.best_params, study.best_value
print(best_params, best_value)

{'n_factors': 5, 'regularization': 0.01, 'iterations': 10} 0.018981539142829466
