In [1]:
import os

# Pin OpenBLAS to a single thread for implicit ALS. This runs in the first cell,
# before the numeric libraries are imported further down.
os.environ.update({"OPENBLAS_NUM_THREADS": "1"})

In [2]:
import warnings

warnings.filterwarnings("ignore")

In [3]:
import pandas as pd
import numpy as np
import optuna


from implicit.als import AlternatingLeastSquares

from rectools.metrics import MAP, calc_metrics
from rectools import Columns
from rectools.dataset import Dataset
from rectools.models import ImplicitALSWrapperModel

from pathlib import Path


# Read and Preprocess Data

In [24]:
DATA_PATH = Path("../data")
# Number of leading interaction rows used in this experiment.
N_INTERACTIONS = 100_000

# Point the rectools datetime column name at the column used by this dataset.
# NOTE(review): this reassigns an attribute on the shared `Columns` class, so it
# affects every later use of `Columns.Datetime` in this kernel session.
Columns.Datetime = "last_watch_dt"

users = pd.read_csv(DATA_PATH / "users.csv")
items = pd.read_csv(DATA_PATH / "items.csv")
# `nrows` stops parsing after the first N rows instead of loading the whole
# file into memory and slicing it afterwards.
interactions = pd.read_csv(DATA_PATH / "interactions.csv", nrows=N_INTERACTIONS)

In [25]:
# Parse the watch date and derive the interaction weight:
# 3 when more than 10 percent was watched, otherwise 1.
interactions[Columns.Datetime] = pd.to_datetime(interactions[Columns.Datetime], format="%Y-%m-%d")
watched_enough = interactions["watched_pct"] > 10
interactions[Columns.Weight] = np.where(watched_enough, 3, 1)

# Time-based split: everything from (max date - 7 days) onwards goes to test,
# everything strictly before that boundary goes to train.
max_date = interactions[Columns.Datetime].max()
split_date = max_date - pd.Timedelta(days=7)
watch_dates = interactions[Columns.Datetime]
train = interactions[watch_dates < split_date].copy()
test = interactions[watch_dates >= split_date].copy()

print(f"train: {train.shape}")
print(f"test: {test.shape}")

train: (9120, 6)
test: (880, 6)


In [26]:
# Remove train interactions whose total_dur is below 300.
too_short = train["total_dur"] < 300
train = train.drop(index=train.index[too_short])

# Users present in test but absent from the filtered train set are treated as
# cold and removed from test.
cold_users = set(test[Columns.User]) - set(train[Columns.User])
is_cold = test[Columns.User].isin(cold_users)
test = test.drop(index=test.index[is_cold])

In [27]:
# Build the rectools dataset from the filtered train interactions only.
dataset = Dataset.construct(train)

# Fixed hyperparameters

In [28]:
# Fixed settings shared by the cells below.
RANDOM_STATE = 42  # seed passed to the ALS model
K_RECOS = 10  # number of items recommended per user
NUM_THREADS = 8  # NOTE(review): not referenced in the visible cells; confirm it is still needed

In [30]:
# Cut-off applied to every ranking metric configured here.
k = 10

# Metric label -> metric class; each class is instantiated with the shared cut-off
# and stored under the key "<label>@<k>".
metrics_name = {"MAP": MAP}
metrics = dict((f"{label}@{k}", metric_cls(k=k)) for label, metric_cls in metrics_name.items())

In [31]:
# Display the configured metrics; keys have the form "<name>@<k>".
metrics

{'MAP@10': MAP(k=10, divide_by_k=False)}

# Hyperparameter tuning

In [35]:
def objective(trial, dataset, train, test):
    """Fit an ALS model with trial-suggested hyperparameters and return its MAP score.

    Parameters
    ----------
    trial
        Optuna trial used to sample ``n_factors``, ``regularization`` and
        ``iterations`` from fixed categorical choices.
    dataset
        Dataset the model is fitted on and generates recommendations from.
    train
        Train interactions, passed to ``calc_metrics`` after ``test``.
    test
        Evaluation interactions; recommendations are produced for its unique
        users and scored against it.

    Returns
    -------
    The value stored under ``"MAP@<k>"`` in the computed metrics, where ``k``
    is the notebook-level cut-off the ``metrics`` dict was built with.

    Notes
    -----
    Reads the notebook-level names ``RANDOM_STATE``, ``K_RECOS``, ``metrics``
    and ``k``.

    NOTE(review): the score is computed on ``test``. If the same split is also
    used to report final quality, that estimate will be optimistic; consider a
    separate validation split for tuning.
    """
    n_factors = trial.suggest_categorical("n_factors", (4, 20, 50, 100))
    regularization = trial.suggest_categorical("regularization", (0.01, 0.05, 0.1))
    iterations = trial.suggest_categorical("iterations", (15, 5, 10))

    model = ImplicitALSWrapperModel(
        model=AlternatingLeastSquares(
            factors=n_factors,
            regularization=regularization,
            iterations=iterations,
            random_state=RANDOM_STATE,
        ),
    )
    model.fit(dataset)

    recs = model.recommend(
        users=test[Columns.User].unique(),
        dataset=dataset,
        k=K_RECOS,
        filter_viewed=True,
    )
    metrics_vals = calc_metrics(metrics, recs, test, train)
    # Build the key from `k` (the cut-off `metrics` was created with) instead of
    # hard-coding "MAP@10", which raises KeyError as soon as `k` changes.
    return metrics_vals[f"MAP@{k}"]


# Seed the sampler so that re-running the notebook proposes the same sequence
# of trials; TPE is the sampler Optuna uses by default, so only the seed changes.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
)

[I 2023-12-05 23:42:27,180] A new study created in memory with name: no-name-6acb25a5-8c90-4829-814c-945d0e00a09b


In [40]:
def _run_trial(trial):
    """Evaluate one trial against the notebook's dataset and train/test split."""
    return objective(trial, dataset, train, test)


study.optimize(_run_trial, n_trials=50)

[I 2023-12-05 23:43:15,928] Trial 0 finished with value: 0.021428571428571432 and parameters: {'n_factors': 4, 'regularization': 0.1, 'iterations': 15}. Best is trial 0 with value: 0.021428571428571432.
[I 2023-12-05 23:43:16,883] Trial 1 finished with value: 0.0 and parameters: {'n_factors': 100, 'regularization': 0.1, 'iterations': 10}. Best is trial 0 with value: 0.021428571428571432.
[I 2023-12-05 23:43:17,657] Trial 2 finished with value: 0.0 and parameters: {'n_factors': 100, 'regularization': 0.01, 'iterations': 10}. Best is trial 0 with value: 0.021428571428571432.
[I 2023-12-05 23:43:18,049] Trial 3 finished with value: 0.0 and parameters: {'n_factors': 100, 'regularization': 0.01, 'iterations': 5}. Best is trial 0 with value: 0.021428571428571432.
[I 2023-12-05 23:43:18,434] Trial 4 finished with value: 0.0 and parameters: {'n_factors': 100, 'regularization': 0.01, 'iterations': 5}. Best is trial 0 with value: 0.021428571428571432.
[I 2023-12-05 23:43:18,950] Trial 5 finished

In [44]:
# Report the best hyperparameter combination and the score it achieved.
best_params, best_value = study.best_params, study.best_value
print(best_params, best_value)

{'n_factors': 100, 'regularization': 0.05, 'iterations': 15} 0.07142857142857142
