In [35]:
from scipy.sparse import csr_matrix
import pandas as pd
import implicit
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

In [36]:
# Load the interaction log and keep only rows with explicit feedback,
# i.e. rows where like + dislike is at least 1.
train = pd.read_parquet("train_interactions.parquet")
has_feedback = (train['like'] + train['dislike']) >= 1
train = train[has_feedback]

# Split into train / validation (80% / 20%, fixed seed) before the weight
# column is created.
train_data, val_data = train_test_split(
    train,
    test_size=0.2,
    random_state=42,
)


In [37]:
# Load the item and user metadata tables.
users_meta = pd.read_parquet("users_meta.parquet")
items_meta = pd.read_parquet("items_meta.parquet")

# Matrix dimensions: ids are used directly as row/column indices of the
# interaction matrix, so each dimension is the largest known id plus one.
n_users, n_items = (
    users_meta['user_id'].max() + 1,
    items_meta['item_id'].max() + 1,
)

In [38]:
# Signed feedback weight for every training interaction: like - dislike.
# Both columns are cast to a signed integer type first. If the parquet file
# stores the flags as unsigned integers (NOTE(review): dtype not visible
# here - confirm), a plain `like - dislike` wraps around, so a dislike would
# become a large positive value (e.g. 255 for uint8) instead of -1.
train_data['weight'] = (
    train_data['like'].astype('int32') - train_data['dislike'].astype('int32')
)

# User-by-item sparse matrix of weights. user_id / item_id are used directly
# as row / column indices, and repeated (user_id, item_id) pairs are summed
# by the constructor.
train_sparse = csr_matrix(
    (train_data['weight'], (train_data['user_id'], train_data['item_id'])),
    shape=(n_users, n_items),
)

In [39]:
# Train an implicit-feedback ALS model on the user-by-item weight matrix.
# random_state pins the random initialisation of the factor matrices so the
# training loss and the downstream validation score are reproducible after a
# kernel restart, in line with the seeded train/validation split.
model = implicit.als.AlternatingLeastSquares(
    factors=16,
    iterations=10,
    regularization=1,
    alpha=100,
    calculate_training_loss=True,
    random_state=42,
)
model.fit(train_sparse)

100%|██████████| 10/10 [00:22<00:00,  2.21s/it, loss=0.0062] 


In [40]:
# Score every validation row as the dot product of the learned user vector
# and item vector for that (user_id, item_id) pair.
val_user_ids = val_data['user_id'].values
val_item_ids = val_data['item_id'].values
val_predictions = (
    model.user_factors[val_user_ids] * model.item_factors[val_item_ids]
).sum(axis=1)

# Binary target: 1 when like is greater than dislike, 0 otherwise.
val_true = val_data['like'].gt(val_data['dislike']).astype(int)

# ROC AUC of the predicted scores against the binary target.
roc_auc = roc_auc_score(y_true=val_true, y_score=val_predictions)
print(f'Validation ROC AUC: {roc_auc:.4f}')