In [None]:
from scipy.sparse import csr_matrix
import polars as pl
import implicit

In [None]:
# Build the training interactions as a lazy query: nothing is read from disk
# until `.collect()` is called on the resulting LazyFrame.
train = (
    pl.scan_parquet("train_interactions.parquet")
    # Keep only rows that carry explicit feedback (at least one like/dislike).
    .filter((pl.col("like") + pl.col("dislike")) >= 1)
    # Signed feedback weight = like - dislike. Cast to a signed integer type
    # before subtracting: if the flags are stored as unsigned integers,
    # `0 - 1` would wrap around to a large positive value instead of -1 and
    # dislikes would be scored as strong likes.
    .with_columns(
        weight=pl.col("like").cast(pl.Int32) - pl.col("dislike").cast(pl.Int32)
    )
    # Only the (user, item, weight) triplets are needed downstream.
    .select("user_id", "item_id", "weight")
)

In [None]:
# Execute the lazy query and materialise the result as an in-memory DataFrame.
# Note: this rebinds `train`, so the cell is not re-runnable on its own --
# re-run the cell that builds the lazy query first.
train = train.collect()

In [None]:
# Item and user metadata tables; here they are only used to size the
# interaction matrix.
items_meta = pl.read_parquet("items_meta.parquet")
users_meta = pl.read_parquet("users_meta.parquet")

# Matrix dimensions are "largest id + 1", so every id found in the metadata is
# a valid row/column index.
# NOTE(review): assumes ids are non-negative integers and that the metadata
# covers every id used in the train/test files -- confirm.
n_users = users_meta.get_column("user_id").max() + 1
n_items = items_meta.get_column("item_id").max() + 1

In [None]:
# Pack the interaction triplets into a (n_users x n_items) CSR matrix:
# rows are users, columns are items, values are the feedback weights.
# All three columns are converted to NumPy explicitly so that scipy receives
# plain arrays rather than relying on implicit conversion of a polars Series.
# Repeated (user_id, item_id) pairs are summed by the sparse constructor.
# Note: this rebinds `train` from a DataFrame to a sparse matrix, so the cell
# cannot be re-run without re-running the cells that build `train`.
train = csr_matrix(
    (
        train["weight"].to_numpy(),
        (train["user_id"].to_numpy(), train["item_id"].to_numpy()),
    ),
    shape=(n_users, n_items),
)

In [None]:
# Matrix-factorisation model (ALS) trained on the user x item feedback matrix.
# The factors are initialised randomly, so a fixed `random_state` is passed to
# make the training run -- and therefore the submission -- reproducible.
model = implicit.als.AlternatingLeastSquares(
    factors=16,                    # dimensionality of the user/item embeddings
    iterations=10,                 # number of ALS sweeps
    regularization=1,
    alpha=100,                     # confidence scaling applied to the weights
    calculate_training_loss=True,  # track the loss during fitting
    random_state=42,
)
# NOTE(review): the matrix is passed as users x items, which is the layout the
# scoring cell relies on when indexing `user_factors` by user_id -- confirm
# that the installed implicit version expects that orientation.
model.fit(train)

In [None]:
# Load the (user_id, item_id) pairs that have to be scored.
test_pairs = pl.read_csv('test_pairs.csv')
# Bare last expression: show a preview of the frame in the notebook output.
test_pairs

In [None]:
# Score every test pair as the dot product of its user and item embeddings.
# The id columns are converted to NumPy arrays first so the factor matrices
# are indexed with plain integer arrays (same convention as when the
# interaction matrix was built) instead of polars Series.
test_user_idx = test_pairs['user_id'].to_numpy()
test_item_idx = test_pairs['item_id'].to_numpy()

# Row-wise gather of both factor matrices, elementwise product, then a sum
# over the factor axis -> one score per test pair, in `test_pairs` row order.
# NOTE(review): assumes the model holds NumPy factor arrays (CPU model) --
# confirm if a GPU build of implicit is in use.
als_predict = (
    model.user_factors[test_user_idx] * model.item_factors[test_item_idx]
).sum(axis=1)

In [None]:
# Attach the ALS scores to the test pairs as a `predict` column and write the
# result out as the submission file.
submission = test_pairs.with_columns(predict=als_predict)
submission.write_csv('sample_submission.csv')