In [2]:
from scipy.sparse import csr_matrix
import polars as pl
import implicit

In [4]:
train = pl.scan_parquet("../train_interactions.parquet")
train = train.filter((pl.col("like") + pl.col("dislike")) >= 1)
train = train.with_columns(weight=pl.col("like") - pl.col("dislike"))
train = train.select("user_id", "item_id", "weight")

In [6]:
train = train.collect()

In [8]:
items_meta = pl.read_parquet("items_meta.parquet")
users_meta = pl.read_parquet("users_meta.parquet")
n_items = items_meta["item_id"].max() + 1
n_users = users_meta["user_id"].max() + 1

In [10]:
train = csr_matrix((train["weight"], 
                    (train["user_id"].to_numpy(), 
                     train["item_id"].to_numpy())),
                   shape=(n_users, n_items))

In [12]:
model = implicit.als.AlternatingLeastSquares(factors=16, 
                                             iterations=10, 
                                             regularization=1, 
                                             alpha=100,
                                             calculate_training_loss=True)
model.fit(train)

  check_blas_config()


  0%|          | 0/10 [00:00<?, ?it/s]

In [14]:
test_pairs = pl.read_csv('test_pairs.csv')
test_pairs

user_id,item_id
i64,i64
1,7363
1,73770
1,75700
1,81204
1,110249
…,…
183403,235097
183403,267411
183403,273646
183403,288305


In [20]:
als_predict = (model.user_factors[test_pairs['user_id']] * 
               model.item_factors[test_pairs['item_id']]).sum(axis=1)

In [22]:
test_pairs.with_columns(predict=als_predict)

In [24]:
test_pairs

user_id,item_id
i64,i64
1,7363
1,73770
1,75700
1,81204
1,110249
…,…
183403,235097
183403,267411
183403,273646
183403,288305
