In [None]:
import polars as pl
from pathlib import Path
from polimi.utils._urm import train_recommender
from RecSys_Course_AT_PoliMi.Recommenders.GraphBased.P3alphaRecommender import P3alphaRecommender
from RecSys_Course_AT_PoliMi.Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from RecSys_Course_AT_PoliMi.Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from RecSys_Course_AT_PoliMi.Recommenders.KNN.UserKNNCFRecommender import UserKNNCFRecommender
from RecSys_Course_AT_PoliMi.Recommenders.MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_AsySVD_Cython, MatrixFactorization_BPR_Cython
from RecSys_Course_AT_PoliMi.Recommenders.MatrixFactorization.PureSVDRecommender import PureSVDRecommender, PureSVDItemRecommender
from RecSys_Course_AT_PoliMi.Recommenders.Neural.MultVAERecommender import MultVAERecommender
from RecSys_Course_AT_PoliMi.Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender, MultiThreadSLIM_SLIMElasticNetRecommender
from RecSys_Course_AT_PoliMi.Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from RecSys_Course_AT_PoliMi.Recommenders.MatrixFactorization.NMFRecommender import NMFRecommender

In [None]:
# Root of the dataset folder, relative to this notebook.
dpath = Path('../../dataset')

# Dataset variant; interpolated into the `ebnerd_<dtype>` folder name.
dtype = 'small'
ebnerd_dir = dpath / f'ebnerd_{dtype}'
train_dir = ebnerd_dir / 'train'
validation_dir = ebnerd_dir / 'validation'

# Article table, then behaviors/history for the train and validation splits.
articles = pl.read_parquet(ebnerd_dir / 'articles.parquet')

behaviors_train = pl.read_parquet(train_dir / 'behaviors.parquet')
history_train = pl.read_parquet(train_dir / 'history.parquet')

behaviors_val = pl.read_parquet(validation_dir / 'behaviors.parquet')
history_val = pl.read_parquet(validation_dir / 'history.parquet')

In [None]:
from polimi.utils._custom import load_sparse_csr

# Folder with the NER-based URMs and, below it, the trained models.
# NOTE(review): 'small' is hard-coded here although `dtype` holds the same
# value in the data-loading cell — keep the two in sync when switching dataset.
ner_path = dpath / 'urm' / 'ner' / 'small'
algo_path = ner_path / 'algo' / 'train'

# One sparse matrix per split, all stored next to each other as .npz files.
URM_train = load_sparse_csr(ner_path / 'URM_train.npz')
URM_train_val = load_sparse_csr(ner_path / 'URM_train_val.npz')
URM_val = load_sparse_csr(ner_path / 'URM_validation.npz')
URM_test = load_sparse_csr(ner_path / 'URM_test.npz')


In [None]:
from polimi.utils._custom import load_best_optuna_params

# Names passed to load_best_optuna_params, in the same order as the variables
# unpacked below.
study_names = (
    'RP3betaRecommender-ner-small-ndcg100',
    'UserKNNCFRecommender-ner-small-ndcg100',
    'ItemKNNCFRecommender-ner-small-ndcg100',
    'PureSVDItemRecommender-ner-small-ndcg100',
)
rp3params, userknnparams, itemknnparams, puresvditemparams = map(load_best_optuna_params, study_names)

# Show the loaded parameters as the cell output.
rp3params, userknnparams, itemknnparams, puresvditemparams

In [None]:

# NOTE(review): disabled training calls. They pass the same file names and
# `algo_path` that the `load_model` calls further down read from, so presumably
# they are one-off runs that persist the models — confirm before re-enabling.
# train_recommender(URM_train, RP3betaRecommender, rp3params, file_name='RP3betaRecommender-ner-small-ndcg100', output_dir=algo_path)
# train_recommender(URM_train, UserKNNCFRecommender, userknnparams, file_name='UserKNNCFRecommender-ner-small-ndcg100', output_dir=algo_path)
# train_recommender(URM_train, ItemKNNCFRecommender, itemknnparams, file_name='ItemKNNCFRecommender-ner-small-ndcg100', output_dir=algo_path)
# train_recommender(URM_train, PureSVDItemRecommender, puresvditemparams, file_name='PureSVDItemRecommender-ner-small-ndcg100', output_dir=algo_path)

# Build rec sys features

In [None]:
# Instantiate both recommenders on the training URM first, then load their
# saved models from `algo_path`, in the same order.
rp3beta, user_knn = RP3betaRecommender(URM_train), UserKNNCFRecommender(URM_train)
# item_knn = ItemKNNCFRecommender(URM_train)
# pure_svd_item = PureSVDItemRecommender(URM_train)

for loaded_model, model_file in (
    (rp3beta, 'RP3betaRecommender-ner-small-ndcg100'),
    (user_knn, 'UserKNNCFRecommender-ner-small-ndcg100'),
    # (item_knn, 'ItemKNNCFRecommender-ner-small-ndcg100'),
    # (pure_svd_item, 'PureSVDItemRecommender-ner-small-ndcg100'),
):
    loaded_model.load_model(str(algo_path), file_name=model_file)

In [None]:
from polimi.utils._urm import build_ner_mapping, build_user_id_mapping, build_articles_with_processed_ner, _build_batch_ner_interactions

# The user mapping is built from the train and validation histories stacked together.
user_id_mapping = build_user_id_mapping(history_train.vstack(history_val))

# Articles with processed NER, and the ner -> ner_index lookup derived from them.
ap = build_articles_with_processed_ner(articles)
ner_mapping = build_ner_mapping(ap)

# Per-element expression: replace a NER value with its index; values missing
# from the mapping become null (default=None) and are dropped.
ner_to_index = (
    pl.element()
    .replace(ner_mapping['ner'], ner_mapping['ner_index'], default=None)
    .drop_nulls()
)
ap = ap.with_columns(
    pl.col('ner_clusters').list.eval(ner_to_index).alias('ner_clusters_index'),
)

In [None]:
# Per-candidate expression: article id -> that article's list of NER indices.
# NOTE(review): drop_nulls would make `candidate_ner_index` shorter than
# `candidate_ids` if any in-view article were missing from `ap`; the later joint
# explode of both columns presumably relies on that never happening — confirm.
article_to_ner_index = (
    pl.element()
    .replace(ap['article_id'], ap['ner_clusters_index'], default=None)
    .drop_nulls()
)
# user_id -> user_index; ids absent from the mapping become null.
user_to_index = pl.col('user_id').replace(
    user_id_mapping['user_id'], user_id_mapping['user_index'], default=None
)

train_ds = (
    behaviors_train
    .rename({'article_ids_inview': 'candidate_ids'})
    .with_columns(
        pl.col('candidate_ids').list.eval(article_to_ner_index).alias('candidate_ner_index'),
        user_to_index.alias('user_index'),
    )
    .select('impression_id', 'user_id', 'user_index', 'candidate_ids', 'candidate_ner_index')
)
train_ds.head(2)

In [None]:
# Sorted, de-duplicated user indices and NER indices as plain Python lists.
all_users, all_items = (
    index_column.unique().sort().to_list()
    for index_column in (user_id_mapping['user_index'], ner_mapping['ner_index'])
)
# rp3betascores = rp3beta._compute_item_score(all_users)
# userknnparamscores = user_knn._compute_item_score(all_users)
# userknnparamscores.shape, rp3betascores.shape

In [None]:
from tqdm import tqdm

# Number of impression rows handled per chunk.
SLICE_SIZE = 10000
# Ceiling division: iter_slices also yields the final, shorter chunk, so floor
# division would make the progress-bar total one too small whenever the row
# count is not an exact multiple of SLICE_SIZE.
n_slices = -(-train_ds.shape[0] // SLICE_SIZE)

# For every chunk: put one candidate on each row, drop candidates whose NER
# index list is empty, then collect the remaining candidates back into one row
# per impression. Impressions whose candidates are all dropped no longer appear.
# The loop variable is called `chunk` so the builtin `slice` is not shadowed.
train_ds = pl.concat([
    chunk.explode(['candidate_ids', 'candidate_ner_index'])
        .filter(pl.col('candidate_ner_index').list.len() > 0)
        .group_by(['impression_id', 'user_id', 'user_index']).agg(pl.all())
    for chunk in tqdm(train_ds.iter_slices(SLICE_SIZE), total=n_slices)
])
train_ds.head(2)

In [None]:
from tqdm import tqdm

recs = [user_knn, rp3beta]


def _score_exprs_for(user_index):
    """Build one `<RECOMMENDER_NAME>_scores` expression per recommender in `recs`.

    Each expression walks the nested `candidate_ner_index` lists and replaces
    every NER index found in `all_items` with the value at the same position of
    `rec._compute_item_score(user_index)[0]`; indices not in `all_items` become
    null (default=None).
    NOTE(review): presumably row 0 of the returned scores is this user's score
    vector — confirm against the recommender implementation.
    """
    return [
        pl.col('candidate_ner_index').list.eval(
            pl.element().list.eval(
                pl.element().replace(all_items, rec._compute_item_score(user_index)[0], default=None)
            )
        ).alias(f"{rec.RECOMMENDER_NAME}_scores")
        for rec in recs
    ]


# One partition per user: attach the score columns, then drop the helper columns.
impression_scores = pl.concat([
    user_rows.with_columns(*_score_exprs_for(user_index)).drop('user_index', 'candidate_ner_index')
    for user_index, user_rows in tqdm(
        train_ds.partition_by(['user_index'], as_dict=True).items(),
        total=train_ds['user_index'].n_unique(),
    )
])
impression_scores.head(3)

  1%|          | 151/15143 [00:15<05:06, 48.84it/s]

In [None]:
from polimi.utils._polars import reduce_polars_df_memory_size

# Raw per-recommender score columns produced by the previous cell.
scores_cols = [col for col in impression_scores.columns if '_scores' in col]
key_cols = ['impression_id', 'user_id', 'candidate_ids']

# For each candidate, collapse its list of scores into a sum, a max and a mean.
sum_exprs = [pl.col(col).list.eval(pl.element().list.sum()).alias(f'sum_{col}') for col in scores_cols]
max_exprs = [pl.col(col).list.eval(pl.element().list.max()).alias(f'max_{col}') for col in scores_cols]
mean_exprs = [pl.col(col).list.eval(pl.element().list.mean()).alias(f'mean_{col}') for col in scores_cols]

# Within each impression, divide every aggregate by the largest value in its
# list (what the original comment calls "inf norm"); NaN results are set to 0.0.
divide_by_list_max = pl.element().truediv(pl.element().max()).fill_nan(0.0)

df = (
    impression_scores
    .with_columns(*sum_exprs, *max_exprs, *mean_exprs)
    .with_columns(pl.all().exclude(key_cols + scores_cols).list.eval(divide_by_list_max))
    .drop(scores_cols)
)

df = reduce_polars_df_memory_size(df)

# One row per (impression, candidate): explode every list column together.
df = (
    df.sort(['impression_id', 'user_id'])
    .explode(pl.all().exclude(['impression_id', 'user_id']))
    .rename({'candidate_ids': 'article'})
)

df.head(3)

# Test API

In [None]:
from polimi.utils._urm import build_ner_scores_features, load_recommender

# Recommender class -> folder and file name of its saved model.
# NOTE(review): 'rp3beta' / 'userknn' differ from the '...-ner-small-ndcg100'
# file names loaded elsewhere in this notebook — confirm these files exist
# under `algo_path`.
load_dict = {
    RP3betaRecommender: {'path': algo_path, 'file_name': 'rp3beta'},
    UserKNNCFRecommender: {'path': algo_path, 'file_name': 'userknn'},
}

# Load every configured recommender on the training URM, in dict order.
recs = [
    load_recommender(URM_train, rec_class, file_path=str(spec['path']), file_name=spec['file_name'])
    for rec_class, spec in load_dict.items()
]

# Same feature construction as the manual cells above, through the library helper.
df = build_ner_scores_features(
    history=history_train,
    behaviors=behaviors_train,
    articles=articles,
    recs=recs,
)
df.head(3)

# Evaluate Models

In [14]:
# Re-create both recommenders on the training URM, then load their saved
# models from `algo_path`, so the evaluation cells below can run from here.
rp3beta, user_knn = RP3betaRecommender(URM_train), UserKNNCFRecommender(URM_train)
# item_knn = ItemKNNCFRecommender(URM_train)
# pure_svd_item = PureSVDItemRecommender(URM_train)

model_dir = str(algo_path)
for eval_model, model_file in (
    (rp3beta, 'RP3betaRecommender-ner-small-ndcg100'),
    (user_knn, 'UserKNNCFRecommender-ner-small-ndcg100'),
    # (item_knn, 'ItemKNNCFRecommender-ner-small-ndcg100'),
    # (pure_svd_item, 'PureSVDItemRecommender-ner-small-ndcg100'),
):
    eval_model.load_model(model_dir, file_name=model_file)

RP3betaRecommender: URM Detected 3685 (19.6%) users with no interactions.
RP3betaRecommender: URM Detected 18909 (43.2%) items with no interactions.
UserKNNCFRecommender: URM Detected 3685 (19.6%) users with no interactions.
UserKNNCFRecommender: URM Detected 18909 (43.2%) items with no interactions.
RP3betaRecommender: Loading model from file '../../dataset/urm/ner/small/algo/train/RP3betaRecommender-ner-small-ndcg100'
RP3betaRecommender: Loading complete
UserKNNCFRecommender: Loading model from file '../../dataset/urm/ner/small/algo/train/UserKNNCFRecommender-ner-small-ndcg100'
UserKNNCFRecommender: Loading complete


In [15]:
from RecSys_Course_AT_PoliMi.Evaluation.Evaluator import EvaluatorHoldout

# Cutoff and metric used to read the score out of the evaluation result frame.
cutoff, metric = 100, 'NDCG'

# One evaluator on URM_val and one on URM_test, built with identical settings
# (validation first, then test). Each gets its own cutoff list.
evaluator, test_evaluator = (
    EvaluatorHoldout(holdout_urm, cutoff_list=[cutoff], exclude_seen=False)
    for holdout_urm in (URM_val, URM_test)
)

EvaluatorHoldout: Ignoring 3715 (19.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 3517 (18.7%) Users that have less than 1 test interactions


In [16]:
# Score the already-loaded UserKNN model with the URM_val evaluator.
# user_knn = UserKNNCFRecommender(URM_train)
# user_knn.fit()
result_df, _ = evaluator.evaluateRecommender(user_knn)

# Result row for the chosen cutoff, then the column for the chosen metric.
row_at_cutoff = result_df.loc[cutoff]
row_at_cutoff[metric.upper()]

EvaluatorHoldout: Processed 15112 (100.0%) in 29.11 sec. Users per second: 519


0.2736178149551304

In [None]:
# Refit UserKNN on URM_train_val with the loaded best parameters, then score it
# with the URM_test evaluator. This rebinds `user_knn`.
user_knn = UserKNNCFRecommender(URM_train_val)
user_knn.fit(**userknnparams)
result_df, _ = test_evaluator.evaluateRecommender(user_knn)

# Result row for the chosen cutoff, then the column for the chosen metric.
test_row_at_cutoff = result_df.loc[cutoff]
test_row_at_cutoff[metric.upper()]