In [1]:
import sys
import os
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_root)

from src.data.loader import load_interactions, load_games
from src.evaluation.splitter import split_train_in_out
from src.evaluation.evaluator import evaluate_model
from src.models.ease import EASE
from src.novelty.distance import (
    build_genre_similarity_matrix,
    build_genre_distance_matrix,
)
import numpy as np
from src.pipelines.save import save_submission
from src.config import LAMBDA_REG, TOP_K, N_EVAL_USERS, SEED, POP_ALPHA, MIN_PLAYTIME

# PART 1: OFFLINE EVALUATION

In [8]:
# 1. Load data
train = load_interactions(train=True)
games = load_games()

In [9]:
# 2. Local split (Codabench-like: 1 holdout per user)
train_in_full, train_out_full = split_train_in_out(train, seed=SEED)

In [10]:
# 3. Sample a subset of users for faster offline eval
all_users = train_out_full["user_id"].unique()          # only users with a holdout
rng = np.random.default_rng(SEED)
n_eval = min(N_EVAL_USERS, len(all_users))
print("Total users:", len(all_users), "-> Subsampling", n_eval, "users...")
sample_users = rng.choice(all_users, size=n_eval, replace=False)

Total users: 51749 -> Subsampling 4000 users...


In [11]:
train_in = train_in_full[train_in_full["user_id"].isin(sample_users)].reset_index(drop=True)
train_out = train_out_full[train_out_full["user_id"].isin(sample_users)].reset_index(drop=True)

print(f"Offline eval on {n_eval} users "
      f"(train_in rows = {len(train_in)}, train_out rows = {len(train_out)})")

Offline eval on 4000 users (train_in rows = 176308, train_out rows = 4000)


In [12]:
# 5. Fit EASE on *fold-in* for these users and get recommendations
model = EASE(lambda_reg=float(LAMBDA_REG), alpha_pop=float(POP_ALPHA))

# train_in = history (fold-in) for sampled users
# test_in = same df; we score these users
recs_offline = model.recommend(
    train_in,
    train_in,
    top_k=TOP_K,
)

In [13]:
# 6. Evaluate offline baseline on the sampled users
metrics_offline = evaluate_model(
    recs_offline,
    train_in,
    train_out,

    # maps item_id -> publisher (needed for publisher Gini)
    publisher_mapper=games.set_index("item_id")["publisher"],
    item_similarity=build_genre_similarity_matrix(games),
    item_distance=build_genre_distance_matrix(games),
)
print(f"Offline EASE baseline (lambda_reg = {LAMBDA_REG}, pop_alpha = {POP_ALPHA}) on {n_eval} users")
print(metrics_offline)

Offline EASE baseline (lambda_reg = 300, pop_alpha = 0.18) on 4000 users
{'ndcg': 0.23085379409360848, 'recall': 0.42075, 'user_coverage': 1.0, 'item_gini': 0.8017399742930591, 'publisher_gini': 0.799251428512455, 'item_coverage': 0.06659818524225304, 'intra_list_similarity': 0.45574853, 'novelty': 0.55890197}


# PART 2: ONLINE EVALUATION (codabench)

In [2]:
# 1. Load full training and official test fold-in
train_full = load_interactions(train=True)
test_in = load_interactions(train=False)

In [3]:
# 2. New model instance, same lambda
model_cb = EASE(lambda_reg=float(LAMBDA_REG), alpha_pop=float(POP_ALPHA))

In [4]:
# 3. train_full is used to fit; test_in defines which users we score
recs_codabench = model_cb.recommend(train_full, test_in, top_k=TOP_K, min_playtime=MIN_PLAYTIME)

In [5]:
# Quick sanity check
print(recs_codabench.head(n=40))

    user_id  item_id     score  rank
0         4      307  1.324330     1
1         4     8213  1.066300     2
2         4     1043  1.060740     3
3         4      450  0.976128     4
4         4      658  0.943692     5
5         4     5888  0.916579     6
6         4     7996  0.915643     7
7         4     8327  0.897309     8
8         4        0  0.818902     9
9         4      344  0.803871    10
10        4      252  0.790193    11
11        4     8017  0.787538    12
12        4     1548  0.724871    13
13        4      647  0.717259    14
14        4     7711  0.703810    15
15        4      539  0.702086    16
16        4      328  0.699487    17
17        4      697  0.688767    18
18        4     2195  0.686473    19
19        4       90  0.683934    20
20        6     1072  0.968144     1
21        6     8278  0.833856     2
22        6     7711  0.718240     3
23        6      669  0.661620     4
24        6       46  0.637079     5
25        6     4461  0.625555     6
2

In [14]:
# 7. Save submission CSV (this writes submission_name.csv in notebooks or pipelines location)
submission_name = f"ease_lambda{LAMBDA_REG}_popalpha{str(POP_ALPHA).replace('.', '_')}"
save_submission(recs_codabench, submission_name)
print(f"Saved Codabench submission as {submission_name}.csv")

Saved ZIP for Codabench: /home/sunaj/Desktop/novelty-aware-recommenders/submissions/ease_lambda300_popalpha0_18.csv.zip
Saved Codabench submission as ease_lambda300_popalpha0_18.csv


### Highest Codabench score so far with lambda=300 en pop alpha=0.18

```dict
{
    "user_coverage": 1.0,
    "item_coverage": 0.12331338730493957,
    "item_gini": 0.8880964541360635,
    "publisher_gini": 0.8850049622861488,
    "recall": 0.4529240239609442,
    "ndcg": 0.3930186147152686,
    "ils": 0.29925743512954517,
    "novelty": 0.7498022990977183
}
```