In [1]:
import sys
import os
# Make the parent of the current working directory importable so that the
# `src.*` imports below resolve.
# NOTE(review): assumes the kernel is started one level below the repo root — confirm.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append: re-running this cell must not stack duplicate sys.path entries.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.data.loader import load_interactions, load_games
from src.evaluation.splitter import split_train_in_out
from src.evaluation.evaluator import evaluate_model
from src.models.ease import EASE
from src.novelty.distance import (
    build_genre_similarity_matrix,
    build_genre_distance_matrix,
)
import numpy as np
from src.pipelines.save import save_submission
from src.config import LAMBDA_REG, TOP_K, N_EVAL_USERS, SEED, POP_ALPHA, MIN_PLAYTIME

# PART 1: OFFLINE EVALUATION

In [None]:
# 1. Load data
# `train`: training interactions (train=True); split locally in step 2.
# `games`: games table; later used for its item_id/publisher columns and to
# build the genre similarity/distance matrices.
train = load_interactions(train=True)
games = load_games()

In [None]:
# 2. Local split (Codabench-like: 1 holdout per user)
# Seeded with SEED. Returns two frames: train_in_full (fold-in side) and
# train_out_full (held-out side), both covering all users before subsampling.
train_in_full, train_out_full = split_train_in_out(train, seed=SEED)

In [None]:
# 3. Sample a subset of users for faster offline eval
# Candidates are the users present in the held-out frame (only those can be evaluated).
all_users = train_out_full["user_id"].unique()

# Cap the sample size at the number of available users.
n_eval = min(N_EVAL_USERS, len(all_users))
print("Total users:", len(all_users), "-> Subsampling", n_eval, "users...")

# Seeded generator; draw without replacement so each user appears at most once.
rng = np.random.default_rng(SEED)
sample_users = rng.choice(all_users, size=n_eval, replace=False)

In [None]:
# 4. Restrict both sides of the split to the sampled users
in_mask = train_in_full["user_id"].isin(sample_users)
out_mask = train_out_full["user_id"].isin(sample_users)

# Re-number rows from 0 after filtering.
train_in = train_in_full.loc[in_mask].reset_index(drop=True)
train_out = train_out_full.loc[out_mask].reset_index(drop=True)

print(f"Offline eval on {n_eval} users "
      f"(train_in rows = {len(train_in)}, train_out rows = {len(train_out)})")

In [None]:
# 5. Fit EASE and get recommendations for the sampled users
model = EASE(lambda_reg=float(LAMBDA_REG), alpha_pop=float(POP_ALPHA))

# recommend(fit_df, score_df, ...): as in the Codabench call in Part 2, the first
# frame is used to fit and the second defines which users are scored.
#   - fit on train_in_full: the fold-in side of the local split, all users;
#   - score train_in: the fold-in history of the sampled users only.
# The held-out frame must not be passed here. It is the evaluation target in
# step 6, and the previous `train_out_full` argument also covered every user
# rather than the N_EVAL_USERS subsample.
# NOTE(review): the Codabench call additionally passes min_playtime=MIN_PLAYTIME;
# confirm whether the offline run should mirror it.
recs_offline = model.recommend(
    train_in_full,
    train_in,
    top_k=TOP_K,
)

In [None]:
# 6. Evaluate offline baseline on the sampled users
# Item-level side information, all derived from the games table.
# maps item_id -> publisher (needed for publisher Gini)
publisher_by_item = games.set_index("item_id")["publisher"]
genre_similarity = build_genre_similarity_matrix(games)
genre_distance = build_genre_distance_matrix(games)

# Score the recommendations against the held-out rows of the sampled users.
metrics_offline = evaluate_model(
    recs_offline,
    train_in,
    train_out,
    publisher_mapper=publisher_by_item,
    item_similarity=genre_similarity,
    item_distance=genre_distance,
)

print(f"Offline EASE baseline (lambda_reg = {LAMBDA_REG}, pop_alpha = {POP_ALPHA}) on {n_eval} users")
print(metrics_offline)

# PART 2: ONLINE EVALUATION (Codabench)

In [2]:
# 1. Load full training and official test fold-in
train_full = load_interactions(train=True)
test_in = load_interactions(train=False)

In [3]:
# 2. New model instance, same lambda
# Separate EASE instance for the submission run; hyperparameters come from
# src.config and are cast to float before being passed in.
model_cb = EASE(lambda_reg=float(LAMBDA_REG), alpha_pop=float(POP_ALPHA))

In [4]:
# 3. train_full is used to fit; test_in defines which users we score
# Produces TOP_K recommendations per scored user; min_playtime is taken from config.
# NOTE(review): the offline call in Part 1 does not pass min_playtime — confirm
# the two runs are meant to differ.
recs_codabench = model_cb.recommend(train_full, test_in, top_k=TOP_K, min_playtime=MIN_PLAYTIME)

In [5]:
# Quick sanity check
# Eyeball the first 40 rows (columns: user_id, item_id, score, rank).
print(recs_codabench.head(n=40))

    user_id  item_id     score  rank
0         4     8213  1.111279     1
1         4     1043  1.066335     2
2         4      307  1.049700     3
3         4     5888  0.970818     4
4         4      658  0.902099     5
5         4      450  0.898762     6
6         4      252  0.855349     7
7         4      344  0.843446     8
8         4     8327  0.822922     9
9         4       90  0.797281    10
10        4      662  0.759801    11
11        4     7996  0.750572    12
12        4      389  0.739966    13
13        4      679  0.730180    14
14        4        0  0.719847    15
15        4      328  0.690355    16
16        4      539  0.674564    17
17        4     8017  0.671392    18
18        4      579  0.657211    19
19        4      647  0.650941    20
20        6     1072  0.828381     1
21        6     8278  0.674180     2
22        6     7711  0.648797     3
23        6     2707  0.605328     4
24        6      669  0.593188     5
25        6      650  0.581105     6
2

In [6]:
# 4. Save the submission; the file name encodes the hyperparameters.
# (Per the log below, save_submission reports both a CSV and a ZIP under results/submissions.)
pop_alpha_tag = str(POP_ALPHA).replace('.', '_')  # no dots inside the file stem
submission_name = f"ease_lambda_{LAMBDA_REG}_pop-alpha_{pop_alpha_tag}_final"
save_submission(recs_codabench, submission_name)
print(f"Saved Codabench submission as {submission_name}.csv")

Saved CSV: /home/sunaj/Desktop/novelty-aware-recommenders/results/submissions/ease_lambda_2000_pop-alpha_0_18_final.csv
Saved ZIP for Codabench: /home/sunaj/Desktop/novelty-aware-recommenders/results/submissions/ease_lambda_2000_pop-alpha_0_18_final.csv.zip
Saved Codabench submission as ease_lambda_2000_pop-alpha_0_18_final.csv
