
# 03 - Prueba rápida de evaluación (Parte 3)
Baselines + métricas + evaluación con modelo dummy.


In [1]:

import sys, pathlib
import torch
import numpy as np

# Make the repo root importable: look for config_dataset.py in the current
# working directory first, then in its parent (presumably for kernels started
# from a subfolder of the repo).
ROOT = pathlib.Path.cwd()
for _candidate in (ROOT, ROOT.parent):
    if (_candidate / 'config_dataset.py').exists():
        ROOT = _candidate
        # Guard against duplicates so re-running this cell does not keep
        # growing sys.path with the same entry.
        if str(ROOT) not in sys.path:
            sys.path.append(str(ROOT))
        break
# If neither location holds config_dataset.py, ROOT stays at the working
# directory and sys.path is left untouched.

from config_dataset import NUM_ITEMS
from src.evaluation.metrics import hit_rate_at_k, ndcg_at_k, mrr
from src.evaluation.evaluate import evaluate_model
from src.models.baselines import PopularityRecommender
from src.data.load_data import load_training_data
from src.data.preprocessing import create_dt_dataset

# Echo the NUM_ITEMS value imported from config_dataset so the configuration
# used by the cells below is visible in the notebook output.
print(f"NUM_ITEMS: {NUM_ITEMS}")


NUM_ITEMS: 472



## Métricas con logits dummy


In [2]:

# Dummy predictions: a seeded 2x5 tensor of random scores plus one target index
# per row (presumably rows = samples, columns = candidate items).
torch.manual_seed(0)
preds = torch.randn(2, 5)
targets = torch.tensor([1, 3])

# Compute and report each metric in turn.
hr_at_3 = hit_rate_at_k(preds, targets, k=3)
print('HR@3:', hr_at_3)

ndcg_at_3 = ndcg_at_k(preds, targets, k=3)
print('NDCG@3:', ndcg_at_3)

mrr_value = mrr(preds, targets)
print('MRR:', mrr_value)


HR@3: 0.5
NDCG@3: 0.25
MRR: 0.2916666865348816



## Baseline de Popularidad


In [3]:

# Build example trajectories from the first 5 rows of the training data.
df_small = load_training_data().head(5)
trajectories = create_dt_dataset(df_small)

# Fit the popularity baseline on those trajectories.
pop_rec = PopularityRecommender(num_items=NUM_ITEMS)
pop_rec.fit(trajectories)

# Use (up to) the first 5 items of the first trajectory as the query history.
first_trajectory = trajectories[0]
history = first_trajectory['items'][:5]
print('Historial ejemplo:', history)

recommendations = pop_rec.recommend(history, k=5)
print('Recs top-5:', recommendations)


Historial ejemplo: [417 302 125 279 235]
Recs top-5: [0, 390, 117, 189, 371]



## Evaluación con modelo dummy


In [4]:

# Dummy model that produces random logits
class DummyModel(torch.nn.Module):
    """Parameter-free stand-in model that returns random logits.

    Args:
        num_items: Size of the last dimension of the returned logits.
        context_length: Stored on the instance; not used by ``forward``
            (presumably read by ``evaluate_model`` -- TODO confirm).
    """

    def __init__(self, num_items=NUM_ITEMS, context_length=3):
        super().__init__()
        self.num_items = num_items
        self.context_length = context_length

    def forward(self, states, actions, rtg, timesteps, groups):
        """Return random logits of shape ``(B, L, num_items)``.

        Only ``states`` is read: its 2-D shape ``(B, L)`` sizes the output and
        its device (when it has one) decides where the logits are allocated.
        ``actions``, ``rtg``, ``timesteps`` and ``groups`` are accepted for
        interface compatibility and ignored.
        """
        B, L = states.shape
        # Allocate on the inputs' device so the model also works when the
        # caller evaluates on a non-CPU device; device=None falls back to the
        # default device, which is what the bare torch.randn call used.
        device = getattr(states, 'device', None)
        return torch.randn(B, L, self.num_items, device=device)

# Seed the global RNG: DummyModel draws its logits from torch.randn, so without
# this the metrics printed below change every time the cell is re-run.
torch.manual_seed(0)
model = DummyModel()

# Example test user: the first min(NUM_ITEMS, 30) item ids, each with rating 1
items = list(range(min(NUM_ITEMS, 30)))
ratings = [1] * len(items)
test_user = {'group': 0, 'items': items, 'ratings': ratings}

metrics = evaluate_model(model, [test_user], device=torch.device('cpu'), k_list=(3, 5))
print(metrics)


{'HR@3': 0.0, 'HR@5': 0.0, 'NDCG@3': 0.0, 'NDCG@5': 0.0, 'MRR': 0.01727793138060305}
