In [1]:
import os
import sys
import torch
import pandas as pd
import numpy as np

# Report which PyTorch build is installed in this runtime.
print("PyTorch version:", torch.__version__)

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)


PyTorch version: 2.9.0+cu126
Device: cpu


In [2]:
# Clone the project repository. No target directory is given, so git creates
# "Ferreyra_Tomas_tp_dt" inside the current working directory.
!git clone https://github.com/tferreyragthb/Ferreyra_Tomas_tp_dt.git

# Confirm the top-level folders of the clone.
# NOTE(review): this listing uses an absolute path, so it only matches the clone
# above when the cell is run with /content as the working directory — confirm.
!ls /content/Ferreyra_Tomas_tp_dt


Cloning into 'Ferreyra_Tomas_tp_dt'...
remote: Enumerating objects: 354, done.[K
remote: Counting objects: 100% (118/118), done.[K
remote: Compressing objects: 100% (108/108), done.[K
remote: Total 354 (delta 35), reused 0 (delta 0), pack-reused 236 (from 1)[K
Receiving objects: 100% (354/354), 18.81 MiB | 21.19 MiB/s, done.
Resolving deltas: 100% (90/90), done.
notebooks  results  src


In [3]:
# Show everything that lives directly under /content.
print("Carpetas dentro de /content/:")
print(os.listdir("/content"))

# Keep only the directories whose name contains one of the substrings that the
# repository name is expected to include.
marcadores = ("Ferreyra", "tp", "TP", "dt", "DT")
candidatos = []
for nombre in os.listdir("/content"):
    es_directorio = os.path.isdir(os.path.join("/content", nombre))
    if es_directorio and any(marca in nombre for marca in marcadores):
        candidatos.append(nombre)

print("\nPosibles repos encontrados:")
print(candidatos)


Carpetas dentro de /content/:
['.config', 'Ferreyra_Tomas_tp_dt', 'sample_data']

Posibles repos encontrados:
['Ferreyra_Tomas_tp_dt']


In [4]:
# Absolute location of the cloned repository.
REPO = "/content/Ferreyra_Tomas_tp_dt"

# Make the repository importable; the membership check keeps re-runs of this
# cell from appending the same entry to sys.path more than once.
if REPO not in sys.path:
    sys.path.append(REPO)

print("Repo montado en sys.path correctamente.\n")

# Check that the key folders exist inside the repository.
esperadas = ["src", "notebooks", "results"]
print("Verificando estructura...\n")
for e in esperadas:
    ruta = os.path.join(REPO, e)
    if os.path.exists(ruta):
        estado = 'OK ‚úÖ'
    else:
        estado = '‚ùå NO ENCONTRADO'
    print(f"{e}:  {estado}")


Repo montado en sys.path correctamente.

Verificando estructura...

src:  OK ‚úÖ
notebooks:  OK ‚úÖ
results:  OK ‚úÖ


In [5]:
# Nos aseguramos de estar dentro del repo
os.chdir(REPO)

# Crear carpetas esperadas
os.makedirs("data/train", exist_ok=True)
os.makedirs("data/test_users", exist_ok=True)
os.makedirs("data/groups", exist_ok=True)

BASE = "https://raw.githubusercontent.com/DiploDatos/AprendizajePorRefuerzos/master/tp_decision_transformer/data"

train_files = [
    "train/netflix8_train.df",
]

test_files = [
    "test_users/netflix8_test.json",
]

groups_files = [
    "groups/mu_netflix8.csv",
]

# Descargar archivos faltantes
for f in train_files + test_files + groups_files:
    url = f"{BASE}/{f}"
    dest = f"data/{f}"

    os.makedirs(os.path.dirname(dest), exist_ok=True)

    if not os.path.exists(dest):
        print(f"‚¨áÔ∏è  Descargando {f} ...")
        !wget -q "{url}" -O "{dest}"
    else:
        print(f"‚úîÔ∏è Ya existe {dest}")

print("\nüìÅ Contenido final del directorio data/:")
!ls -R data


‚¨áÔ∏è  Descargando train/netflix8_train.df ...
‚¨áÔ∏è  Descargando test_users/netflix8_test.json ...
‚¨áÔ∏è  Descargando groups/mu_netflix8.csv ...

üìÅ Contenido final del directorio data/:
data:
groups	test_users  train

data/groups:
mu_netflix8.csv

data/test_users:
netflix8_test.json

data/train:
netflix8_train.df


In [6]:
from src.data.load_data import load_train, load_test

# Dataset key handed to the project loaders; this assignment uses Netflix8.
DATASET = "netflix"

# Load the training frame first, then the test users.
df_train, test_users = load_train(DATASET), load_test(DATASET)

# Quick look at the training data.
print("df_train shape:", df_train.shape)
print("Ejemplo df_train:", df_train.head(), sep="\n")

# Quick look at the test data.
n_test_users = len(test_users)
print("\nUsuarios de test:", n_test_users)
print("Ejemplo test_users[0]:", test_users[0])


df_train shape: (16000, 4)
Ejemplo df_train:
   user_id  user_group                                              items  \
0        0           0  [472, 97, 122, 654, 709, 467, 574, 544, 478, 3...   
1        1           0  [431, 445, 367, 100, 743, 739, 263, 426, 321, ...   
2        2           0  [224, 133, 227, 51, 230, 546, 57, 60, 743, 495...   
3        3           0  [338, 80, 210, 618, 468, 320, 351, 411, 575, 2...   
4        4           0  [510, 231, 708, 86, 514, 352, 233, 351, 415, 6...   

                                             ratings  
0  [4.0, 3.0, 4.0, 3.0, 5.0, 4.0, 2.0, 1.0, 4.0, ...  
1  [3.0, 5.0, 5.0, 5.0, 5.0, 3.0, 1.0, 4.0, 5.0, ...  
2  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 5.0, 5.0, ...  
3  [3.0, 5.0, 3.0, 5.0, 2.0, 1.0, 4.0, 3.0, 3.0, ...  
4  [4.0, 2.0, 3.0, 4.0, 5.0, 4.0, 2.0, 4.0, 3.0, ...  

Usuarios de test: 1600
Ejemplo test_users[0]: {'group': 0, 'iter': 0, 'items': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,

In [7]:
from src.data.preprocessing import normalize_item_ids

# The helper returns a pair: an id mapping and the total number of items.
# NOTE(review): whether it also modifies df_train is not visible from this
# notebook — confirm in src/data/preprocessing.py.
mapping, num_items = normalize_item_ids(df_train)

print("Total de √≠tems:", num_items)

# Show only the first five (key, value) pairs of the mapping.
primeros_pares = list(mapping.items())[:5]
print("Ejemplo del mapeo:", primeros_pares)


Total de √≠tems: 752
Ejemplo del mapeo: [(np.int64(0), 0), (np.int64(1), 1), (np.int64(2), 2), (np.int64(3), 3), (np.int64(4), 4)]


In [8]:
def df_to_trajectories(df):
    """Convert a one-row-per-user DataFrame into a list of trajectory dicts.

    Args:
        df: DataFrame providing the columns ``user_id``, ``user_group``,
            ``items`` and ``ratings``.

    Returns:
        A list with one dict per row, in row order. Each dict holds
        ``user_id`` and ``user_group`` as Python ints, ``items`` as an
        ``int64`` NumPy array and ``ratings`` as a ``float32`` NumPy array.
    """

    def _as_trajectory(row):
        # Cast every field explicitly so each trajectory has fixed types.
        return {
            "user_id": int(row["user_id"]),
            "user_group": int(row["user_group"]),
            "items": np.array(row["items"], dtype=np.int64),
            "ratings": np.array(row["ratings"], dtype=np.float32),
        }

    return [_as_trajectory(row) for _, row in df.iterrows()]

# Build the in-memory list of training trajectories from the training frame.
trajectories_train = df_to_trajectories(df_train)

n_trayectorias = len(trajectories_train)
print("Cantidad de trayectorias:", n_trayectorias)

# Inspect the fields of the first trajectory.
primera_trayectoria = trajectories_train[0]
print("Keys de la primera trayectoria:", primera_trayectoria.keys())


Cantidad de trayectorias: 16000
Keys de la primera trayectoria: dict_keys(['user_id', 'user_group', 'items', 'ratings'])


In [9]:
# cantidad de grupos reales del dataset
num_groups = df_train["user_group"].max() + 1
print("num_groups =", num_groups)

# debe coincidir con Notebook 02
context_length = 20
print("context_length =", context_length)


num_groups = 8
context_length = 20


In [10]:
from src.models.decision_transformer import DecisionTransformer

# Build the model with the sizes computed in the previous cells and move it to
# the selected device.
model = DecisionTransformer(
    num_items=num_items,
    num_groups=num_groups,
    context_length=context_length,
).to(device)

# Load the checkpoint
ckpt_path = os.path.join(REPO, "results/checkpoints/dt_model.pth")

# The checkpoint comes from a cloned remote repository. weights_only=True makes
# torch.load refuse to unpickle arbitrary Python objects, and is spelled out so
# the cell behaves the same on PyTorch versions where it is not the default.
state = torch.load(ckpt_path, map_location=device, weights_only=True)
model.load_state_dict(state)

# Switch to evaluation mode before running inference.
model.eval()
print("Modelo cargado y listo para evaluar.")


Modelo cargado y listo para evaluar.


In [11]:
from src.data.dataset import TestDataset
from torch.utils.data import DataLoader

# Wrap the test users in the project's dataset class.
test_dataset = TestDataset(
    test_users,
    num_items=num_items,
    context_length=context_length,
)

# shuffle=False keeps the batches in dataset order.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print("TestDataset listo.")
n_batches = len(test_loader)
print("Total batches:", n_batches)


TestDataset listo.
Total batches: 50


In [12]:
from src.evaluation.evaluate import evaluate_model

# Evaluate the loaded model on the test loader; k is the top-k cutoff.
results_dt = evaluate_model(model, test_loader, device=device, k=10)

# Print every metric with four decimals.
print("Resultados del Decision Transformer:\n")
for m in results_dt:
    v = results_dt[m]
    print(f"{m}: {v:.4f}")


Resultados del Decision Transformer:

hit_rate: 0.0000
ndcg: 0.0000
mrr: 0.0000


In [15]:
from src.models.baselines import PopularityRecommender
from src.evaluation.metrics import hit_rate_at_k, ndcg_at_k, mrr_at_k

# Turn df_train into a plain list of item sequences.
train_sequences = df_train["items"].tolist()

# Instantiate the baseline.
pop = PopularityRecommender(train_sequences, num_items=num_items)

# Top-k predictions: one recommend() call per test user.
preds_pop = [pop.recommend(k=10) for _ in test_users]

# Ground truth: the last element of each test user's "items" list.
# NOTE(review): the printed test_users[0] shows "items" starting 0, 1, 2, ...;
# confirm that the last element really is the target item for this evaluation.
gt_pop = [user["items"][-1] for user in test_users]

# Metrics, computed in the same order as they are reported.
funciones_metricas = {
    "hit_rate": hit_rate_at_k,
    "ndcg": ndcg_at_k,
    "mrr": mrr_at_k,
}
results_pop = {
    nombre: funcion(preds_pop, gt_pop, k=10)
    for nombre, funcion in funciones_metricas.items()
}

print("Resultados del baseline Popularity:\n")
for m in results_pop:
    v = results_pop[m]
    print(f"{m}: {v:.4f}")



Resultados del baseline Popularity:

hit_rate: 0.0000
ndcg: 0.0000
mrr: 0.0000


In [14]:
import pandas as pd

# One entry per model; each value is that model's metric dictionary.
resultados_por_modelo = {
    "Decision Transformer": results_dt,
    "Popularity Baseline": results_pop,
}

# Built with models as columns, then transposed so each model becomes a row.
df_compare = pd.DataFrame(resultados_por_modelo).T

df_compare


Unnamed: 0,hit_rate,ndcg,mrr
Decision Transformer,0.0,0.0,0.0
Popularity Baseline,0.0,0.0,0.0


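Los resultados obtenidos (HR@10 = 0, NDCG@10 = 0, MRR@10 = 0) son esperables.
El Decision Transformer fue entrenado para predecir el próximo ítem dentro de la secuencia, mientras que el dataset de evaluación utiliza como ground truth el último ítem real del historial del usuario.
Dado este desajuste entre las tareas de entrenamiento y evaluación, y considerando que el baseline de Popularidad tampoco está alineado con esa tarea, ambos modelos tienden naturalmente a métricas nulas, aun con un pipeline correcto.
Nota: que las tres métricas sean exactamente 0 en los 1600 usuarios de test, también para el baseline de Popularidad, es una señal para revisar cómo se define el ground truth (en el ejemplo impreso de `test_users[0]`, la lista `items` comienza con 0, 1, 2, …, lo que no parece un historial cronológico) antes de dar por válida esta interpretación.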

In [16]:
from src.models.decision_transformer import DecisionTransformer
import torch

# Create a "new", untrained model (freshly initialised weights).
model_fresh = DecisionTransformer(
    num_items=num_items,
    num_groups=num_groups,
    context_length=context_length
)

# Load the "trained" model from the checkpoint.
model_loaded = DecisionTransformer(
    num_items=num_items,
    num_groups=num_groups,
    context_length=context_length
)
ckpt_path = f"{REPO}/results/checkpoints/dt_model.pth"
# weights_only=True makes torch.load refuse to unpickle arbitrary Python objects
# from the downloaded checkpoint, also on PyTorch versions where that is not the
# default.
model_loaded.load_state_dict(
    torch.load(ckpt_path, map_location="cpu", weights_only=True)
)

# Compare weights parameter by parameter. A non-zero mean absolute difference
# shows the checkpoint values differ from a fresh initialisation.
# no_grad: this is a read-only comparison, so no autograd graph is needed.
with torch.no_grad():
    for (n1, p1), (n2, p2) in zip(model_fresh.named_parameters(), model_loaded.named_parameters()):
        diff = (p1 - p2).abs().mean().item()
        print(f"{n1:35s} | diferencia promedio: {diff:.6f}")
        break  # remove this to print every parameter


item_embedding.weight               | diferencia promedio: 1.130548
