
# 02 - Training Decision Transformer
Notebook de entrenamiento siguiendo la guía (opción 1).


In [1]:

%matplotlib inline
import sys, pathlib
import torch
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt

# Locate the project root: the directory that holds config_dataset.py, looked
# up first in the current working directory and then in its parent. When the
# file is found, that directory becomes ROOT and is appended to sys.path.
# If neither location has the file, ROOT stays at the working directory and
# sys.path is left untouched.
ROOT = pathlib.Path.cwd()
for candidate in (ROOT, ROOT.parent):
    if (candidate / 'config_dataset.py').exists():
        ROOT = candidate
        sys.path.append(str(ROOT))
        break

from config_dataset import NUM_ITEMS
from src.data.load_data import load_training_data
from src.data.preprocessing import create_dt_dataset, validate_preprocessing
from src.data.dataset import RecommendationDataset
from src.models.decision_transformer import DecisionTransformer
from src.training.trainer import train_decision_transformer

# Sanity check: report the NUM_ITEMS value imported from config_dataset and
# whether torch can see a CUDA device.
cuda_ok = torch.cuda.is_available()
print(f"NUM_ITEMS: {NUM_ITEMS}")
print(f"GPU disponible: {cuda_ok}")


NUM_ITEMS: 752
GPU disponible: False



## Cargar datos procesados
Usa `data/processed/trajectories_train.pkl` si está disponible; si no, procesa los datos desde cero.


In [2]:

import pickle
from pathlib import Path

# Pickled trajectories are cached under the project root and reused when present.
processed_path = ROOT / 'data/processed/trajectories_train.pkl'

if not processed_path.exists():
    # Cache miss: build the trajectories from the training data, run the
    # preprocessing validation on them, then persist them for later runs.
    df_train = load_training_data()
    trajectories = create_dt_dataset(df_train)
    validate_preprocessing(trajectories)
    processed_path.parent.mkdir(parents=True, exist_ok=True)
    with processed_path.open('wb') as cache_file:
        pickle.dump(trajectories, cache_file)
    print(f"Generadas y guardadas trayectorias -> {len(trajectories)}")
else:
    # Cache hit: the stored object is used as-is (validate_preprocessing is
    # only run when the cache is built).
    # NOTE: pickle.load can execute arbitrary code; only load a cache file
    # that comes from a trusted source.
    with processed_path.open('rb') as cache_file:
        trajectories = pickle.load(cache_file)
    print(f"Cargadas trayectorias desde {processed_path} -> {len(trajectories)}")


Cargadas trayectorias desde /home/manu/Documentos/diplo/tp_decision_transformer/data/processed/trajectories_train.pkl -> 16000



## Train/Val split y DataLoaders


In [3]:

# Data-loading hyperparameters.
context_length = 20  # forwarded to RecommendationDataset as context_length
batch_size = 64  # raise if the GPU allows it; lower it if memory runs out
val_frac = 0.1  # fraction of the dataset held out for validation
split_seed = 42  # keeps the train/val partition identical across kernel restarts

full_dataset = RecommendationDataset(trajectories, context_length=context_length)
val_size = int(len(full_dataset) * val_frac)
train_size = len(full_dataset) - val_size

# Use a dedicated, seeded generator so the split is reproducible. Without it
# random_split draws from the global RNG and the validation set changes on
# every fresh run.
split_generator = torch.Generator().manual_seed(split_seed)
train_ds, val_ds = random_split(
    full_dataset,
    [train_size, val_size],
    generator=split_generator,
)

# Only the training loader shuffles; validation order is kept fixed.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

print(f"Train size: {train_size}, Val size: {val_size}")
print(f"Batch size: {batch_size}")


Train size: 14400, Val size: 1600
Batch size: 64



## Instanciar modelo y optimizador


In [4]:

# Model and optimisation hyperparameters.
hidden_dim = 256  # high capacity; intended for GPU training
n_layers = 4
n_heads = 8
num_groups = 8  # forwarded to DecisionTransformer as num_groups
learning_rate = 2e-4  # conservative learning rate for a long training run
num_epochs = 50  # long training run; adjust to the available resources
init_seed = 42  # seed for torch's global RNG

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed torch's global RNG before building the model so that any torch-based
# random initialisation done from here on is repeatable between runs.
torch.manual_seed(init_seed)

model = DecisionTransformer(
    num_items=NUM_ITEMS,
    num_groups=num_groups,
    hidden_dim=hidden_dim,
    n_layers=n_layers,
    n_heads=n_heads,
    context_length=context_length,
)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)



## Entrenamiento


In [5]:

# Run the training loop. Only train_loader is handed to the trainer; the
# val_loader built earlier is not used by this call.
#
# NOTE(review): in the recorded run below the loss stays at ~6.6225 for every
# logged epoch. That is about ln(752) with NUM_ITEMS = 752, i.e. the value a
# cross-entropy loss takes for a uniform prediction (assuming the trainer uses
# cross-entropy over items -- TODO confirm). This suggests the model is not
# learning; check the trainer and targets before trusting the checkpoint.
model = train_decision_transformer(model, train_loader, optimizer, device, num_epochs=num_epochs)

# This reference version of the trainer does not return a history, so an
# empty placeholder is kept for the cells that follow.
history = dict(train_loss=[], val_loss=[])


Epoch 1/50, Loss: 6.6243
Epoch 2/50, Loss: 6.6233
Epoch 3/50, Loss: 6.6232
Epoch 4/50, Loss: 6.6232
Epoch 5/50, Loss: 6.6229
Epoch 6/50, Loss: 6.6229
Epoch 7/50, Loss: 6.6230
Epoch 8/50, Loss: 6.6228
Epoch 9/50, Loss: 6.6228
Epoch 10/50, Loss: 6.6228
Epoch 11/50, Loss: 6.6228
Epoch 12/50, Loss: 6.6228
Epoch 13/50, Loss: 6.6227
Epoch 14/50, Loss: 6.6227
Epoch 15/50, Loss: 6.6226
Epoch 16/50, Loss: 6.6226
Epoch 17/50, Loss: 6.6225
Epoch 18/50, Loss: 6.6226
Epoch 19/50, Loss: 6.6225
Epoch 20/50, Loss: 6.6225
Epoch 21/50, Loss: 6.6225
Epoch 22/50, Loss: 6.6225
Epoch 23/50, Loss: 6.6226
Epoch 24/50, Loss: 6.6225
Epoch 25/50, Loss: 6.6225
Epoch 26/50, Loss: 6.6225
Epoch 27/50, Loss: 6.6224
Epoch 28/50, Loss: 6.6224
Epoch 29/50, Loss: 6.6225
Epoch 30/50, Loss: 6.6225
Epoch 31/50, Loss: 6.6225
Epoch 32/50, Loss: 6.6224
Epoch 33/50, Loss: 6.6225
Epoch 34/50, Loss: 6.6224
Epoch 35/50, Loss: 6.6225
Epoch 36/50, Loss: 6.6226
Epoch 37/50, Loss: 6.6224
Epoch 38/50, Loss: 6.6224
Epoch 39/50, Loss: 6.


## Curva de loss


In [6]:

# The reference trainer returns no history, so there is no loss curve to
# plot; just report that training has finished.
print("Entrenamiento finalizado")


Entrenamiento finalizado



## Guardar checkpoint


In [7]:

# Persist the trained weights (state_dict only) under the project root,
# creating the checkpoint directory first if it does not exist yet.
ckpt_path = ROOT.joinpath('results/checkpoints/dt_checkpoint.pt')
ckpt_dir = ckpt_path.parent
ckpt_dir.mkdir(parents=True, exist_ok=True)

weights = model.state_dict()
torch.save(weights, ckpt_path)
print(f"Checkpoint guardado en {ckpt_path}")


Checkpoint guardado en /home/manu/Documentos/diplo/tp_decision_transformer/results/checkpoints/dt_checkpoint.pt
