
# 02 - Prueba rápida Decision Transformer (sample)

Instancia el modelo de referencia y verifica shapes con datos dummy.


In [1]:

import sys, pathlib
import torch

# Make the repository root importable: look for config_dataset.py in the
# current working directory first, then in its parent directory.
ROOT = pathlib.Path.cwd()
if not (ROOT / 'config_dataset.py').exists() and (ROOT.parent / 'config_dataset.py').exists():
    ROOT = ROOT.parent
if (ROOT / 'config_dataset.py').exists() and str(ROOT) not in sys.path:
    # Guarded so that re-running this cell does not keep appending the same entry.
    sys.path.append(str(ROOT))

# These imports must come after the sys.path setup above.
from config_dataset import NUM_ITEMS
from src.models.decision_transformer import DecisionTransformer

# Instantiate the reference model.
model = DecisionTransformer(num_items=NUM_ITEMS, num_groups=8, hidden_dim=128)
# torch.no_grad() only disables gradient tracking; eval() is what switches the
# Dropout layers off, so the smoke-test forward pass is deterministic.
model.eval()
print(model)

# Dummy batch: all-zero ids / returns-to-go, with timesteps 0..seq_len-1 per row.
batch_size = 2
seq_len = 5
states = torch.zeros((batch_size, seq_len), dtype=torch.long)
actions = torch.zeros((batch_size, seq_len), dtype=torch.long)
rtg = torch.zeros((batch_size, seq_len, 1))
timesteps = torch.arange(seq_len).unsqueeze(0).repeat(batch_size, 1)
groups = torch.zeros(batch_size, dtype=torch.long)

with torch.no_grad():
    logits = model(states, actions, rtg, timesteps, groups)

print('Logits shape:', logits.shape)  # (B, L, num_items)

# Turn the visual check into an enforced one. Only the leading (B, L) dims are
# asserted; the last dim is expected to be the item vocabulary size -- TODO
# confirm against the model's output head before asserting it too.
assert tuple(logits.shape[:2]) == (batch_size, seq_len), (
    f'unexpected logits shape {tuple(logits.shape)}'
)


DecisionTransformer(
  (item_embedding): Embedding(472, 128)
  (group_embedding): Embedding(8, 128)
  (rtg_embedding): Linear(in_features=1, out_features=128, bias=True)
  (timestep_embedding): Embedding(200, 128)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=512, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=512, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (predict_item): Sequential(
    (0): Linear(in_features=128, out_features=1


## Prueba Dataset/DataLoader (Parte 2.2)
Extrae un sample y un batch para validar shapes.


In [2]:

import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from src.data.load_data import load_training_data
from src.data.preprocessing import create_dt_dataset, validate_preprocessing
from src.data.dataset import RecommendationDataset

# Build trajectories from a tiny subset (.head(3)) so this check stays fast.
df_small = load_training_data().head(3)
trajectories = create_dt_dataset(df_small)
validate_preprocessing(trajectories)

# Single-sample check: inspect the keys and per-field shapes of the first item.
dataset = RecommendationDataset(trajectories, context_length=20)
sample = dataset[0]
print('Keys:', sample.keys())
for field in ('states', 'targets'):
    print(f'{field} shape:', sample[field].shape)
print('Ejemplo targets (primeros 5):', sample['targets'][:5])

# Batch check: pull one batch from a DataLoader and inspect the collated shapes.
loader = DataLoader(dataset, batch_size=2, shuffle=True)
batch = next(iter(loader))
for field in ('states', 'rtg'):
    print(f'Batch {field} shape:', batch[field].shape)


Keys: dict_keys(['states', 'actions', 'rtg', 'timesteps', 'groups', 'targets'])
states shape: torch.Size([20])
targets shape: torch.Size([20])
Ejemplo targets (primeros 5): tensor([384, 285, 181, 155, 360])
Batch states shape: torch.Size([2, 20])
Batch rtg shape: torch.Size([2, 20, 1])



## Prueba loop de entrenamiento (Parte 2.3)
Entrena 3 épocas con un subset pequeño para verificar que el loop corre y observar la tendencia de la pérdida.


In [4]:

import torch
from torch.utils.data import DataLoader
# NUM_ITEMS is imported here so the cell does not depend on a name left over
# from an earlier cell. Like the src.* imports below, this requires the repo
# root to already be on sys.path.
from config_dataset import NUM_ITEMS
from src.data.load_data import load_training_data
from src.data.preprocessing import create_dt_dataset
from src.data.dataset import RecommendationDataset
from src.models.decision_transformer import DecisionTransformer
from src.training.trainer import train_decision_transformer

# Seed the global torch RNG so weight initialisation and DataLoader shuffling
# (and therefore the printed loss history) are repeatable across runs.
torch.manual_seed(0)

# Small subset for a quick smoke test.
subset = 200
df_small = load_training_data().head(subset)
trajectories = create_dt_dataset(df_small)
dataset = RecommendationDataset(trajectories, context_length=20)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

model = DecisionTransformer(num_items=NUM_ITEMS, num_groups=8, hidden_dim=64)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train a few epochs on CPU to see the loss trend.
num_epochs = 3
device = torch.device('cpu')
model, history = train_decision_transformer(model, loader, optimizer, device, num_epochs=num_epochs)
print('History train_loss:', history['train_loss'])


Epoch 1/3 - train_loss=6.1663
Epoch 2/3 - train_loss=6.1575
Epoch 3/3 - train_loss=6.1561
History train_loss: [6.1663486289978025, 6.157499394416809, 6.156101064682007]
