In [None]:
import os
import re
import sys

sys.path.insert(0, os.path.abspath('..'))

from dt_reference import DecisionTransformeroReference
from dataset_reference import RecommendationDatasetReference
from train_reference import train_decision_transformer_reference

import numpy as np
import torch as T
from torch.utils.data import random_split, DataLoader

# Select the compute device: the GPU when CUDA is available, otherwise the CPU
device = T.device('cuda') if T.cuda.is_available() else T.device('cpu')
print(f"Using device: {device}")

def set_seed(seed):
    """Seed the NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to NumPy's global RNG, to PyTorch's
            default generator and, when CUDA is available, to every
            CUDA device generator.
    """
    np.random.seed(seed)
    T.manual_seed(seed)
    if not T.cuda.is_available():
        return
    T.cuda.manual_seed_all(seed)


# Fix the seed once, up front, for the rest of the notebook
set_seed(42)

Using device: cuda


# Entrenamiento de DT4REC

En este notebook se implementa un pipeline de entrenamiento de un Transformer de Decisiones (DT) para predecir recomendaciones partiendo de un dataset de Netflix.

### Carga de trayectorias

Se cargan las trayectorias previamente preprocesadas. 

Se tienen dos archivos de trayectorias: uno con las recompensas normalizadas (`data/processed/normalized_trajectories_train.pkl`) y otro con las recompensas sin normalizar (`data/processed/trajectories_train.pkl`).

In [None]:
import pickle

# Toggle between the two preprocessed trajectory files:
#   True  -> rewards normalized
#   False -> rewards not normalized
# Previously both branches opened the normalized file, so the flag had no
# effect. The default is True so this cell keeps loading the same
# (normalized) file it effectively loaded before the else-branch was fixed.
load_normalized_trayectories = True

trajectories_path = (
    '../data/processed/normalized_trajectories_train.pkl'
    if load_normalized_trayectories
    else '../data/processed/trajectories_train.pkl'
)

# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# trajectory files produced by this project's own preprocessing step.
with open(trajectories_path, 'rb') as f:
    trajectories = pickle.load(f)

### Configuración de hiperparámetros

Se realizaron algunas pruebas sobre distintos conjuntos de hiperparámetros. Finalmente, estos fueron los que dieron los mejores resultados:

In [None]:
# Hyperparameters

# Passed to the model constructor (context_length is also passed to the dataset)
num_items = 752
num_groups = 8
hidden_dim = 1024
n_layers = 3
n_heads = 4
context_length = 25
max_timesteps = 200
dropout = 0.2

# Passed to the data loaders (batch_size) and the optimizer (learning_rate)
batch_size = 128
learning_rate = 1e-4

# Number of training epochs
num_epochs = 200

### Inicialización del modelo

Con los hiperparámetros elegidos, se instancia la clase `DecisionTransformeroReference`, que contiene el modelo a entrenar.

In [None]:
# Model initialization: build the network from the chosen hyperparameters,
# then move it to the selected device
model = DecisionTransformeroReference(
    num_items=num_items,
    num_groups=num_groups,
    hidden_dim=hidden_dim,
    n_layers=n_layers,
    n_heads=n_heads,
    context_length=context_length,
    max_timestep=max_timesteps,
    dropout=dropout
)
model = model.to(device)

# Total number of elements across all model parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"Model parameters: {total_params:,}")


Model parameters: 2,875,120


### Preparación de datos para entrenamiento

Usando `RecommendationDatasetReference`, construimos el conjunto de datos en el formato admitido por el Decision Transformer. Como resultado, se obtiene un diccionario con tensores para 'states', 'actions', 'rtg', 'timesteps', 'groups' y 'targets'.

En esta instancia separamos un subconjunto para validación. Fijamos su tamaño en un 10% del dataset completo (`val_ratio = 0.1`).


In [None]:
dataset = RecommendationDatasetReference(trajectories=trajectories, context_length=context_length)

# Show a compact summary of the first trajectory (field name -> array shape)
# instead of dumping every array in full into the notebook output.
first_trajectory = dataset.trajectories[0]
print({key: np.shape(value) for key, value in first_trajectory.items()})

print(f"Number of training trajectories: {len(trajectories)}")
print(dataset)

val_ratio = 0.1  # fraction of the dataset held out for validation
train_ratio = 1 - val_ratio

n_total = len(dataset)
n_train = int(n_total * train_ratio)
n_val = n_total - n_train  # remainder, so n_train + n_val == n_total exactly

# Use a dedicated, explicitly seeded generator for the split. Without it,
# random_split draws from the global RNG, so the train/validation partition
# changes whenever earlier cells consume a different amount of randomness
# (for example, after changing the model size), which makes validation
# losses from different runs hard to compare.
split_generator = T.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [n_train, n_val], generator=split_generator
)


train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,        # always validate over the same sequence of batches
    num_workers=0
)

print("\nDataset details:")
print(f"Dataset train size: {len(train_dataset)}")
print(f"Batches per epoch (train): {len(train_loader)}")
print(f"Dataset val size: {len(val_dataset)}")
print(f"Batches per epoch (val): {len(val_loader)}")

# To check the batch shapes
# sample_batch = next(iter(train_loader))
# print(f"\nSample batch shapes:")
# for key, val in sample_batch.items():
#     print(f"  {key}: {val.shape}")



{'items': array([472,  97, 122, 654, 709, 467, 574, 544, 478, 338, 301, 185, 156,
       712, 525, 225, 396, 370, 589, 750, 586, 353, 416, 687, 465, 433,
       215, 438, 219, 528, 134, 444,  92,  24, 145, 572,  22, 454, 715,
       364,  19, 115, 386, 374, 203, 229,  44, 624, 303, 152, 206, 268,
       329, 692,   8,  12, 291, 633, 537, 457, 121, 689, 164, 114, 566,
       519, 234, 154, 104,  74,  49,  86, 419, 315, 387, 663, 400]), 'ratings': array([4., 3., 4., 3., 5., 4., 2., 1., 4., 5., 5., 3., 4., 4., 4., 2., 5.,
       5., 5., 4., 4., 3., 4., 5., 1., 1., 5., 4., 4., 4., 4., 4., 4., 5.,
       2., 4., 5., 3., 4., 3., 2., 1., 5., 3., 4., 4., 3., 5., 4., 5., 4.,
       5., 4., 3., 4., 4., 5., 5., 3., 2., 4., 5., 5., 5., 2., 5., 5., 2.,
       4., 5., 4., 5., 4., 5., 4., 3., 5.]), 'returns_to_go': array([ 0.48681003,  0.46156233,  0.44262656,  0.41737887,  0.3984431 ,
        0.36688349,  0.34163579,  0.32901195,  0.32270002,  0.29745233,
        0.26589271,  0.2343331 ,  0.21539733

In [None]:
optimizer = T.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.001)


def _checkpoint_sort_key(filename):
    """Return a natural-sort key for a checkpoint file name.

    Runs of digits are compared as integers, so a name containing "10"
    sorts after the same name containing "9". A plain string sort would
    place "10" before "9" and could select a stale checkpoint.

    Args:
        filename: File name (not a full path) of a checkpoint.

    Returns:
        A list alternating text fragments (str) and numbers (int).
    """
    # re.split with a capturing group always yields text, digits, text, ...
    # so keys built from different names stay comparable position by position.
    return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', filename)]


# Resume from the last saved checkpoint, if one exists
checkpoint_dir = "checkpoints"
if os.path.isdir(checkpoint_dir):
    checkpoint_files = [f for f in os.listdir(checkpoint_dir) if f.endswith('.pt')]

    if checkpoint_files:
        # Natural-sort by file name and take the last entry
        last_checkpoint = sorted(checkpoint_files, key=_checkpoint_sort_key)[-1]
        checkpoint_path = os.path.join(checkpoint_dir, last_checkpoint)

        print(f"Cargando checkpoint: {last_checkpoint}")
        checkpoint = T.load(checkpoint_path, map_location=device)

        # Restore both the model weights and the optimizer state
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        print(f"✓ Checkpoint restaurado")
        print(f"  Epoch: {checkpoint['epoch']}")
        print(f"  Val Loss: {checkpoint['val_loss']:.4f}")
        print(f"  Train Loss: {checkpoint['train_loss']:.4f}")
    else:
        print("⚠ No se encontraron checkpoints. Iniciando entrenamiento desde cero.")
else:
    print("⚠ Carpeta de checkpoints no existe. Iniciando entrenamiento desde cero.")


⚠ Carpeta de checkpoints no existe. Iniciando entrenamiento desde cero.


In [None]:


# Run training. Every argument is passed by keyword: the previous call passed
# five positional arguments and failed with "got multiple values for argument
# 'num_epochs'", i.e. one of those positionals landed in the num_epochs slot.
# NOTE(review): the keyword names below are assumed to match the parameter
# names of train_decision_transformer_reference in train_reference.py — confirm.
# NOTE(review): num_epochs=1000 overrides the `num_epochs = 200` value set in
# the hyperparameter cell — confirm which one is intended.
model, history = train_decision_transformer_reference(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    device=device,
    num_epochs=1000,
    checkpoint_dir=checkpoint_dir,  # same directory the resume cell reads from
)


TypeError: train_decision_transformer_reference() got multiple values for argument 'num_epochs'