In [58]:
import os
import torch
import torch.nn as nn
import numpy as np
from torchrl.data.datasets.minari_data import MinariExperienceReplay
from torchrl.modules import MLP
from torchrl.modules.distributions import NormalParamExtractor, TanhNormal
from torchrl.modules.tensordict_module.actors import ProbabilisticActor
from torchrl.modules.tensordict_module.common import SafeModule
from torchrl.modules.tensordict_module.actors import ValueOperator
from torchrl.objectives import CQLLoss, SoftUpdate
from tensordict import TensorDict
from torchrl.data.tensor_specs import Bounded

# ---- Configuration ----
dataset_id = "minigrid/BabyAI-Pickup/optimal-fullobs-v0"
batch_size = 32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- Load Dataset ----
replay_buffer = MinariExperienceReplay(
    dataset_id=dataset_id,
    batch_size=batch_size,
    root=f"{os.getenv('HOME')}/.minari/datasets",
    download = True
)

# ---- Infer Dimensions ----
sample = replay_buffer.sample()
obs_dim = sample["observation"].shape[-1]
act_dim = sample["action"].shape[-1]

action_spec = Bounded(
    low=-torch.ones(act_dim),
    high=torch.ones(act_dim),
    shape=torch.Size([act_dim])
)

class MinariObsEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2),  # adjust as needed
            nn.ReLU(),
            nn.Flatten()
        )
        self.direction_embed = nn.Embedding(4, 8)  # assuming 4 directions

    def forward(self, obs_td):
        image = obs_td["image"].permute(0, 3, 1, 2).float() / 255.0  # [B, 22]
        dir_emb = self.direction_embed(obs_td["direction"])          # [B, 8]
        img_feat = self.encoder(image)                               # [B, N]
        return torch.cat([img_feat, dir_emb], dim=-1)                # [B, obs_dim]

obs_encoder = MinariObsEncoder()

# Run one batch through the encoder to get output dimension
with torch.no_grad():
    obs_encoded = obs_encoder(sample["observation"])
    obs_encoder_output_dim = obs_encoded.shape[-1]

print(f"Obs encoder output dim {obs_encoder_output_dim}")

# ---- Define Actor ----
actor_net = nn.Sequential(
    nn.Linear(obs_encoder_output_dim, 256),  # infer this once
    nn.ReLU(),
    nn.Linear(256, 2 * act_dim),
    NormalParamExtractor()
)
actor_module = SafeModule(
    module=nn.Sequential(obs_encoder, actor_net),
    in_keys=["observation"],
    out_keys=["loc", "scale"]
)

actor = ProbabilisticActor(
    module=actor_module,
    in_keys=["loc", "scale"],
    spec=action_spec,
    distribution_class=TanhNormal
)

# ---- Define Critic ----
critic_net = nn.Sequential(
    nn.Linear(obs_dim + act_dim, 256),
    nn.ReLU(),
    nn.Linear(256, 1)
)
critic = ValueOperator(
    module=critic_net,
    in_keys=["observation", "action"]
)

# ---- CQL Loss Module ----
loss_module = CQLLoss(
    actor_network=actor,
    qvalue_network=critic,
    action_spec=action_spec,
    temperature=1.0,
    min_q_weight=5.0,
    num_random=10,
    target_entropy="auto"
)

# ---- Optimizer ----
optimizer = torch.optim.Adam(loss_module.parameters(), lr=3e-4)
target_net_updater = SoftUpdate(loss_module, eps=0.005)

# ---- Training Loop ----
for step in range(500):  # Small demo
    batch = replay_buffer.sample().to(device)

    print("Sample keys:", sample.keys())
    print("Observation tensor:", sample["observation"])
    print("Observation shape:", sample["observation"].shape)
    print("Single obs shape:", sample["observation"][0].shape)

    loss_td = loss_module(batch)

    total_loss = (
        loss_td["loss_actor"]
        + loss_td["loss_qvalue"]
        + loss_td["loss_cql"]
        + loss_td["loss_alpha"]
    )

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()
    target_net_updater.step()

    if step % 50 == 0:
        print(f"[Step {step}] Loss: {total_loss.item():.4f}")

print("✅ Training complete.")

Obs encoder output dim 3208
Sample keys: _StringKeys(dict_keys(['episode', 'action', 'observation', 'next', 'index']))
Observation tensor: TensorDict(
    fields={
        direction: Tensor(shape=torch.Size([32]), device=cpu, dtype=torch.int64, is_shared=False),
        image: Tensor(shape=torch.Size([32, 22, 22, 3]), device=cpu, dtype=torch.uint8, is_shared=False),
        mission: NonTensorStack(
            [array([b'pick up a purple key', b'pick up a purpl...,
            batch_size=torch.Size([32]),
            device=cpu)},
    batch_size=torch.Size([32]),
    device=cpu,
    is_shared=False)
Observation shape: torch.Size([32])
Single obs shape: torch.Size([])


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)

In [53]:
from torchrl.data.datasets.minari_data import MinariExperienceReplay
import os

dataset_id = "minigrid/BabyAI-Pickup/optimal-fullobs-v0"
root = f"{os.getenv('HOME')}/.minari/datasets"

replay_buffer = MinariExperienceReplay(dataset_id=dataset_id, batch_size=32, root=root)

sample = replay_buffer.sample()

print("Sample keys:", sample.keys())
print("Image shape:", sample["observation"]["image"].shape)
print("Direction shape:", sample["observation"]["direction"].shape)
print("Action shape:", sample["action"].shape)
print("First actions:", sample["action"][:5])



Downloading minigrid/BabyAI-Pickup/optimal-fullobs-v0 from Farama servers...


Fetching 2 files: 100%|██████████| 2/2 [00:20<00:00, 10.48s/it]
2025-06-11 16:54:02,538 [torchrl][INFO] first read through data to create data structure...



Dataset minigrid/BabyAI-Pickup/optimal-fullobs-v0 downloaded to /tmp/tmpkr5ms0m4/minigrid/BabyAI-Pickup/optimal-fullobs-v0


2025-06-11 16:54:12,861 [torchrl][INFO] creating tensordict data in /home/jorge/.minari/datasets/minigrid/BabyAI-Pickup/optimal-fullobs-v0: 
2025-06-11 16:54:20,409 [torchrl][INFO] tensordict structure: TensorDict(
    fields={
        action: MemoryMappedTensor(shape=torch.Size([52700]), device=cpu, dtype=torch.int64, is_shared=True),
        episode: MemoryMappedTensor(shape=torch.Size([52700]), device=cpu, dtype=torch.int64, is_shared=True),
        next: TensorDict(
            fields={
                done: MemoryMappedTensor(shape=torch.Size([52700, 1]), device=cpu, dtype=torch.bool, is_shared=True),
                observation: TensorDict(
                    fields={
                        direction: MemoryMappedTensor(shape=torch.Size([52700]), device=cpu, dtype=torch.int64, is_shared=True),
                        image: MemoryMappedTensor(shape=torch.Size([52700, 22, 22, 3]), device=cpu, dtype=torch.uint8, is_shared=True),
                        mission: NonTensorStack(
  

Sample keys: _StringKeys(dict_keys(['episode', 'action', 'next', 'observation', 'index']))
Image shape: torch.Size([32, 22, 22, 3])
Direction shape: torch.Size([32])
Action shape: torch.Size([32])
First actions: tensor([5, 1, 2, 2, 3])


In [52]:
import minari
minari.download_dataset("minigrid/BabyAI-Pickup/optimal-fullobs-v0", force_download=True)



Downloading minigrid/BabyAI-Pickup/optimal-fullobs-v0 from Farama servers...


Fetching 2 files: 100%|██████████| 2/2 [00:16<00:00,  8.47s/it]


Dataset minigrid/BabyAI-Pickup/optimal-fullobs-v0 downloaded to /home/jorge/.minari/datasets/minigrid/BabyAI-Pickup/optimal-fullobs-v0





In [None]:
force_download=True

In [25]:

dataset_id = "minigrid/BabyAI-Pickup/optimal-fullobs-v0"
root = f"{os.getenv('HOME')}/.minari/datasets"

replay_buffer = MinariExperienceReplay(dataset_id=dataset_id, batch_size=32, root=root)

sample = replay_buffer.sample()

print("Sample keys:", sample.keys())
print("Observation keys:", sample["observation"].keys())
print("Observation image shape:", sample["observation"]["image"].shape)
print("Observation direction shape:", sample["observation"]["direction"].shape)
print("Action shape:", sample["action"].shape)
print("Action sample:", sample["action"][:5])

FileNotFoundError: [Errno 2] No such file or directory: '/home/jorge/.minari/datasets/minigrid/BabyAI-Pickup/optimal-fullobs-v0/env_metadata.json'

In [35]:
import torch
from torchrl.data import ReplayBuffer, LazyTensorStorage
import minari

# Cargar dataset desde Minari
dataset = minari.load_dataset("D4RL/minigrid/fourrooms-random-v0")

# Extraer datos de los primeros episodios
observations = []
directions = []
actions = []

for ep in dataset[:10]:  # usa más si quieres
    img = torch.tensor(ep["observations"]["image"])           # (T, 7, 7)
    dir = torch.tensor(ep["observations"]["direction"])       # (T,)
    act = torch.tensor(ep["actions"])                         # (T,)

    observations.append(img)
    directions.append(dir)
    actions.append(act)

# Concatenar todos los episodios
obs_tensor = torch.cat(observations)                          # (N, 7, 7)
dir_tensor = torch.cat(directions)                            # (N,)
act_tensor = torch.cat(actions)                               # (N,)

# Crear buffer
data = [{"observation": {"image": img, "direction": d}, "action": a}
        for img, d, a in zip(obs_tensor, dir_tensor, act_tensor)]

storage = LazyTensorStorage(len(data))
replay_buffer = ReplayBuffer(storage=storage)
for item in data:
    replay_buffer.add(item)

# Samplear un batch
batch = replay_buffer.sample(5)
print("Sampled batch keys:", batch.keys())
print("Image shape:", batch["observation"]["image"].shape)
print("Direction:", batch["observation"]["direction"])
print("Actions:", batch["action"])


KeyError: "Unable to synchronously open object (object 'episode_[0 1 2 3 4 5 6 7 8 9]' doesn't exist)"

In [37]:
from torchrl.data.datasets.minari_data import MinariExperienceReplay
import os

dataset_id = "D4RL/door/human-v2"
root = f"{os.getenv('HOME')}/.minari/datasets"

# Esto SÍ debe funcionar
replay_buffer = MinariExperienceReplay(dataset_id=dataset_id, batch_size=8, root=root)

sample = replay_buffer.sample()

print("Sample keys:", sample.keys())
print("Observation shape:", sample["observation"].shape)
print("Action shape:", sample["action"].shape)
print("First actions:", sample["action"][:5])


FileNotFoundError: [Errno 2] No such file or directory: '/home/jorge/.minari/datasets/D4RL/door/human-v2/env_metadata.json'