In [1]:
from d3rlpy.algos import DiscreteCQLConfig
from d3rlpy.datasets import get_minari
from d3rlpy.metrics import (
    TDErrorEvaluator,
    AverageValueEstimationEvaluator,
    EnvironmentEvaluator,
)

def train_discrete_cql_on_minari(env_id: str, n_steps: int = 100000, use_gpu: bool = True):
    """
    Train Discrete Conservative Q-Learning (CQL) on a Minari dataset using d3rlpy.

    Parameters:
    - env_id (str): ID of the Minari dataset (e.g., "minari/cartpole-random-v0").
    - n_steps (int): Total number of training steps.
    - use_gpu (bool): Whether to train on "cuda:0". If True but CUDA is not
      available on this machine, training falls back to "cpu".

    Returns:
    - The Discrete CQL algorithm object after `fit` has been called on it.

    Notes:
    - One epoch is 1% of `n_steps` (at least one step), so metrics are
      evaluated about 100 times over the course of a run.
    - NOTE(review): the recorded run of this notebook on a dataset whose
      observation space is a `Dict` ended in
      `ValueError: Unsupported observation space`. Such datasets presumably
      need their observations flattened/wrapped before use - confirm.
    """
    # Load dataset and environment from Minari
    dataset, env = get_minari(env_id)

    # Pick the device: only request CUDA when it is actually usable, as the
    # parameter description promises. torch is imported lazily so CPU-only
    # calls do not touch it here.
    device = "cpu"
    if use_gpu:
        import torch

        if torch.cuda.is_available():
            device = "cuda:0"

    # Setup Discrete CQL
    cql = DiscreteCQLConfig().create(device=device)

    # Setup evaluators; the data-based metrics use only the first 10 episodes
    # to keep the per-epoch evaluation cheap.
    eval_episodes = dataset.episodes[:10]
    evaluators = {
        "td_error": TDErrorEvaluator(eval_episodes),
        "value_estimation": AverageValueEstimationEvaluator(eval_episodes),
        "environment": EnvironmentEvaluator(env),
    }

    # n_steps // 100 is 0 for n_steps < 100, which is not a valid epoch
    # length, so clamp it to at least one step per epoch.
    n_steps_per_epoch = max(1, n_steps // 100)

    # Start training
    cql.fit(
        dataset,
        n_steps=n_steps,
        n_steps_per_epoch=n_steps_per_epoch,
        evaluators=evaluators,
    )

    return cql


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# === Run the training ===
# Train Discrete CQL for 50k steps on the BabyAI Pickup dataset.
# NOTE(review): the recorded run of this cell ended in
# "ValueError: Unsupported observation space" (Dict observation space).

dataset_id = "minigrid/BabyAI-Pickup/optimal-fullobs-v0"
trained_cql = train_discrete_cql_on_minari(
    env_id=dataset_id,
    n_steps=50_000,
)

ValueError: Unsupported observation space: Dict('direction': Discrete(4), 'image': Box(0, 255, (22, 22, 3), uint8), 'mission': MissionSpace(<function BabyAIMissionSpace._gen_mission at 0x2e541add0>, None))