In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import gymnasium as gym

from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.logger import HParam
from stable_baselines3.common.logger import configure

In [10]:
class HParamCallback(BaseCallback):
    """
    Records the model's hyperparameters, together with the metric tags to
    display next to them, once at the start of training so they appear in
    TensorBoard's `HPARAMS` tab.

    The hyperparameters read here (`tau`, `buffer_size`, `batch_size`) exist on
    off-policy algorithms such as DQN, so the callback is meant to be used with
    that family of models.
    """

    def _on_training_start(self) -> None:
        """Log one `HParam` record containing hyperparameters and metric tags."""
        # `learning_rate` may be a constant or a schedule taking the remaining
        # progress (1.0 at the start of training). Only scalars can be written
        # as hyperparameters, so resolve a schedule to its initial value.
        learning_rate = self.model.learning_rate
        if callable(learning_rate):
            learning_rate = float(learning_rate(1.0))

        hparam_dict = {
            "algorithm": self.model.__class__.__name__,
            "learning rate": learning_rate,
            "gamma": self.model.gamma,
            "batch_size": self.model.batch_size,
            "tau": self.model.tau,
            "buffer_size": self.model.buffer_size,
        }
        # Define the metrics that will appear in the `HPARAMS` TensorBoard tab by referencing their tag.
        # TensorBoard will find & display metrics from the `SCALARS` tab, so each tag
        # must match one the algorithm actually logs. DQN reports its loss as
        # `train/loss` (`train/value_loss` is only emitted by actor-critic algorithms).
        metric_dict = {
            "rollout/ep_len_mean": 0,
            "train/loss": 0.0,
        }
        # HParam objects can only be rendered by the TensorBoard writer, so every
        # other output format is excluded.
        self.logger.record(
            "hparams",
            HParam(hparam_dict, metric_dict),
            exclude=("stdout", "log", "json", "csv"),
        )

    def _on_step(self) -> bool:
        """Do nothing per step; returning True lets training continue."""
        return True

In [12]:
# Run configuration: a fixed seed makes repeated runs comparable in TensorBoard
# (full determinism is still not guaranteed on every hardware/backend).
SEED = 0
TOTAL_TIMESTEPS = 200_000

# Create environment
env = gym.make("CartPole-v1")

# Instantiate the agent
model = DQN(
    "MlpPolicy",
    env,
    verbose=0,
    tensorboard_log="./sb3_log/",
    batch_size=128,
    tau=0.8,
    learning_rate=1e-4,
    buffer_size=100_000,
    gamma=0.9,
    seed=SEED,
)

# Train the agent and display a progress bar; HParamCallback records the
# hyperparameters for the TensorBoard HPARAMS tab. The trailing `;` suppresses
# the repr of the model returned by `learn`.
model.learn(
    total_timesteps=TOTAL_TIMESTEPS,
    progress_bar=True,
    tb_log_name="first_run",
    callback=HParamCallback(),
);


Output()