In [None]:
import gymnasium as gym # Environment interface
from stable_baselines3 import DQN # Library for RL agorithms
from stable_baselines3.dqn import MlpPolicy #

# --- Configuration -----------------------------------------------------------
SEED = 42  # fixed seed so Restart & Run All reproduces the same training run
TOTAL_TIMESTEPS = 10_000  # short demo run; raise (e.g. to 100_000) for a stronger policy

# Training environment: created without a render_mode so no window is opened,
# which keeps stepping fast during training.
env = gym.make("CartPole-v1")

# DQN agent with an MLP Q-network. Tune the hyperparameters below for better results.
model = DQN(
    MlpPolicy,
    env,
    learning_rate=1e-3,  # SB3 default: 1e-4; CartPole often benefits from faster learning
    buffer_size=50000,  # replay buffer capacity (SB3 default: 1,000,000); reduce if memory constrained
    learning_starts=1000,  # let the replay buffer fill with this many steps before training starts
    batch_size=64,  # bigger batches can stabilize training
    tau=1.0,  # 1.0 = hard update of the target network
    gamma=0.99,  # discount factor
    train_freq=4,  # train every 4 steps
    target_update_interval=1000,  # how often (in env steps) the target network is updated
    exploration_fraction=0.1,  # fraction of total_timesteps over which epsilon is annealed
    exploration_final_eps=0.02,  # final epsilon
    verbose=1,
    seed=SEED,  # seeds the agent's pseudo-random generators for reproducibility
)

# Train the agent; progress tables are printed because verbose=1.
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Watch the trained agent. A second environment is created with
# render_mode="human" so that rendering happens only here, after training.
render_env = gym.make("CartPole-v1", render_mode="human")

num_episodes = 20

try:
    for i in range(num_episodes):
        obs, info = render_env.reset()
        episode_reward = 0
        done = False

        while not done:
            # deterministic=True: always pick the greedy action, no exploration.
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = render_env.step(action)

            episode_reward += reward

            # The episode is over once the env reports termination or truncation.
            done = terminated or truncated

        print(f"Episode {i+1} done. Total reward: {episode_reward}\n")
finally:
    # Always release the render window, even if the loop is interrupted
    # (e.g. by a kernel interrupt) or a step raises.
    render_env.close()


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 17       |
|    ep_rew_mean      | 17       |
|    exploration_rate | 0.933    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 13821    |
|    time_elapsed     | 0        |
|    total_timesteps  | 68       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18.1     |
|    ep_rew_mean      | 18.1     |
|    exploration_rate | 0.858    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 11290    |
|    time_elapsed     | 0        |
|    total_timesteps  | 145      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.8     |
|    ep_rew_mean      | 24.8   

In [None]:
import gymnasium as gym

from IPython.display import Markdown, display

# Every environment id currently known to Gymnasium's registry, in sorted order.
env_ids = sorted(gym.envs.registry)

# Render the ids as a Markdown bullet list under a bold heading.
bullet_lines = [f"- `{env_id}`" for env_id in env_ids]
registry_markdown = "**Registered Environments:**\n" + "\n".join(bullet_lines)
display(Markdown(registry_markdown))



**Registered Environments:**
- `Acrobot-v1`
- `Ant-v2`
- `Ant-v3`
- `Ant-v4`
- `Ant-v5`
- `BipedalWalker-v3`
- `BipedalWalkerHardcore-v3`
- `Blackjack-v1`
- `CarRacing-v3`
- `CartPole-v0`
- `CartPole-v1`
- `CliffWalking-v0`
- `FrozenLake-v1`
- `FrozenLake8x8-v1`
- `GymV21Environment-v0`
- `GymV26Environment-v0`
- `HalfCheetah-v2`
- `HalfCheetah-v3`
- `HalfCheetah-v4`
- `HalfCheetah-v5`
- `Hopper-v2`
- `Hopper-v3`
- `Hopper-v4`
- `Hopper-v5`
- `Humanoid-v2`
- `Humanoid-v3`
- `Humanoid-v4`
- `Humanoid-v5`
- `HumanoidStandup-v2`
- `HumanoidStandup-v4`
- `HumanoidStandup-v5`
- `InvertedDoublePendulum-v2`
- `InvertedDoublePendulum-v4`
- `InvertedDoublePendulum-v5`
- `InvertedPendulum-v2`
- `InvertedPendulum-v4`
- `InvertedPendulum-v5`
- `LunarLander-v3`
- `LunarLanderContinuous-v3`
- `MountainCar-v0`
- `MountainCarContinuous-v0`
- `Pendulum-v1`
- `Pusher-v2`
- `Pusher-v4`
- `Pusher-v5`
- `Reacher-v2`
- `Reacher-v4`
- `Reacher-v5`
- `Swimmer-v2`
- `Swimmer-v3`
- `Swimmer-v4`
- `Swimmer-v5`
- `Taxi-v3`
- `Walker2d-v2`
- `Walker2d-v3`
- `Walker2d-v4`
- `Walker2d-v5`
- `phys2d/CartPole-v0`
- `phys2d/CartPole-v1`
- `phys2d/Pendulum-v0`
- `tabular/Blackjack-v0`
- `tabular/CliffWalking-v0`