In [2]:
#!pip install 'gymnasium[atari]'
#!pip install 'gymnasium[accept-rom-license]'
#!pip install 'opencv-python'

In [2]:
import gymnasium as gym
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C
import cv2

### Save model every n steps

In [4]:
from stable_baselines3.common.callbacks import CheckpointCallback, EveryNTimesteps

# Checkpoint the model roughly every 500 environment timesteps.
# EveryNTimesteps fires its child callback once at least `n_steps` timesteps
# (summed over all parallel envs) have elapsed since the last trigger;
# save_freq=1 makes the wrapped CheckpointCallback save on every trigger.
# NOTE(review): with n_envs > 1 this is not the same as a bare
# CheckpointCallback(save_freq=500), whose save_freq appears to count
# callback calls rather than total timesteps -- confirm against the SB3 docs.
checkpoint_on_event = CheckpointCallback(save_freq=1, save_path="./logs/")
event_callback = EveryNTimesteps(n_steps=500, callback=checkpoint_on_event)

# 8 parallel Breakout environments with a fixed seed
vec_env = make_atari_env("ALE/Breakout-v5", n_envs=8, seed=0)
# Frame-stacking with 4 frames (was n_stack=8, which contradicted this comment
# and the n_stack=4 environments used by the other cells of this notebook)
vec_env = VecFrameStack(vec_env, n_stack=4)

model = A2C("CnnPolicy", vec_env, verbose=1)
model.learn(total_timesteps=50000, callback=event_callback)

# Persist the final weights; this is only reached if training runs to completion
model.save("a2c_breakout")

Using cpu device
Wrapping the env in a VecTransposeImage.
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 237      |
|    ep_rew_mean        | 1.88     |
| time/                 |          |
|    fps                | 191      |
|    iterations         | 100      |
|    time_elapsed       | 20       |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -1.37    |
|    explained_variance | 0.132    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.108   |
|    value_loss         | 0.166    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 236      |
|    ep_rew_mean        | 1.9      |
| time/                 |          |
|    fps                | 204      |
|    iterations         | 200      |
|    time_elapsed       | 39       |
|    total_timest

KeyboardInterrupt: 

### Save the best model

In [3]:
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import VecTransposeImage

# Dedicated evaluation environment (its own seed, 4 stacked frames,
# transposed image observations)
eval_env = VecTransposeImage(
    VecFrameStack(
        make_atari_env("ALE/Breakout-v5", n_envs=4, seed=52),
        n_stack=4,
    )
)

# Evaluate with deterministic actions; the best model and the evaluation logs
# are both written under ./logs/.
# NOTE(review): the recorded run reports its first evaluation at
# num_timesteps=2000, presumably because eval_freq is counted per callback
# call across the 4 parallel envs -- confirm.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./logs/",
    log_path="./logs/",
    eval_freq=500,
    deterministic=True,
    render=False,
)

# Training environment: 4 parallel envs, frame-stacking with 4 frames
vec_env = VecTransposeImage(
    VecFrameStack(
        make_atari_env("ALE/Breakout-v5", n_envs=4, seed=0),
        n_stack=4,
    )
)

model = A2C("CnnPolicy", vec_env, verbose=1)
model.learn(total_timesteps=2_000_000, callback=eval_callback)

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Using cpu device
Eval num_timesteps=2000, episode_reward=2.40 +/- 1.50
Episode length: 260.60 +/- 62.68
------------------------------------
| eval/                 |          |
|    mean_ep_length     | 261      |
|    mean_reward        | 2.4      |
| time/                 |          |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | 0.757    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.00879  |
|    value_loss         | 0.0181   |
------------------------------------
New best mean reward!
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 234      |
|    ep_rew_mean     | 1.78     |
| time/              |          |
|    fps             | 145      |
|    iterations      | 100      |
|    time_elapsed    | 13       |
|    total_timesteps | 2000     |
---------------------------------
Eval num_timeste

<stable_baselines3.a2c.a2c.A2C at 0x7f363ee17f40>

In [1]:
import time
from utils import render_env_with_model, evaluate_policy
from stable_baselines3.common.vec_env import VecTransposeImage
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_atari_env

# Evaluation environment: 2 parallel envs, 4 stacked frames, transposed images
eval_env = VecTransposeImage(
    VecFrameStack(
        make_atari_env("ALE/Breakout-v5", n_envs=2, seed=10),
        n_stack=4,
    )
)

# Restore the best checkpoint and bind it to the evaluation environment
model = A2C.load("logs/best_model.zip", env=eval_env)

# render = True is not working; the evaluate_policy helper from utils is used instead.
mean_reward, std_reward = evaluate_policy(
    model,
    model.get_env(),
    n_eval_episodes=2,
    render=True,
    fps=30,
)
print(mean_reward, std_reward)

# Alternative kept for reference: render via the other utils helper
#render_env_with_model(eval_env, model, num_steps=40, fps=10)


2024-02-26 15:19:57.157325: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-26 15:19:57.157376: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-26 15:19:57.158889: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-26 15:19:57.168864: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
A.L.E: Arcade Learning Environment (version 0.8.1+53f

Running evaluation


  logger.warn(


23.5 0.5


In [3]:
# Show the environment's metadata and then its render mode, one per line
for attr_name in ("metadata", "render_mode"):
    print(getattr(eval_env, attr_name))

{'render_modes': ['human', 'rgb_array'], 'render_fps': 1}
human
