# This notebook creates the PPO benchmark run on Breakout, with stacked frames.

# Import relevant packages

# Settings

In [1]:
import os
import datetime
import wandb

from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CallbackList, EvalCallback
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecTransposeImage, VecVideoRecorder

from wandb.integration.sb3 import WandbCallback

from utils import linear_schedule
from feature_extraction.callbacks.wandb_on_training_end_callback import WandbOnTrainingEndCallback
from collections import OrderedDict

2024-04-01 19:31:35.815325: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-01 19:31:35.834124: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-01 19:31:35.834145: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-01 19:31:35.834641: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-01 19:31:35.838038: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# Run-level settings shared by the cells below.
project_name = "experiments"  # W&B project the run is filed under
log_dir = "logs"              # directory for TensorBoard logs and saved models
progress_bar = True           # not referenced by the cells visible in this notebook

# Human-readable timestamp suffix, so each execution gets its own run name.
timestamp = datetime.datetime.now().strftime('_%Y-%m-%d_%H-%M-%S')
run_name = f"breakout_benchmark{timestamp}"


# Setup Wandb

In [3]:

# Authenticate with Weights & Biases before opening the run.
wandb.login()

# All run hyperparameters in a single ordered mapping; it is handed to
# wandb.init() below and read back through wandb.config afterwards.
config = OrderedDict(
    # Environment settings
    env_id="BreakoutNoFrameskip-v4",
    n_envs=8,
    env_wrapper=['stable_baselines3.common.atari_wrappers.AtariWrapper'],
    frame_stack=4,
    training_seed=12,
    evaluation_seed=14,

    # Algorithm and policy
    algo='PPO',
    policy='CnnPolicy',

    # Training hyperparameters
    batch_size=256,
    n_steps=128,
    n_epochs=4,
    n_timesteps=10_000_000,
    learning_rate=0.00025,
    learning_rate_schedule='linear',
    clip_range=0.1,
    clip_range_schedule='linear',
    ent_coef=0.01,
    vf_coef=0.5,
    normalize_advantage=False,

    # Evaluation and logging
    n_eval_episodes=5,
    n_final_eval_episodes=25,
    record_n_episodes=10,
    log_frequency=10_000,

    # Other settings
    verbose=1,
)

# Working directory at launch time; only the disabled patch call below uses it.
root_logdir = os.getcwd()
#wandb.tensorboard.patch(root_logdir=root_logdir)

# Open the run. sync_tensorboard is enabled here, and the model cell points
# its TensorBoard output at log_dir.
wandb.init(
    project=project_name,
    name=run_name,  # Name of the run
    config=config,
    sync_tensorboard=True,
    save_code=True,
    #monitor_gym=True,
)

# From here on `config` is the W&B config object (attribute-style access).
config = wandb.config

[34m[1mwandb[0m: Currently logged in as: [33madicreson[0m ([33mfeature_extraction[0m). Use [1m`wandb login --relogin`[0m to force relogin


# Create Evaluation Environment

In [4]:
# Evaluation environment: vectorised Atari envs seeded with the evaluation
# seed, wrapped in a frame stack and then in an image transpose.
vec_eval_env = VecTransposeImage(
    VecFrameStack(
        make_atari_env(
            config.env_id,
            n_envs=config.n_envs,
            seed=config.evaluation_seed,
        ),
        n_stack=config.frame_stack,
    )
)


A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


# Create Training Environment

In [5]:
# Training environment: same wrapper chain as the evaluation environment,
# but seeded with the training seed.
vec_train_env = VecTransposeImage(
    VecFrameStack(
        make_atari_env(
            config.env_id,
            n_envs=config.n_envs,
            seed=config.training_seed,
        ),
        n_stack=config.frame_stack,
    )
)

# Create Model

In [6]:
# Config keys that are forwarded to the PPO constructor unchanged.
ppo_params_keys = [
    'batch_size',
    'ent_coef',
    'n_epochs',
    'n_steps',
    'policy',
    'vf_coef',
    'normalize_advantage',
]

# Keep only the pass-through PPO hyperparameters that are present in the config.
ppo_hyperparams = {key: config[key] for key in ppo_params_keys if key in config}

# Learning rate and clip range are wrapped in a schedule before being passed on.
# The initial values are read from the config (instead of being repeated as
# literals) so the values logged to W&B are the ones the model trains with.
# NOTE(review): only the linear schedule is implemented here; the
# 'learning_rate_schedule' / 'clip_range_schedule' config entries are recorded
# but not consulted. linear_schedule comes from utils — confirm its exact decay.
learning_rate_schedule = linear_schedule(config.learning_rate)
clip_range_schedule = linear_schedule(config.clip_range)

# Instantiate PPO on the training environment; TensorBoard output goes to log_dir.
model = PPO(
    **ppo_hyperparams,
    learning_rate=learning_rate_schedule,
    clip_range=clip_range_schedule,
    env=vec_train_env,
    verbose=config.verbose,
    tensorboard_log=log_dir,
)

Using cuda device


  _torch_pytree._register_pytree_node(


# Create Callbacks

In [7]:
# Evaluation cadence: the logging frequency divided by the number of parallel
# environments (integer division), never less than 1.
eval_every_n_calls = max(config.log_frequency // config.n_envs, 1)

# Periodic deterministic evaluation; the best model and the evaluation log
# are both written to log_dir.
eval_callback = EvalCallback(
    eval_env=vec_eval_env,
    n_eval_episodes=config.n_eval_episodes,
    eval_freq=eval_every_n_calls,
    log_path=log_dir,
    best_model_save_path=log_dir,
    deterministic=True,
    render=False,
    verbose=0,
)

# TODO: needs to be changed so that it uses the run object instead of the
# global wandb module.
wandb_callback = WandbCallback(
    gradient_save_freq=config.log_frequency,
    verbose=1,
)

# Project callback for the end of training; it receives the model, the
# evaluation env and the final-evaluation / recording episode counts.
wandb_on_training_end_callback = WandbOnTrainingEndCallback(
    model=model,
    eval_env=vec_eval_env,
    log_dir=log_dir,
    n_eval_episodes=config.n_final_eval_episodes,
    record_n_episodes=config.record_n_episodes,
)

# Bundle the three callbacks, in this order, for model.learn().
callbacks = CallbackList([
    wandb_callback,
    eval_callback,
    wandb_on_training_end_callback,
])

# Train Model with callbacks

In [None]:
# Train for the configured number of timesteps with all callbacks attached.
model.learn(callback=callbacks, total_timesteps=config.n_timesteps)

Logging to logs/PPO_5


2024-04-01 19:31:40.539188: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-01 19:31:40.560483: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-01 19:31:40.560503: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-01 19:31:40.561031: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-01 19:31:40.564405: I tensorflow/core/platform/cpu_feature_guar

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 520      |
|    ep_rew_mean     | 0        |
| time/              |          |
|    fps             | 651      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1024     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 537          |
|    ep_rew_mean          | 0.143        |
| time/                   |              |
|    fps                  | 771          |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0013171688 |
|    clip_fraction        | 0            |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -0.0188      |
|    learning_r

# Cleanup