## Filter out TensorFlow's many warnings

In [3]:
# Ignore every FutureWarning issued through Python's warnings machinery.
# The filter is installed before the tensorflow import below so that it is
# already active while that import runs.
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Lower the verbosity of TensorFlow's native (C++) logging.
# NOTE(review): '2' is understood to hide INFO and WARNING messages -- confirm
# against the TensorFlow version in use. It is set before tensorflow is
# imported, presumably so the setting is in place when the library loads.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Restrict TensorFlow's Python-side logger to errors and set AutoGraph
# verbosity to 0.
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

## Setup

In [4]:
import os
import numpy as np
import gym 
import stable_baselines

In [5]:
# --- Experiment configuration -------------------------------------------
RENDER = False  # True: render periodic evaluations and one final episode
N_EXPERIMENTS = 5  # number of independent training runs
ENV_NAME = "CartPole-v1"
RUN_NAME = "baseline_dqn"
TENSORBOARD_DIR = f"../outputs/{ENV_NAME}/{RUN_NAME}"
TOTAL_TIMESTEPS = 10000  # training budget per run
N_EVAL_EPISODES = 100  # episodes averaged for the final score

# Train N_EXPERIMENTS fresh DQN agents and report each one's final score.
#
# Training and evaluation use separate environment instances. Passing the
# training env to EvalCallback would make every periodic evaluation reset and
# play out that env in the middle of a training episode, after which training
# would continue from a stale observation on an already-finished episode.
with gym.make(ENV_NAME) as env, gym.make(ENV_NAME) as eval_env:
    for run in range(N_EXPERIMENTS):
        print(f"[{run}] Training...")
        model = stable_baselines.DQN(
            "MlpPolicy", env, verbose=0, tensorboard_log=TENSORBOARD_DIR
        )
        log_name = f"run_{run}"

        # Only attach the (rendering) evaluation callback when requested;
        # callback=None is what learn() uses when no callback is given.
        render_callback = None
        if RENDER:
            render_callback = stable_baselines.common.callbacks.EvalCallback(
                eval_env, n_eval_episodes=1, eval_freq=1000, render=True, verbose=0
            )
        model.learn(
            total_timesteps=TOTAL_TIMESTEPS,
            tb_log_name=log_name,
            callback=render_callback,
        )

        # evaluate_policy's result is printed as-is; the captured output
        # shows it as a pair of floats per run.
        eval_final = stable_baselines.common.evaluation.evaluate_policy(
            model, eval_env, n_eval_episodes=N_EVAL_EPISODES,
        )
        print(f"[{run}] Final evaluation: {eval_final}")

        if RENDER:
            # Show one rendered episode of the trained agent.
            stable_baselines.common.evaluation.evaluate_policy(
                model, eval_env, n_eval_episodes=1, render=True
            )

[0] Training...
[0] Final evaluation: (113.93, 3.9147285985110125)
[1] Training...
[1] Final evaluation: (144.04, 6.694654584069292)
[2] Training...
[2] Final evaluation: (96.55, 9.703994023081425)
[3] Training...
[3] Final evaluation: (118.05, 6.14389941323912)
[4] Training...
[4] Final evaluation: (157.61, 9.21834583859816)
