Set up the rendering dependencies for Google Colaboratory.

In [None]:
# System packages: xvfb and ffmpeg (presumably the virtual framebuffer and the
# video encoder needed for headless recording — confirm).
# Both stdout and stderr are redirected to /dev/null, so a failed install is silent.
!apt-get install -y xvfb ffmpeg > /dev/null 2>&1
# Python packages for rendering/recording; pyvirtualdisplay is imported later in
# this notebook to start the virtual display.
# NOTE(review): versions are not pinned and errors are hidden by the redirect.
!pip install pyvirtualdisplay pygame moviepy > /dev/null 2>&1

Install d3rlpy!

In [None]:
# NOTE(review): no version is pinned; later cells call
# d3rlpy.algos.DQNConfig(...).create(), so presumably a release exposing that
# API is required — confirm and consider pinning.
!pip install d3rlpy

Set up the CartPole environment.

In [None]:
import gym

# Build two separate CartPole-v1 instances in one go: `env` is handed to the
# trainer for interaction, `eval_env` is passed separately for evaluation.
env, eval_env = (gym.make('CartPole-v1') for _ in range(2))

Set up the deep reinforcement learning algorithm (DQN) and train it online against the environment.

In [None]:
import d3rlpy

# describe the DQN hyperparameters, then instantiate the algorithm from them
dqn_config = d3rlpy.algos.DQNConfig(learning_rate=1e-3, target_update_interval=100)
dqn = dqn_config.create()

# epsilon-greedy exploration with epsilon held constant at 0.3
explorer = d3rlpy.algos.ConstantEpsilonGreedy(epsilon=0.3)

# FIFO replay buffer (limit=50000) created from the training environment
buffer = d3rlpy.dataset.create_fifo_replay_buffer(
    env=env,
    limit=50000,
)

# online training: 50000 steps in total, 10000 steps per epoch, with the
# separate eval_env supplied for evaluation
dqn.fit_online(
    env,
    buffer,
    explorer,
    n_steps=50000,
    n_steps_per_epoch=10000,
    eval_env=eval_env,
)

Set up the rendering utilities for Google Colaboratory.

In [None]:
import glob
import io
import base64

from gym.wrappers import RecordVideo
from IPython.display import HTML
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display

# start virtual display
# A pyvirtualdisplay Display is created with default settings and started
# (presumably backed by the xvfb package installed earlier — confirm).
# NOTE(review): the name `display` likely shadows IPython's built-in `display`
# helper in the notebook namespace; the IPython module is imported as
# `ipythondisplay` above, presumably to sidestep that clash. Consider a more
# specific name.
display = Display()
display.start()

# play recorded video
def show_video(video_dir='video'):
    """Embed a recorded MP4 from ``video_dir`` in the notebook output.

    The first ``*.mp4`` file in sorted filename order is base64-encoded and
    rendered as an inline HTML ``<video>`` element. If the directory holds no
    MP4 file (or does not exist), ``"Could not find video"`` is printed.

    Args:
        video_dir: Directory searched for ``*.mp4`` files. Defaults to
            ``'video'``, resolved against the current working directory.
    """
    # glob yields matches in arbitrary order; sorting makes the chosen file
    # the same on every run.
    mp4list = sorted(glob.glob('{0}/*.mp4'.format(video_dir)))
    if not mp4list:
        print("Could not find video")
        return

    # Read-only binary mode is all that is needed, and the context manager
    # closes the handle even if reading fails.
    with io.open(mp4list[0], 'rb') as mp4_file:
        encoded = base64.b64encode(mp4_file.read())

    ipythondisplay.display(HTML(data='''
            <video alt="test" autoplay loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
            </video>'''.format(encoded.decode('ascii'))))

Record video!

In [None]:
# Wrap a fresh CartPole instance in RecordVideo so that evaluation episodes are
# written under ./video; render_mode="rgb_array" is requested so the wrapper
# has frames to record.
env = RecordVideo(gym.make("CartPole-v1", render_mode="rgb_array"), './video')

# evaluate the trained policy on the recording environment
try:
    eval_score = d3rlpy.metrics.evaluate_qlearning_with_environment(dqn, env)
finally:
    # Close the wrapper even if evaluation fails, so any in-progress recording
    # is finalized and rendering resources are released.
    env.close()

# keep the evaluation result as the cell's displayed output
eval_score

Let's see how it works!

In [None]:
# Embed a recorded episode from the video directory inline; show_video prints
# "Could not find video" if no MP4 was produced.
show_video()