Install d4rl-atari.

In [None]:
!pip install git+https://github.com/takuseno/d4rl-atari

Set up the dataset.

In [None]:
import gym
import d4rl_atari

# Create the d4rl-atari environment, which also provides the dataset.
# caution: this dataset will consume around 8GiB of CPU memory!!
env_name = 'breakout-expert-v0'
env = gym.make(env_name)

# Extract the dataset from the environment.
data = env.get_dataset()

Set up data-driven deep reinforcement learning.

In [None]:
from d3rlpy.dataset import MDPDataset
from d3rlpy.algos import DiscreteCQL
from d3rlpy.metrics.scorer import discounted_sum_of_advantage_scorer
from d3rlpy.metrics.scorer import evaluate_on_environment
from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from sklearn.model_selection import train_test_split

# Build an MDPDataset from the arrays extracted from the environment.
dataset = MDPDataset(data['observations'],
                     data['actions'],
                     data['rewards'],
                     data['terminals'],
                     discrete_action=True) # this flag is necessary!

# this can be replaced with the builtin helper function as follows:
# from d3rlpy.datasets import get_atari
# dataset, env = get_atari('breakout-expert-v0')

# setup CQL algorithm (discrete version)
cql = DiscreteCQL(n_frames=4, scaler='pixel', use_gpu=True)

# Split train and test episodes (20% held out for evaluation).
# A fixed random_state keeps the split identical across kernel restarts,
# so the evaluation metrics are comparable between runs.
train_episodes, test_episodes = train_test_split(dataset,
                                                 test_size=0.2,
                                                 random_state=0)

# start training
cql.fit(train_episodes,
        eval_episodes=test_episodes,
        n_epochs=10,
        scorers={
            'environment': evaluate_on_environment(env), # use d4rl-atari environment
            'advantage': discounted_sum_of_advantage_scorer, # smaller is better
            'td_error': td_error_scorer, # smaller is better
            'value_scale': average_value_estimation_scorer # smaller is better
        })