In [1]:
!pip install keras-rl2
!pip install 'gym[all]'

Collecting gast==0.3.3
  Using cached gast-0.3.3-py2.py3-none-any.whl (9.7 kB)
Installing collected packages: gast
  Attempting uninstall: gast
    Found existing installation: gast 0.2.2
    Uninstalling gast-0.2.2:
      Successfully uninstalled gast-0.2.2
Successfully installed gast-0.3.3


ERROR: Invalid requirement: "'gym[all]'"


In [None]:
%%bash

# install required system dependencies (Xvfb provides the off-screen X server)
sudo apt-get install -y xvfb x11-utils

# install required python dependencies (might need to install additional gym extras depending)
# Each requirement is quoted so the shell cannot treat `[box2d]` or `*` as
# glob patterns and expand them against files in the working directory.
pip install "gym[box2d]==0.17.*" "pyvirtualdisplay==0.2.*" "PyOpenGL==3.1.*" "PyOpenGL-accelerate==3.1.*"

In [None]:
import pyvirtualdisplay


# Create a virtual display that is not shown on screen (visible=False is the
# setting to use with Xvfb) with a 1400x900 canvas, then start it.
_display = pyvirtualdisplay.Display(size=(1400, 900), visible=False)
# Bind the return value to `_` so the cell does not echo it as output.
_ = _display.start()

In [None]:
import numpy as np

import gym
from gym.envs.registration import registry, register, make, spec

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute, Input, Reshape
from tensorflow.keras.optimizers import Adam


from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

from car_racing_v1 import *

In [None]:
# Register the 'CarRacing-v1' id only if it is not already known, so this cell
# can be re-run in a live kernel (gym 0.17 raises "Cannot re-register id" on a
# second registration of the same id).
# `registry.env_specs` is the id -> spec mapping in gym 0.17; the getattr
# fallback covers registries that are themselves a plain mapping.
# NOTE(review): the visible cells never pass this id to gym.make -- the env is
# created by calling CarRacing() directly -- confirm this registration (and its
# entry point) is still what is intended.
if 'CarRacing-v1' not in getattr(registry, 'env_specs', registry):
    register(
        id='CarRacing-v1',
        entry_point='gym.envs.box2d:CarRacing',
        max_episode_steps=2000,
        reward_threshold=900,
    )

In [None]:
# Instantiate the racing environment and read off the two sizes the network
# needs: the shape of one observation and the length of the action space.
ENV_NAME = 'CarRacing-v1'
env = CarRacing()

input_shape = env.observation_space.shape
# NOTE(review): relies on env.action_space supporting len() -- confirm against
# the CarRacing class that is in scope here.
nb_actions = len(env.action_space)

In [None]:
# Show the sizes extracted from the environment in the previous cell.
print("nb actions = ", nb_actions)
print("observation_space.shape = ", input_shape)

In [None]:
def build_model(input_shape, nb_actions):
    """Build the convolutional Q-network used by the DQN agent.

    The model accepts inputs shaped ``(1,) + input_shape`` (a leading window
    axis of length 1), removes that axis with a ``Reshape``, applies three
    convolution + ReLU stages, a 512-unit ReLU dense layer, and a final
    linear dense layer with one output per action. A summary of the model
    is printed before it is returned.

    Args:
        input_shape: Shape of a single observation, e.g. ``(96, 96, 3)``.
        nb_actions: Number of outputs of the final layer.

    Returns:
        The (uncompiled) ``Sequential`` model.
    """
    frame_shape = tuple(input_shape)

    model = Sequential()

    # Derive the windowed input shape from `input_shape` instead of hard-coding
    # (1, 96, 96, 3); the leading 1 matches the window_length=1 replay memory
    # the agent is built with.
    model.add(Reshape(frame_shape, input_shape=(1,) + frame_shape))

    # NOTE(review): the convolutions use Keras' default stride of 1 and 'valid'
    # padding, so for a 96x96 frame the Flatten output is 84*84*64 values and
    # the Dense(512) layer is very large -- consider adding strides.
    model.add(Convolution2D(32, (8, 8)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (4, 4)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (which would emit an extra "None" line).
    model.summary()
    return model

In [None]:
# Build the Q-network for the observation shape and action count read from env.
model = build_model(input_shape=input_shape, nb_actions=nb_actions)

In [None]:
def build_agent(model, nb_actions, *, eps_max=1., eps_min=.1, eps_test=.05,
                eps_anneal_steps=10000, memory_limit=50000,
                nb_steps_warmup=500, target_model_update=100):
    """Assemble a DQN agent around ``model``.

    The agent uses an epsilon-greedy policy whose ``eps`` attribute is
    linearly annealed, and a sequential replay memory with a window length
    of 1. All keyword defaults reproduce the values that were previously
    hard-coded, so ``build_agent(model, nb_actions)`` behaves as before.

    Args:
        model: The Q-network to train.
        nb_actions: Number of actions, forwarded to ``DQNAgent``.
        eps_max: Passed as ``value_max`` to ``LinearAnnealedPolicy``.
        eps_min: Passed as ``value_min`` to ``LinearAnnealedPolicy``.
        eps_test: Passed as ``value_test`` to ``LinearAnnealedPolicy``.
        eps_anneal_steps: Passed as ``nb_steps`` to ``LinearAnnealedPolicy``.
        memory_limit: Passed as ``limit`` to ``SequentialMemory``.
        nb_steps_warmup: Forwarded to ``DQNAgent``.
        target_model_update: Forwarded to ``DQNAgent``.

    Returns:
        The (uncompiled) ``DQNAgent``.
    """
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=eps_max,
        value_min=eps_min,
        value_test=eps_test,
        nb_steps=eps_anneal_steps,
    )
    # window_length stays fixed at 1: the model built in this notebook declares
    # an input with a leading window axis of length 1.
    memory = SequentialMemory(limit=memory_limit, window_length=1)
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        nb_actions=nb_actions,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=target_model_update,
    )
    return dqn

In [None]:
# Build the agent and compile it with Adam, tracking mean absolute error.
dqn = build_agent(model, nb_actions)
# `learning_rate` is the supported name of Adam's step-size argument; `lr` is a
# deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [None]:
# Output file names, all derived from the environment id.
weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
# The doubled braces keep a literal `{step}` in the resulting name
# (presumably substituted by the checkpoint callback -- confirm).
checkpoint_weights_filename = 'dqn_{}_weights_{{step}}.h5f'.format(ENV_NAME)
log_filename = 'dqn_{}_log.json'.format(ENV_NAME)

# Training callbacks: interval checkpointing of the weights and a JSON file logger.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=2500),
    FileLogger(log_filename, interval=100),
]

In [None]:
# Train the agent. The `callbacks` list built in the previous cell is passed
# explicitly; without it the checkpoint and file-logger callbacks never run.
dqn.fit(env, nb_steps=3000, callbacks=callbacks, log_interval=100, verbose=1,
        nb_max_episode_steps=100, action_repetition=3, visualize=False)

In [None]:
 # After training is done, we save the final weights one more time.
# overwrite=True replaces any weights file already present under this name.
dqn.save_weights(filepath=weights_filename, overwrite=True)

In [None]:
# Evaluation run: 10 episodes with rendering switched off.
dqn.test(env, visualize=False, nb_episodes=10)