In [1]:
import numpy as np

import gym
from gym.envs.registration import registry, register, make, spec

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute, Input, Reshape
from tensorflow.keras.optimizers import Adam


from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

from car_racing_v1 import *

In [2]:
# Register the 'CarRacing-v1' id with gym, capped at 2000 steps per episode
# and with a reward threshold of 900.
# NOTE(review): the environment used later in this notebook is created by
# calling CarRacing() directly, so this registration does not appear to be
# used by the visible cells -- confirm whether it is still needed.
register(id='CarRacing-v1',
         entry_point='gym.envs.box2d:CarRacing',
         max_episode_steps=2000,
         reward_threshold=900)

In [3]:
# Create the racing environment and read off the two sizes the network needs.
ENV_NAME = 'CarRacing-v1'  # only used further down to name the weight/log files
env = CarRacing()

# The observation shape drives the model input; the number of entries in the
# action space drives the width of the model's output layer.
input_shape = env.observation_space.shape
nb_actions = len(env.action_space)



In [4]:
# Sanity check: show the sizes extracted from the environment above.
print("nb actions = ", nb_actions)
print("observation_space.shape = ", input_shape)

nb actions =  16
observation_space.shape =  (96, 96, 3)


In [5]:
def build_model(input_shape, nb_actions):
    """Build the convolutional Q-network used by the DQN agent.

    The model expects each observation wrapped in a leading length-1 axis
    (input shape ``(1,) + input_shape``), drops that axis with a ``Reshape``
    layer, then applies three ReLU convolutions, a 512-unit ReLU dense layer
    and a final linear layer with one output per action.

    Args:
        input_shape: Shape of a single observation, e.g. ``(96, 96, 3)``.
        nb_actions: Number of discrete actions, i.e. the size of the output
            layer.

    Returns:
        The uncompiled ``Sequential`` model. The input shape and the layer
        summary are printed as a side effect.
    """
    print(input_shape)
    obs_shape = tuple(input_shape)

    model = Sequential()
    # Derive the wrapped input shape from the argument instead of hard-coding
    # (1, 96, 96, 3), so the model follows whatever the environment reports.
    model.add(Reshape(obs_shape, input_shape=(1,) + obs_shape))

    # NOTE(review): the convolutions use the default stride, so a 96x96 frame
    # is still 84x84x64 when it reaches Flatten and the Dense(512) layer that
    # follows is very large. Consider adding strides -- confirm the intended
    # architecture before changing it.
    model.add(Convolution2D(32, (8, 8)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (4, 4)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (3, 3)))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))

    # One linear output per action: the predicted Q-value of that action.
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    model.summary()
    return model

In [6]:
# Build the Q-network sized for this environment's observations and actions.
model = build_model(input_shape=input_shape, nb_actions=nb_actions)

(96, 96, 3)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 96, 96, 3)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 89, 89, 32)        6176      
_________________________________________________________________
activation (Activation)      (None, 89, 89, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 86, 86, 64)        32832     
_________________________________________________________________
activation_1 (Activation)    (None, 86, 86, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 84, 84, 64)        36928     
_________________________________________________________________
activation_2 (Activation)    (None, 84, 84, 

In [7]:
def build_agent(model, nb_actions, memory_limit=50000, anneal_steps=3000,
                nb_steps_warmup=10, target_model_update=1e-2):
    """Assemble a DQN agent around ``model``.

    Exploration is epsilon-greedy with epsilon annealed linearly from 1.0 to
    0.1 over ``anneal_steps`` steps (0.05 is used as the test-time value).
    Experience is stored in a ``SequentialMemory`` with a window length of 1.

    Args:
        model: The Q-network, with one output per action.
        nb_actions: Number of discrete actions.
        memory_limit: Capacity passed to ``SequentialMemory(limit=...)``.
        anneal_steps: Number of steps over which epsilon is annealed.
        nb_steps_warmup: Passed through to ``DQNAgent(nb_steps_warmup=...)``.
        target_model_update: Passed through to
            ``DQNAgent(target_model_update=...)``.

    Returns:
        The uncompiled ``DQNAgent``.
    """
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=anneal_steps,
    )
    # window_length stays fixed at 1 to match the (1, ...) input shape that
    # build_model in this notebook declares.
    replay = SequentialMemory(limit=memory_limit, window_length=1)
    return DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        nb_actions=nb_actions,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=target_model_update,
    )

In [8]:
# Create the agent and compile it with Adam; mean absolute error is tracked
# as an extra metric.
dqn = build_agent(model, nb_actions)
# `learning_rate` is the supported argument name in tf.keras optimizers;
# `lr` is only a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [9]:
# Output paths, all derived from the environment name. The '{step}'
# placeholder is left in the checkpoint name literally so that it is still
# present when the name is handed to ModelIntervalCheckpoint.
weights_filename = 'dqn_%s_weights.h5f' % ENV_NAME
checkpoint_weights_filename = 'dqn_%s_weights_{step}.h5f' % ENV_NAME
log_filename = 'dqn_%s_log.json' % ENV_NAME

# Training callbacks: a weight checkpoint with interval 500 and a JSON log
# with interval 100.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500),
    FileLogger(log_filename, interval=100),
]

In [10]:
# Train the agent. The `callbacks` list built in the previous cell is passed
# in here; without it the checkpoint and log callbacks are never invoked and
# no intermediate weights or training log are written.
dqn.fit(
    env,
    nb_steps=3000,
    callbacks=callbacks,
    verbose=1,
    nb_max_episode_steps=1000,
    action_repetition=3,
    visualize=False,
)

Training for 3000 steps ...
Track generation: 971..1224 -> 253-tiles track
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
9


TypeError: 'int' object is not subscriptable

In [None]:
 # After training is done, we save the final weights one more time.
dqn.save_weights(filepath=weights_filename, overwrite=True)

In [None]:
# Finally, evaluate the trained agent for 10 episodes without rendering.
# The action repetition and per-episode step cap mirror the values given to
# dqn.fit above, so the agent is evaluated under the same conditions it was
# trained in and an episode that never terminates cannot stall the run.
dqn.test(
    env,
    nb_episodes=10,
    action_repetition=3,
    nb_max_episode_steps=1000,
    visualize=False,
)