In [1]:
#!apt-get install -y xvfb python-opengl > /dev/null 2>&1
#!pip install gym pyvirtualdisplay > /dev/null 2>&1
#!apt-get install x11-utils
#!pip install keras-rl
#!pip install keras==2.2.4
#!pip install tensorflow-gpu==1.13.1 

In [2]:
from __future__ import division
import gym
import atari_py
from PIL import Image
import numpy as np
import keras
import tensorflow as tf

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute, Input
from keras.optimizers import Adam
import keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
#print(atari_py.list_games())

# Show the Keras and TensorFlow versions in use so the run can be reproduced.
print(keras.__version__)
print(tf.__version__)


Using TensorFlow backend.


2.2.4
1.13.1


In [3]:
# Shape of one preprocessed frame as a NumPy array: (rows, cols).
INPUT_SHAPE = (84, 84)
# Number of consecutive frames stacked into one network input.
WINDOW_LENGTH = 4

class AtariProcessor(Processor):
    """Preprocess raw Atari frames for the DQN agent.

    Frames are resized to ``INPUT_SHAPE`` and converted to grayscale. They are
    kept as ``uint8`` while they sit in the replay memory and are only rescaled
    to ``float32`` values in [0, 1] when a batch is handed to the network, so
    the network sees the same values as a per-frame ``astype('float32')/255``
    would produce while the memory needs a quarter of the space.
    """

    def process_observation(self, observation):
        """Return the observation as a grayscale ``uint8`` array of shape ``INPUT_SHAPE``.

        Args:
            observation: array with three dimensions (height, width, channel).

        Returns:
            2-D ``uint8`` array of shape ``INPUT_SHAPE``.
        """
        assert observation.ndim == 3  # (height, width, channel)
        img = Image.fromarray(observation)
        # PIL's resize expects (width, height) whereas INPUT_SHAPE is the
        # (rows, cols) shape of the resulting array, so pass it reversed.
        img = img.resize(INPUT_SHAPE[::-1]).convert('L')  # resize and convert to grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        # Keep uint8 here: this is the value that gets stored in the replay memory.
        return processed_observation.astype('uint8')

    def process_state_batch(self, batch):
        """Scale a batch of stacked frames from [0, 255] to ``float32`` in [0, 1]."""
        return batch.astype('float32') / 255.

    # Reward clipping is currently disabled; kept for reference:
    #def process_reward(self, reward):
     #   return np.clip(reward, -1., 1.)

In [4]:
# Choose the Atari environment to run. The number after each name looks like
# that game's action-space size — NOTE(review): confirm these counts against
# env.action_space.n before relying on them.
#game = 'Freeway-v0' #3
#game = 'BeamRider-v0' #9
game = 'Centipede-v0' #18
#game = 'Breakout-v0' #18
#game = 'DemonAttack-v0' #6
#game = 'AirRaid-v0' #6
env = gym.make(game)
# Seed NumPy and the environment with the same fixed value.
np.random.seed(123)
env.seed(123)
#env.reset()
# Number of discrete actions offered by the selected game.
nb_actions = env.action_space.n
#print(nb_actions)

# Width of the fixed-size dense layer that precedes the per-game output layer
# in the model cell; it does not depend on the selected game.
total_actions = 18

In [5]:
# One network input is a stack of WINDOW_LENGTH preprocessed frames:
# (WINDOW_LENGTH, rows, cols).
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE

input_layer = Input(shape=input_shape)
if K.image_dim_ordering() == 'tf':
    # Channels-last backend: move the frame-stack axis to the end,
    # giving (rows, cols, WINDOW_LENGTH).
    permute_layer = Permute((2, 3, 1))(input_layer)
elif K.image_dim_ordering() == 'th':
    # Channels-first backend: the input is already (WINDOW_LENGTH, rows, cols),
    # so this permutation leaves the axes where they are.
    permute_layer = Permute((1, 2, 3))(input_layer)
else:
    raise RuntimeError('Unknown image_dim_ordering for environment 1.')

# Three convolutional layers followed by a 512-unit dense layer.
Conv_1 = Convolution2D(32, (8, 8), strides=(4, 4), activation = 'relu')(permute_layer)
Conv_2 = Convolution2D(64, (4, 4), strides=(2, 2), activation = 'relu')(Conv_1)
Conv_3 = Convolution2D(64, (3, 3), strides=(1, 1), activation = 'relu')(Conv_2)
flatten_layer = Flatten()(Conv_3)
dense_layer = Dense(512, activation = 'relu')(flatten_layer)
# Fixed-width linear layer (total_actions units, independent of the game),
# followed by a linear head with one output per action of the selected game.
# NOTE(review): presumably the fixed-width layer exists so that everything
# below the head can be shared between games — confirm.
dense_actions_layer = Dense(total_actions, activation = 'linear')(dense_layer)
output_layer = Dense(nb_actions, activation = 'linear') (dense_actions_layer)

model = Model(inputs = [input_layer], outputs = [output_layer])
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 4, 84, 84)         0         
_________________________________________________________________
permute_1 (Permute)          (None, 84, 84, 4)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 32)        8224      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 64)          36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dens

In [6]:
# Replay memory; each sampled state is a window of WINDOW_LENGTH observations.
memory = SequentialMemory(
    limit=1000000,
    window_length=WINDOW_LENGTH,
)
# Frame preprocessing applied by the agent to every observation.
processor = AtariProcessor()

# Epsilon-greedy exploration whose `eps` attribute is annealed linearly between
# value_max and value_min over nb_steps; value_test is the setting used by the
# policy when the agent is not training.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=100000,
)

# Assemble the DQN agent from the model, policy, memory and processor above.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=50000,
    target_model_update=10000,
    train_interval=4,
)
dqn.compile(
    Adam(lr=.00025),
    metrics=['mae'],
)



In [10]:
# Load the saved weights and run the agent for 100 test episodes without rendering.
dqn.load_weights('DQN_model2_weights_Centipede_v2')
history = dqn.test(env, nb_episodes=100, visualize=False)

# Average the per-episode rewards with NumPy (already imported at the top of
# the notebook) rather than importing another helper in the middle of the notebook.
episode_rewards = history.history['episode_reward']
print('Average Reward : {}'.format(float(np.mean(episode_rewards))))

Testing for 100 episodes ...
Episode 1: reward: 292.000, steps: 962
Episode 2: reward: 4532.000, steps: 1357
Episode 3: reward: 1644.000, steps: 1505
Episode 4: reward: 3313.000, steps: 1529
Episode 5: reward: 2567.000, steps: 1413
Episode 6: reward: 2338.000, steps: 1319
Episode 7: reward: 1363.000, steps: 1179
Episode 8: reward: 4782.000, steps: 1749
Episode 9: reward: 1478.000, steps: 1262
Episode 10: reward: 2290.000, steps: 1187
Episode 11: reward: 2023.000, steps: 1198
Episode 12: reward: 428.000, steps: 775
Episode 13: reward: 1861.000, steps: 1349
Episode 14: reward: 326.000, steps: 699
Episode 15: reward: 1829.000, steps: 1128
Episode 16: reward: 669.000, steps: 924
Episode 17: reward: 1707.000, steps: 1035
Episode 18: reward: 860.000, steps: 1475
Episode 19: reward: 1466.000, steps: 1097
Episode 20: reward: 1083.000, steps: 893
Episode 21: reward: 3933.000, steps: 2775
Episode 22: reward: 1629.000, steps: 1510
Episode 23: reward: 1053.000, steps: 1220
Episode 24: reward: 343.

In [8]:
# Close the Gym environment once evaluation is done.
env.close()