# Deep Q-Learning (DQN) on CartPole using Keras and keras-rl

In [6]:
import numpy as np
import gym


from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory

# Gym env and actions

In [7]:
# --- Environment configuration -------------------------------------------
ENV_NAME = 'CartPole-v0'
SEED = 1  # one seed shared by NumPy and the environment

# Build the Gym environment first, then seed both NumPy's global RNG and
# the environment itself with the same value.
env = gym.make(ENV_NAME)
np.random.seed(SEED)
env.seed(SEED)

# Size of the discrete action space; used as the width of the network's
# output layer and passed to the agent.
nb_actions = env.action_space.n

In [8]:
# Network input shape: a leading axis of length 1 followed by the
# environment's observation shape.
# NOTE(review): the leading 1 presumably mirrors window_length=1 used by the
# replay memory further down -- keep the two in sync.
observation_shape = env.observation_space.shape
input_shape = (1, *observation_shape)
print(input_shape)

(1, 4)


# Simple NN model

In [9]:
# Small fully connected Q-network:
#   Flatten -> Dense(16) -> ReLU -> Dense(nb_actions) -> linear
# The final layer has one unit per action and a linear activation, so the
# outputs are unbounded values (one per action).
model = Sequential([
    # Reuse the shape computed in the previous cell instead of rebuilding
    # the same expression here, so the two can never drift apart.
    Flatten(input_shape=input_shape),
    Dense(16),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])

# summary() prints the table itself and returns None; wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_3 (Dense)              (None, 16)                80        
_________________________________________________________________
activation_3 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 34        
_________________________________________________________________
activation_4 (Activation)    (None, 2)                 0         
Total params: 114
Trainable params: 114
Non-trainable params: 0
_________________________________________________________________
None


In [10]:
# Exploration policy: epsilon-greedy whose `eps` attribute is annealed
# linearly from value_max to value_min over nb_steps; value_test is the
# epsilon used when the agent is run in test mode.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.00,
    value_min=.05,
    value_test=.05,
    nb_steps=10000,
)

# Replay memory holding up to `limit` entries; window_length=1 means a single
# observation per state, matching the leading 1 in the model's input shape.
memory = SequentialMemory(limit=10000, window_length=1)

# Assemble the DQN agent from the network, the replay memory and the policy.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=10,
    target_model_update=100,
    policy=policy,
)

# Compile with Adam; mean absolute error is tracked as an extra metric.
dqn.compile(Adam(lr=0.001), metrics=['mae'])

W1118 20:02:53.499825 4514584000 deprecation_wrapper.py:119] From /Users/sandeep/anaconda3/envs/py36/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:159: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W1118 20:02:53.500586 4514584000 deprecation_wrapper.py:119] From /Users/sandeep/anaconda3/envs/py36/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:164: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

W1118 20:02:53.598199 4514584000 deprecation_wrapper.py:119] From /Users/sandeep/anaconda3/envs/py36/lib/python3.7/site-packages/keras/optimizers.py:711: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W1118 20:02:53.634010 4514584000 deprecation.py:506] From /Users/sandeep/anaconda3/envs/py36/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:1247: calling reduce_sum_v1 (from tensorflow.python.ops.math_ops) with

In [14]:
# Train the agent on the environment for 50,000 steps.
# visualize=True renders the environment while training; verbose=2 selects
# the more detailed of keras-rl's logging modes.
dqn.fit(
    env,
    nb_steps=50000,
    visualize=True,
    verbose=2,
)

In [12]:
# Evaluate the trained agent for 100 episodes, rendering each one.
# Keep the returned History object instead of discarding it: left as a bare
# expression it only shows up as an opaque "<keras.callbacks.History ...>"
# repr and the per-episode results cannot be inspected afterwards.
test_history = dqn.test(env, nb_episodes=100, visualize=True)

# keras-rl records one 'episode_reward' entry per test episode; report the
# average so the run has a single headline number.
episode_rewards = test_history.history['episode_reward']
print('Mean reward over {} test episodes: {:.1f}'.format(
    len(episode_rewards), np.mean(episode_rewards)))

Testing for 100 episodes ...
Episode 1: reward: 177.000, steps: 177
Episode 2: reward: 189.000, steps: 189
Episode 3: reward: 172.000, steps: 172
Episode 4: reward: 161.000, steps: 161
Episode 5: reward: 177.000, steps: 177
Episode 6: reward: 157.000, steps: 157
Episode 7: reward: 159.000, steps: 159
Episode 8: reward: 162.000, steps: 162
Episode 9: reward: 171.000, steps: 171
Episode 10: reward: 165.000, steps: 165
Episode 11: reward: 193.000, steps: 193
Episode 12: reward: 179.000, steps: 179
Episode 13: reward: 183.000, steps: 183
Episode 14: reward: 199.000, steps: 199
Episode 15: reward: 183.000, steps: 183
Episode 16: reward: 169.000, steps: 169
Episode 17: reward: 154.000, steps: 154
Episode 18: reward: 166.000, steps: 166
Episode 19: reward: 181.000, steps: 181
Episode 20: reward: 192.000, steps: 192
Episode 21: reward: 161.000, steps: 161
Episode 22: reward: 170.000, steps: 170
Episode 23: reward: 172.000, steps: 172
Episode 24: reward: 200.000, steps: 200
Episode 25: reward: 

<keras.callbacks.History at 0x14bbb16d8>

In [13]:
# Close the environment once training and evaluation are finished.
# NOTE(review): the execution counts show dqn.fit was re-run after this cell;
# presumably the env should be re-created with gym.make(ENV_NAME) before
# re-running fit/test -- confirm.
env.close()