# Deep Q Learning using Keras

In [1]:
# Show which Python interpreter this kernel runs on, to confirm the intended environment is active
import sys
sys.executable

'/usr/bin/python3'

In [2]:
import numpy as np
import gym

import keras.backend as k

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory

Using TensorFlow backend.


# Gym env and actions

In [13]:
ENV_NAME = 'CartPole-v0'
SEED = 1  # one seed value shared by NumPy's global RNG and the environment

# Create the Gym environment, then seed NumPy and the environment with the same value
env = gym.make(ENV_NAME)
np.random.seed(SEED)
env.seed(SEED)

# Number of actions reported by the environment's action space
nb_actions = env.action_space.n

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


# See the Input Shape

In [4]:
# Observation shape with a leading axis of length 1 prepended
input_shape = (1, *env.observation_space.shape)
print(input_shape)

(1, 4)


# Simple NN Model to Approximate the Q-Values

In [5]:
# Small fully connected network: flattened observation -> 16 ReLU units -> one linear output per action.
model = Sequential()
# Flatten the (1, observation_dim) input into a flat vector for the Dense layers.
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16, name="Dense_1"))
model.add(Activation('relu'))
# Output layer has nb_actions units with a linear activation.
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 4)                 0         
_________________________________________________________________
Dense_1 (Dense)              (None, 16)                80        
_________________________________________________________________
activation_1 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 34        
_________________________________________________________________
activation_2 (Activation)    (None, 2)                 0         
Total params: 114
Trainable params: 114
Non-trainable params: 0
_________________________________________________________________
None


# Creating the exploration policy and Memory buffer

In [6]:
# Replay memory: at most 10000 entries, window_length=1 (matches the leading 1 in the model's input shape)
memory = SequentialMemory(limit=10000, window_length=1)

# Exploration: an epsilon-greedy policy whose 'eps' attribute is controlled by
# LinearAnnealedPolicy (value_max=1.0, value_min=0.05, nb_steps=10000; value_test=0.05)
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.00,
    value_min=.05,
    value_test=.05,
    nb_steps=10000,
)

# Assemble the DQN agent from the model, memory and policy defined above
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=10,
    target_model_update=100,
    policy=policy,
)

# Compile with Adam (learning rate 0.001) and track mean absolute error as a metric
dqn.compile(Adam(lr=0.001), metrics=['mae'])

# Training the Agent

In [9]:
# Train the agent on `env` for 10000 steps with visualize=False and verbose=0;
# nb_steps matches the nb_steps=10000 given to the annealed exploration policy.
dqn.fit(env, nb_steps=10000, visualize=False, verbose=0)



<keras.callbacks.History at 0x7f506c03d358>

# Saving the Weights File

In [13]:
# Persist the trained agent's weights. overwrite=True replaces an existing file
# silently; with overwrite=False Keras asks for interactive confirmation when the
# file already exists, which stalls Restart & Run All and non-interactive runs.
dqn.save_weights('Trained_Keras_Cartpole.h5f', overwrite=True)

# Loading the Pretrained Agent

In [7]:
# Restore the agent's weights from 'Trained_Keras_Cartpole.h5f' (the file name used by the save cell)
dqn.load_weights('Trained_Keras_Cartpole.h5f')

# Testing the trained Agent

In [15]:
# Run the agent on `env` for a single test episode with visualize=True
dqn.test(env, nb_episodes=1, visualize=True)

Testing for 1 episodes ...
Episode 1: reward: 200.000, steps: 200


<keras.callbacks.History at 0x7f4ffa37b0b8>

In [16]:
# Close the environment now that testing is finished
env.close()