# Deep Q Learning using Keras

In [None]:
# Show the path of the Python interpreter running this notebook, to confirm
# that the intended environment is the one in use.
import sys
sys.executable

In [None]:
import numpy as np
import gym

import keras.backend as k

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory

# Gym env and actions

In [None]:
# Identifier of the Gym environment to solve.
ENV_NAME = 'CartPole-v0'

# Create the environment, then seed both NumPy's global RNG and the
# environment with the same fixed value so that runs are repeatable.
env = gym.make(ENV_NAME)
np.random.seed(1)
env.seed(1)
# Number of actions available in the CartPole problem, read from the action
# space; reused later to size the network output and configure the agent.
nb_actions = env.action_space.n

# See the Input Shape

In [None]:
# Network input shape: the environment's observation shape with a leading
# axis of length 1 prepended.
input_shape = (1, *env.observation_space.shape)
print(input_shape)

# Simple NN model to approximate the Q Value

In [None]:
# Small fully connected network used to approximate the Q-values: it maps one
# observation window to one output per action.
model = Sequential()
# The input carries a leading axis of length 1 in front of the observation
# shape (presumably matching the replay memory's window_length=1 -- confirm);
# Flatten collapses it into a single feature vector.
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16, name="Dense_1"))
model.add(Activation('relu'))
# One unit per action with a linear activation, so the outputs are unbounded
# value estimates rather than probabilities.
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
model.summary()

# Creating the exploration policy and Memory buffer

In [None]:
# Replay buffer holding up to 10000 entries, with a window length of 1.
memory = SequentialMemory(limit=10000, window_length=1)

# Exploration schedule: an epsilon-greedy policy whose `eps` attribute is
# annealed from 1.0 down to 0.05 over 10000 steps; 0.05 is the value
# configured for testing.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.0,
    value_min=0.05,
    value_test=0.05,
    nb_steps=10000,
)

# Assemble the DQN agent from the Q-network, the replay memory and the
# exploration policy defined above.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=10,
    target_model_update=100,
)

# Compile with the Adam optimizer (learning rate 0.001) and track mean
# absolute error as a metric.
dqn.compile(Adam(lr=0.001), metrics=['mae'])

# Training the Agent

In [None]:
# Train the agent on the environment for 50000 steps, with visualization
# switched off and verbosity set to 0.
dqn.fit(
    env,
    nb_steps=50000,
    visualize=False,
    verbose=0,
)

# Saving the weights file

In [None]:
# Persist the trained weights to disk. overwrite=False is passed so that an
# existing file is not silently replaced (the underlying library is expected
# to ask for confirmation instead -- confirm for the installed version).
dqn.save_weights(
    'Trained_Keras_Cartpole.h5f',
    overwrite=False,
)

# Loading the Pretrained Agent

In [None]:
# Restore the agent's weights from the file written by the save step, so that
# testing can be run without retraining.
dqn.load_weights('Trained_Keras_Cartpole.h5f')

# Testing the trained Agent

In [None]:
# Evaluate the agent for 10 episodes with visualization turned on.
dqn.test(
    env,
    nb_episodes=10,
    visualize=True,
)

In [None]:
# Close the environment once testing is finished (presumably this also tears
# down the render window opened by visualize=True -- confirm).
env.close()