In [None]:
import numpy as np
import matplotlib.pyplot as plt

import gym
import warnings
# Install a blanket "ignore" filter so that no warnings appear in the output
# of later cells.
# NOTE(review): this hides every warning category, including ones that may
# point at real problems -- consider narrowing it once the notebook is stable.
warnings.filterwarnings(action='ignore')

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

## Set Environment

In [None]:
ENV_NAME = 'CartPole-v0'
SEED = 90  # single source of truth for every RNG seeded in this cell

# Build the environment and keep the innermost (unwrapped) instance.
# NOTE(review): `.unwrapped` presumably also drops gym's episode step limit,
# so episodes only end on a terminal state -- confirm that this is intended.
env = gym.make(ENV_NAME).unwrapped

# Seed the environment *and* NumPy's global RNG. Previously only the
# environment was seeded, so any NumPy-driven randomness (assumed to include
# keras-rl's exploration and replay sampling -- verify for the installed
# version) changed from run to run.
# The Keras backend keeps its own RNG, which this does not cover.
env.seed(SEED)
np.random.seed(SEED)

In [None]:
# Render one frame of a freshly reset environment, then report the
# observation and action spaces the agent will work with.
print('Environment Display:')
env.reset()   # begin a new episode; the returned observation is not used here
env.render()

space_reports = (
    ('State space {}', env.observation_space),
    ('Action space {}', env.action_space),
)
for template, space in space_reports:
    print(template.format(space))

## Build DQN Architecture

In [None]:
# Number of discrete actions = width of the Q-network's output layer.
# This name was never assigned anywhere, so the notebook raised NameError on
# a fresh kernel; it is also consumed later when the DQN agent is created.
nb_actions = env.action_space.n

# Build the whole network in ONE cell. Creating `Sequential()` in one cell
# and calling `.add()` in another meant that re-running the second cell
# stacked a duplicate set of layers onto the same model.
#
# Architecture: flatten the (window, observation) input -> 16-unit ReLU
# hidden layer -> linear output with one value per action.
# NOTE(review): the leading `1` in the input shape is assumed to correspond to
# the replay memory's window length (WINDOW_LENGTH) -- keep them in sync.
model = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(16),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])

# `summary()` prints the table itself and returns None, so wrapping it in
# print() emitted a stray "None" line after the table.
model.summary()

## Set Configs

In [None]:
# --- Replay memory ---------------------------------------------------------
MEMORY_LIMIT = 50000    # passed as SequentialMemory(limit=...)
WINDOW_LENGTH = 1       # passed as SequentialMemory(window_length=...)

# --- Agent -----------------------------------------------------------------
NB_WARMUP = 10          # passed as DQNAgent(nb_steps_warmup=...)
MODEL_UPDATE = 0.01     # passed as DQNAgent(target_model_update=...)

# --- Optimisation / training -----------------------------------------------
LR = 0.001              # learning rate given to the Adam optimizer
NB_STEPS = 5000         # passed as dqn.fit(nb_steps=...)
LOSS = 'mae'            # NOTE: despite the name, this is handed to
                        # dqn.compile(metrics=[...]), i.e. used as a metric

## Set Memory, Policy & Init DQN

In [None]:
# Replay buffer for the agent; capacity and window size come from the
# config constants.
memory = SequentialMemory(
    window_length=WINDOW_LENGTH,
    limit=MEMORY_LIMIT,
)

In [None]:
# Action-selection policy handed to the DQN agent. No arguments are given, so
# the library's default settings apply.
# NOTE(review): the default epsilon is not visible here -- check the keras-rl
# docs if the exploration rate matters for this experiment.
policy = EpsGreedyQPolicy()

In [None]:
# Assemble the DQN agent from the Q-network, replay memory and exploration
# policy, using the warm-up length and target-update setting from the config.
# NOTE(review): MODEL_UPDATE is below 1, which keras-rl presumably treats as
# a soft-update rate rather than a step interval -- confirm against its docs.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=NB_WARMUP,
    target_model_update=MODEL_UPDATE,
)

## Set Optimizer

In [None]:
# Compile the agent with an Adam optimizer at learning rate LR.
# LOSS is supplied through `metrics`, so it is reported as a metric here
# rather than selecting the training loss.
optimizer = Adam(lr=LR)
dqn.compile(optimizer, metrics=[LOSS])

## Train DQN

In [None]:
# Train the agent on `env` for NB_STEPS steps with visualisation enabled, and
# keep the returned history object so the rewards can be plotted afterwards.
history = dqn.fit(
    env,
    nb_steps=NB_STEPS,
    visualize=True,
    verbose=2,
)

In [None]:
# Plot the 'episode_reward' series recorded in the training history.
episode_rewards = history.history['episode_reward']

ax = plt.gca()  # current axes -- the same target the pyplot helpers draw on
ax.plot(episode_rewards)
ax.set_xlabel('Episode')
ax.set_ylabel('Total of Rewards')
plt.show()

## Test DQN

In [None]:
# Run the trained agent for 10 evaluation episodes with visualisation enabled.
# Left as a bare expression so the cell still displays the call's return value.
dqn.test(
    env,
    nb_episodes=10,
    visualize=True,
)

---