In [None]:
import numpy as np
import gym

import warnings
warnings.filterwarnings('ignore')

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

## Set Environment

In [None]:
# Identifier of the gym environment handed to `gym.make` in the next cell.
ENV_NAME = "CartPole-v0"

In [None]:
# One seed shared by the environment and NumPy, so the two values cannot
# silently drift apart when one of them is edited.
SEED = 9

# Create the environment and seed both random sources visible in this notebook.
# NOTE(review): only the gym environment and NumPy's global RNG are seeded here;
# Python's `random` module and the Keras backend are not, so runs may still
# differ — confirm whether full reproducibility is required.
env = gym.make(ENV_NAME)
env.seed(SEED)
np.random.seed(SEED)

# Size of the environment's discrete action space; reused later as the width of
# the network's final Dense layer and as the agent's `nb_actions`.
nb_actions = env.action_space.n

## Build DQN Architecture

In [None]:
# Start from an empty Sequential container; the layers are appended in the next cell.
model = Sequential()

In [None]:
# Layer stack, listed in the order it is appended to `model`:
# flatten the observation window -> 16-unit hidden layer with ReLU ->
# one linear output per action.
# The leading `(1,)` in the input shape is hard-coded here; presumably it has to
# agree with WINDOW_LENGTH defined further down — TODO confirm.
network_layers = (
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(16),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
)

for network_layer in network_layers:
    model.add(network_layer)

In [None]:
# `summary()` prints the layer table itself and returns None, so wrapping it in
# print() would emit a stray "None" line underneath the table.
model.summary()

## Set Configs

In [None]:
# --- Replay memory (arguments of SequentialMemory) ---
MEMORY_LIMIT = 50000   # passed as `limit`
WINDOW_LENGTH = 1      # passed as `window_length`

# --- Agent (arguments of DQNAgent) ---
NB_WARMUP = 10         # passed as `nb_steps_warmup`
MODEL_UPDATE = 0.01    # passed as `target_model_update`

# --- Optimisation / training ---
LR = 0.001             # learning rate handed to Adam
LOSS = "mae"           # NOTE: despite the name, this is passed via `metrics=[LOSS]` at compile time
NB_STEPS = 5000        # passed as `nb_steps` to dqn.fit

## Set Memory, Policy & Init DQN

In [None]:
# Replay memory handed to the agent below, sized and windowed by the
# constants from the config cell.
memory = SequentialMemory(
    limit=MEMORY_LIMIT,
    window_length=WINDOW_LENGTH,
)

In [None]:
# Epsilon-greedy action selection, constructed with the library's default arguments.
policy = EpsGreedyQPolicy()

In [None]:
# Assemble the agent from the pieces built above: the Q-network, the replay
# memory and the exploration policy, plus the warm-up and target-update
# settings from the config cell.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=NB_WARMUP,
    target_model_update=MODEL_UPDATE,
)

## Set Optimizer

In [None]:
# Compile the agent with an Adam optimizer at learning rate LR.
# LOSS ('mae') is supplied through `metrics`, i.e. it is tracked as a metric here.
dqn.compile(
    optimizer=Adam(lr=LR),
    metrics=[LOSS],
)

## Train DQN

In [None]:
# Train for NB_STEPS environment steps. The returned history object is kept in
# `history` so the per-episode results stay available for later inspection
# instead of being echoed as a bare object repr and then lost.
# NOTE(review): `visualize=True` renders the environment while training, which
# presumably needs a display and slows the run — confirm this is intended.
history = dqn.fit(env, nb_steps=NB_STEPS, visualize=True, verbose=2)

## Test DQN

In [None]:
# Evaluate the trained agent for 10 rendered episodes and keep the returned
# history object for inspection. The environment is closed afterwards — even if
# evaluation is interrupted — so the render window opened by `visualize=True`
# is released instead of lingering until the kernel is shut down.
try:
    test_history = dqn.test(env, nb_episodes=10, visualize=True)
finally:
    env.close()

---