In [1]:
import numpy as np
import random
import gym_graph
import gym
from collections import defaultdict
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import keras 
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from keras.optimizers import Adam
import keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
# Number of stacked steps per sample: used below as the leading dimension of the
# model's input shape and as the `window_length` of the replay memory.
WINDOW_LENGTH = 12

Using TensorFlow backend.


In [3]:
# Single seed shared by every random number generator in play, so that a
# Restart & Run All reproduces the same run.
SEED = 123

# Environment id is presumably registered with gym by the `gym_graph` import
# at the top of the notebook -- TODO confirm.
env = gym.make("simple-static-graph-v0")

# Seed Python's `random` as well as numpy and the environment; `random` is
# imported by this notebook and was previously left unseeded.
random.seed(SEED)
np.random.seed(SEED)
env.seed(SEED)

# Size of the discrete action space; used for the network's output layer and the agent.
nb_actions = env.action_space.n

SyntaxError: Missing parentheses in call to 'print'. Did you mean print("Environment reset")? (graph_env.py, line 136)

In [None]:
# Reset once up front and keep the initial observation: its length is used in a
# later cell to size the network input.
initObs = env.reset()
# Render the freshly reset environment (what is drawn is defined by the env itself).
env.render()


In [None]:

class EnvProcessor(Processor):
    """Pass-through processor that echoes each observation and reward to stdout.

    Neither hook transforms its input; both print the value they receive and
    return it unchanged.
    """

    @staticmethod
    def _echo(value):
        """Print ``value`` and hand the same object back to the caller."""
        print(value)
        return value

    def process_observation(self, observation):
        """Print ``observation`` and return it unmodified."""
        return self._echo(observation)

    def process_reward(self, reward):
        """Print ``reward`` and return it unmodified."""
        return self._echo(reward)

In [None]:
# Shape of a single observation, taken from the initial observation's length
# (assumes observations are flat, fixed-length sequences -- TODO confirm).
INPUT_SHAPE = (len(initObs),)
# The network sees WINDOW_LENGTH stacked observations per sample.
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE

model = Sequential()
model.add(Flatten(input_shape=input_shape))
# Each hidden Dense layer gets its own nonlinearity; two Dense layers with
# nothing in between collapse into a single linear map.
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dense(512))
model.add(Activation('relu'))
# Linear output head: a DQN network regresses one unbounded Q-value per action.
# A softmax here would force the outputs into a probability distribution and
# make the Q-value targets unreachable.
model.add(Dense(nb_actions, activation='linear'))
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

In [None]:
# Echoing pass-through processor defined by EnvProcessor.
# NOTE(review): the DQNAgent(...) call in the agent cell does not receive
# `processor`, so as written this object appears to be unused -- confirm
# whether it should be passed as `processor=processor`.
processor = EnvProcessor()

# Experience memory for the agent; its window length matches the number of
# stacked observations the model expects.
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

In [None]:
# Exploration: epsilon-greedy with `eps` annealed linearly from 1.0 down to 0.05
# over 1,000,000 steps; 0.05 is also the value used at test time.
# BoltzmannQPolicy(tau=1.) is a common alternative worth experimenting with.
# NOTE(review): the training cell runs for 200,000 steps, so epsilon never gets
# close to value_min within a single run -- confirm the annealing horizon.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=.05,
    value_test=.05,
    nb_steps=1000000,
)

# Assemble the agent from the network, exploration policy and memory built above.
# NOTE(review): warmup/target-update/train-interval values look carried over from
# a much longer training setup -- verify they suit this environment.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    policy=policy,
    memory=memory,
    nb_steps_warmup=50000,
    gamma=.99,
    target_model_update=10000,
    train_interval=4,
    delta_clip=1.,
)

optimizer = Adam(lr=.00025)
dqn.compile(optimizer, metrics=['mae'])


In [None]:

# Train the agent. Interrupting the kernel stops training early (keras-rl's fit
# is expected to handle the interrupt itself -- see its documentation), after
# which the final save below still runs. keras-rl callbacks provide periodic
# weight checkpoints and a JSON training log.
NB_TRAIN_STEPS = 200000
# Must be smaller than NB_TRAIN_STEPS: an interval longer than the whole run
# would never trigger a checkpoint.
CHECKPOINT_INTERVAL = 50000

weights_filename = 'dqn_{}_weights.h5f'.format("graph-v0")
# `{step}` is left unformatted here; it is filled in when a checkpoint is written.
checkpoint_weights_filename = 'dqn_graph-v0_weights_{step}.h5f'
log_filename = 'dqn_{}_log.json'.format("graph-v0")

callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=CHECKPOINT_INTERVAL),
    FileLogger(log_filename, interval=100),
]
dqn.fit(env, callbacks=callbacks, nb_steps=NB_TRAIN_STEPS, log_interval=10000, verbose=2)

# After training is done, save the final weights one more time.
dqn.save_weights(weights_filename, overwrite=True)

# Finally, evaluate the trained agent for 10 episodes with rendering enabled.
dqn.test(env, nb_episodes=10, visualize=True)

In [None]:
# NOTE(review): no visible cell creates a matplotlib figure explicitly; presumably
# this displays whatever env.render() drew -- confirm, or remove this cell.
plt.show()