
Acer #231 (Open)
wants to merge 49 commits into base: master

Changes from all commits
49 commits
ce4a56c
__init__ file for vec
random-user-x Jun 19, 2018
9fbb6cb
Added functions to __init__. Derived from OpenAI baselines
random-user-x Jul 23, 2018
5ecac5a
small change
random-user-x Jul 23, 2018
930eb8a
Added common folder
random-user-x Jul 23, 2018
46a9b9d
added tile_images
random-user-x Jul 23, 2018
ad5709e
Added basic functions from OpenAI Baselines
random-user-x Jul 23, 2018
878d07a
small changes
random-user-x Jul 23, 2018
fe32696
Added Runners
random-user-x Jul 24, 2018
a92cf39
Simple Example added.
random-user-x Jul 25, 2018
93fd8f0
Episodic Memory added
random-user-x Jul 26, 2018
004ccda
Merge branch 'master' of https://github.com/keras-rl/keras-rl into HEAD
random-user-x Jul 26, 2018
997f846
Merge branch 'addEpisodicMemory' into synchronousMultiAgent
random-user-x Jul 26, 2018
20001a4
Added Softmax Policy
random-user-x Aug 1, 2018
946641c
focus on ACER
random-user-x Aug 4, 2018
38bbf0a
Added without memory version acer
random-user-x Aug 7, 2018
abd489d
Final version working
random-user-x Aug 8, 2018
6a4ea61
remove unnecessary files
random-user-x Aug 8, 2018
fefa373
Example ACER Working without memory and trust region
random-user-x Aug 8, 2018
94ac539
Working without memory and trust region
random-user-x Aug 8, 2018
c4c3e95
Added ACER
random-user-x Aug 8, 2018
162086f
Added Softmax Policy
random-user-x Aug 8, 2018
ad81d3c
Added trust region
random-user-x Aug 8, 2018
234a318
Single agent version working.
random-user-x Aug 8, 2018
adabca7
Removing previous work
random-user-x Aug 8, 2018
11437e7
Minor
random-user-x Aug 8, 2018
3ed73f7
Minor changes
random-user-x Aug 8, 2018
62c52e8
Minor Changes. Remove render for synchronous agents.
random-user-x Aug 14, 2018
f7aa96c
Add reference
random-user-x Aug 14, 2018
92eaaf7
Small changes
random-user-x Aug 14, 2018
c5b26d8
Add seed to multiprocessing
random-user-x Aug 14, 2018
2a351d6
minor
random-user-x Aug 14, 2018
46553ae
Merge branch 'MPI' into ACER
random-user-x Aug 14, 2018
1d64a48
Merge branch 'master' of https://github.com/keras-rl/keras-rl into ACER
random-user-x Aug 14, 2018
598d6c0
Working
random-user-x Aug 15, 2018
731e66c
Final version of acer working.
random-user-x Aug 15, 2018
9571b6e
Example changed.
random-user-x Aug 15, 2018
dca3257
Readme
random-user-x Aug 15, 2018
6d67c93
Make acer folder for better understanding
random-user-x Aug 15, 2018
8594f88
Added test function support to acer
random-user-x Aug 16, 2018
e6dc145
Make testing fast
random-user-x Aug 16, 2018
a43ace2
Test is fast. Things are working
random-user-x Aug 16, 2018
f9f786f
Style changes
random-user-x Aug 16, 2018
3893133
Style changes
random-user-x Aug 16, 2018
3420feb
Revert changes
random-user-x Aug 17, 2018
ea83149
Refactor codes
random-user-x Aug 17, 2018
d1face0
Metric changed to None
random-user-x Aug 17, 2018
c9f44d5
Change in test model definition. Made a new function
random-user-x Aug 17, 2018
2936f1d
Introduce argparse
random-user-x Aug 17, 2018
dd0ce06
add nenvs to callbacks
random-user-x Aug 17, 2018
6 changes: 6 additions & 0 deletions README.md
@@ -34,11 +34,17 @@ As of today, the following algorithms have been implemented:
- [x] Cross-Entropy Method (CEM) [[7]](http://learning.mpi-sws.org/mlss2016/slides/2016-MLSS-RL.pdf), [[8]](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf)
- [x] Dueling network DQN (Dueling DQN) [[9]](https://arxiv.org/abs/1511.06581)
- [x] Deep SARSA [[10]](http://people.inf.elte.hu/lorincz/Files/RL_2006/SuttonBook.pdf)
- [x] Sample Efficient Actor-Critic (ACER) [[5]](https://arxiv.org/abs/1611.01224)
- [ ] Asynchronous Advantage Actor-Critic (A3C) [[5]](http://arxiv.org/abs/1602.01783)
- [ ] Proximal Policy Optimization Algorithms (PPO) [[11]](https://arxiv.org/abs/1707.06347)

You can find more information on each agent in the [doc](http://keras-rl.readthedocs.io/en/latest/agents/overview/).

### Note

The current version of ACER supports simple toy environments; Atari support is planned.
You can create synchronous environments with `make_gym_env` from `cmd_util.py` in the `common` folder.
See `acer_cartpole.py` in the `examples` folder for a complete example.
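For instance, a minimal sketch of setting up the synchronous environments (the values here are only illustrative; the full setup lives in the example script):

```python
from rl.common.cmd_util import make_gym_env

nenvs = 4                                      # number of parallel environments
env = make_gym_env('CartPole-v1', nenvs, 123)  # env id, number of envs, seed
```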

## Installation

112 changes: 112 additions & 0 deletions examples/acer_cartpole.py
@@ -0,0 +1,112 @@
import os
# Force CPU execution (can be faster for this small model).
# Remove these two lines if you want to use the GPU.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

import numpy as np
import gym

from keras import backend as K
from keras.models import Model
from keras.layers import Dense, Activation, Input, ReLU
from keras.optimizers import Adam

from rl.agents import ACERAgent
from rl.agents.acer.episode_memory import EpisodeMemory
from rl.policy import SoftmaxPolicy
from rl.common.cmd_util import make_gym_env
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

# TODO : Add support for atari
# The current implementation supports simple toy games.

ENV_NAME = 'CartPole-v1'

# Define the number of environments and steps
nenvs = 4
nsteps = 50

# make_gym_env creates synchronous environments.
# It only supports actor-critic frameworks.

env = make_gym_env(ENV_NAME, nenvs, 123)
np.random.seed(123)
env.seed(123)

# Action is discrete
nb_actions = env.action_space.n
obs_shape = env.observation_space.shape

# Define the model-building function
def model_fn(inp, name='inputs'):
    inps = Input(tensor=inp, name=name)

    # Define your model here.

    # Note: parameter sharing is not supported yet, so define two separate
    # parallel sub-networks for the actor and critic.

    x_actor = Dense(32, activation='relu')(inps)
    x_actor = Dense(16)(x_actor)
    x_actor = ReLU(max_value=80.)(x_actor)

    x_critic = Dense(32, activation='relu')(inps)
    x_critic = Dense(16, activation='relu')(x_critic)

    # Actor and critic outputs of the model
    actor_output = Dense(nb_actions, activation='softmax')(x_actor)
    critic_output = Dense(nb_actions, activation='linear')(x_critic)

    # Input list to the model
    inputs = [inps]

    # Output list to the model
    outputs = [critic_output, actor_output]

    model = Model(inputs=inputs, outputs=outputs)
    return model, inputs, outputs

# Policy of the actor model.
policy = SoftmaxPolicy()

# Experience memory of the agent
memory = EpisodeMemory(nsteps, 50000)
agent = ACERAgent(memory, model_fn, nb_actions, obs_shape, policy=policy, nenvs=nenvs, nsteps=nsteps)

# Define the optimizer to be used
opt = Adam(lr=0.00005, clipvalue=10.)

# Currently compile() does not support metrics.
agent.compile(opt)

mode = 'train'
if mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that
    # training can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'acer_{}_weights.h5f'.format(ENV_NAME)
    checkpoint_weights_filename = 'acer_' + ENV_NAME + '_weights_{step}.h5f'
    log_filename = 'acer_{}_log.json'.format(ENV_NAME)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000)]
    callbacks += [FileLogger(log_filename, interval=5000)]
    agent.fit(env, callbacks=callbacks, nb_steps=50000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    agent.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    env = gym.make(ENV_NAME)
    agent.test(env, nb_episodes=10, visualize=False)
elif mode == 'test':
    weights_filename = 'acer_{}_weights.h5f'.format(ENV_NAME)
    # if args.weights:
    #     weights_filename = args.weights
    agent.load_weights(weights_filename)
    env = gym.make(ENV_NAME)
    agent.test(env, nb_episodes=10, visualize=False)
# print (abc.losses)
1 change: 1 addition & 0 deletions rl/agents/__init__.py
@@ -1,4 +1,5 @@
from __future__ import absolute_import
from .acer import ACERAgent
from .dqn import DQNAgent, NAFAgent, ContinuousDQNAgent
from .ddpg import DDPGAgent
from .cem import CEMAgent
1 change: 1 addition & 0 deletions rl/agents/acer/__init__.py
@@ -0,0 +1 @@
from .acer import ACERAgent
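With this one-line `__init__.py` (together with the import added to `rl/agents/__init__.py` above), the agent is importable from the package root, which is how the example script imports it:

```python
from rl.agents import ACERAgent
```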