
Commit

gifs
rohan-sawhney committed Dec 18, 2017
1 parent f8a68c9 commit c09369e
Showing 15 changed files with 23 additions and 23 deletions.
12 changes: 9 additions & 3 deletions README.md
@@ -2,6 +2,12 @@

[DQN](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf), [DDPG](https://arxiv.org/abs/1509.02971) and [MADDPG](https://arxiv.org/abs/1706.02275) to play tag in OpenAI's [multi-agent particle environment](https://github.com/openai/multiagent-particle-envs)

-1 vs 1 | 1 vs 2 | 2 vs 1
-:-----:|:------:|:-------:
-![](1vs1.gif "1 vs 1") | ![](1vs2.gif "1 vs 2") | ![](2vs1.gif "2 vs 1")
+DQN 1 vs 1 | DQN 1 vs 2 | DQN 2 vs 1
+:---------:|:----------:|:----------:
+![](gifs/dqn_1vs1.gif "1 vs 1") | ![](gifs/dqn_1vs2.gif "1 vs 2") | ![](gifs/dqn_2vs1.gif "2 vs 1")
+DDPG 1 vs 1 | DDPG 1 vs 2 | DDPG 2 vs 1
+:----------:|:-----------:|:-----------:
+![](gifs/ddpg_1vs1.gif "1 vs 1") | ![](gifs/ddpg_1vs2.gif "1 vs 2") | ![](gifs/ddpg_2vs1.gif "2 vs 1")
+MADDPG 1 vs 1 | MADDPG 1 vs 2 | MADDPG 2 vs 1
+:------------:|:-------------:|:-------------:
+![](gifs/maddpg_1vs1.gif "1 vs 1") | ![](gifs/maddpg_1vs2.gif "1 vs 2") | ![](gifs/maddpg_2vs1.gif "2 vs 1")
9 changes: 3 additions & 6 deletions ddpg.py
@@ -7,7 +7,7 @@
class Actor:

def __init__(self, scope, session, n_actions, action_bound,
-eval_states, target_states, learning_rate=0.0001, tau=0.001):
+eval_states, target_states, learning_rate=0.001, tau=0.01):
self.session = session
self.n_actions = n_actions
self.action_bound = action_bound
@@ -37,10 +37,7 @@ def build_network(self, x, scope, trainable):
h1 = tf.layers.dense(x, 50, activation=tf.nn.relu,
kernel_initializer=W, bias_initializer=b,
name='h1', trainable=trainable)
-h2 = tf.layers.dense(h1, 50, activation=tf.nn.relu,
-kernel_initializer=W, bias_initializer=b,
-name='h2', trainable=trainable)
-actions = tf.layers.dense(h2, self.n_actions, activation=tf.nn.tanh,
+actions = tf.layers.dense(h1, self.n_actions, activation=tf.nn.tanh,
kernel_initializer=W, bias_initializer=b,
name='actions', trainable=trainable)
scaled_actions = tf.multiply(actions, self.action_bound,
@@ -70,7 +67,7 @@ class Critic:

def __init__(self, scope, session, n_actions, actor_eval_actions,
actor_target_actions, eval_states, target_states,
-rewards, learning_rate=0.001, gamma=0.9, tau=0.001):
+rewards, learning_rate=0.001, gamma=0.9, tau=0.01):
self.session = session
self.n_actions = n_actions
self.actor_eval_actions = actor_eval_actions
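For context on the hyperparameter changes above: tau is the soft-update rate for the target networks, so raising it from 0.001 to 0.01 makes the target actor and critic track the evaluation networks ten times faster. A minimal sketch of how such a tau soft update is typically expressed in TensorFlow 1.x (the helper name and scope arguments below are illustrative, not this repository's API):

    import tensorflow as tf

    def soft_update_ops(eval_scope, target_scope, tau=0.01):
        # target <- tau * eval + (1 - tau) * target, applied variable by variable
        eval_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=eval_scope)
        target_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=target_scope)
        return [t.assign(tau * e + (1.0 - tau) * t)
                for e, t in zip(eval_params, target_params)]

Running the returned ops once per training step keeps the targets a slowly moving average of the online networks, which is exactly what a larger tau speeds up.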
6 changes: 3 additions & 3 deletions ddpg_tag.py
@@ -66,7 +66,7 @@ def play(episodes, is_render, is_testing, checkpoint_interval,

for i in range(env.n):
if done[i]:
-rewards[i] -= 50
+rewards[i] -= 500

memories[i].remember(states[i], actions[i],
rewards[i], states_next[i], done[i])
@@ -143,7 +143,7 @@ def play(episodes, is_render, is_testing, checkpoint_interval,
help="where to load network weights")
parser.add_argument('--random_seed', default=2, type=int)
parser.add_argument('--memory_size', default=10000, type=int)
-parser.add_argument('--batch_size', default=128, type=int)
+parser.add_argument('--batch_size', default=32, type=int)
parser.add_argument('--ou_mus', nargs='+', type=float,
help="OrnsteinUhlenbeckActionNoise mus for each action for each agent")
parser.add_argument('--ou_sigma', nargs='+', type=float,
@@ -242,7 +242,7 @@ def play(episodes, is_render, is_testing, checkpoint_interval,
state = tf.placeholder(tf.float32, shape=[None, state_size])
reward = tf.placeholder(tf.float32, [None, 1])
state_next = tf.placeholder(tf.float32, shape=[None, state_size])
-speed = 0.9 if env.agents[i].adversary else 1
+speed = 0.8 if env.agents[i].adversary else 1

actors.append(Actor('actor' + str(i), session, n_action, speed,
state, state_next))
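On the batch_size change: each learning step samples that many transitions uniformly from the replay memory, so dropping the default from 128 to 32 trades gradient stability for cheaper updates. Only the remember call is visible in the diff above; the buffer below is an illustrative sketch of the sampling this implies, not the repo's Memory class:

    import random
    from collections import deque

    class ReplayMemory:
        def __init__(self, memory_size=10000):
            # oldest transitions are evicted first once the buffer is full
            self.buffer = deque(maxlen=memory_size)

        def remember(self, state, action, reward, state_next, done):
            self.buffer.append((state, action, reward, state_next, done))

        def sample(self, batch_size=32):
            # uniform random minibatch; assumes the buffer already holds at least batch_size items
            return random.sample(self.buffer, batch_size)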
Binary file added gifs/ddpg_1vs1.gif
Binary file added gifs/ddpg_1vs2.gif
Binary file added gifs/ddpg_2vs1.gif
File renamed without changes
File renamed without changes
File renamed without changes
Binary file added gifs/maddpg_1vs1.gif
Binary file added gifs/maddpg_1vs2.gif
Binary file added gifs/maddpg_2vs1.gif
9 changes: 3 additions & 6 deletions maddpg.py
@@ -7,7 +7,7 @@
class Actor:

def __init__(self, scope, session, n_actions, action_bound,
-eval_states, target_states, learning_rate=0.0001, tau=0.001):
+eval_states, target_states, learning_rate=0.001, tau=0.01):
self.session = session
self.n_actions = n_actions
self.action_bound = action_bound
@@ -37,10 +37,7 @@ def build_network(self, x, scope, trainable):
h1 = tf.layers.dense(x, 50, activation=tf.nn.relu,
kernel_initializer=W, bias_initializer=b,
name='h1', trainable=trainable)
-h2 = tf.layers.dense(h1, 50, activation=tf.nn.relu,
-kernel_initializer=W, bias_initializer=b,
-name='h2', trainable=trainable)
-actions = tf.layers.dense(h2, self.n_actions, activation=tf.nn.tanh,
+actions = tf.layers.dense(h1, self.n_actions, activation=tf.nn.tanh,
kernel_initializer=W, bias_initializer=b,
name='actions', trainable=trainable)
scaled_actions = tf.multiply(actions, self.action_bound,
@@ -74,7 +71,7 @@ class Critic:

def __init__(self, scope, session, n_actions, actors_eval_actions,
actors_target_actions, eval_states, target_states,
-rewards, learning_rate=0.001, gamma=0.9, tau=0.001):
+rewards, learning_rate=0.001, gamma=0.9, tau=0.01):
self.session = session
self.n_actions = n_actions
self.actors_eval_actions = actors_eval_actions
6 changes: 3 additions & 3 deletions maddpg_tag.py
@@ -67,7 +67,7 @@ def play(episodes, is_render, is_testing, checkpoint_interval,

for i in range(env.n):
if done[i]:
-rewards[i] -= 50
+rewards[i] -= 500

memories[i].remember(states, actions, rewards[i],
states_next, done[i])
@@ -144,7 +144,7 @@ def play(episodes, is_render, is_testing, checkpoint_interval,
help="where to load network weights")
parser.add_argument('--random_seed', default=2, type=int)
parser.add_argument('--memory_size', default=10000, type=int)
-parser.add_argument('--batch_size', default=128, type=int)
+parser.add_argument('--batch_size', default=32, type=int)
parser.add_argument('--ou_mus', nargs='+', type=float,
help="OrnsteinUhlenbeckActionNoise mus for each action for each agent")
parser.add_argument('--ou_sigma', nargs='+', type=float,
@@ -234,7 +234,7 @@ def play(episodes, is_render, is_testing, checkpoint_interval,
state_size = env.observation_space[i].shape[0]
state = tf.placeholder(tf.float32, shape=[None, state_size])
state_next = tf.placeholder(tf.float32, shape=[None, state_size])
-speed = 0.9 if env.agents[i].adversary else 1
+speed = 0.8 if env.agents[i].adversary else 1

actors.append(Actor('actor' + str(i), session, n_action, speed,
state, state_next))
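One difference from ddpg_tag.py that is easy to miss in the flattened view: the MADDPG memories store the joint observations and actions of every agent, while the DDPG script stores only agent i's own transition. That joint record is what lets each MADDPG critic be trained with centralized information. Side by side, with both calls taken from the two scripts in this commit:

    # ddpg_tag.py: per-agent transition, agent i keeps only its own view
    memories[i].remember(states[i], actions[i], rewards[i], states_next[i], done[i])

    # maddpg_tag.py: joint transition across all agents, for a centralized critic
    memories[i].remember(states, actions, rewards[i], states_next, done[i])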
4 changes: 2 additions & 2 deletions multiagent/scenarios/simple_tag_guided.py
@@ -99,7 +99,7 @@ def agent_reward(self, agent, world):
if agent.collide:
for a in adversaries:
if self.is_collision(a, agent):
-rew -= 100
+rew -= 500

# agents are penalized for exiting the screen, so that they can be
# caught by the adversaries
@@ -132,7 +132,7 @@ def adversary_reward(self, agent, world):
for ag in agents:
for adv in adversaries:
if self.is_collision(ag, adv):
-rew += 100
+rew += 500
return rew

def observation(self, agent, world):
