Skip to content

Commit

Permalink
fixed some interactive.py bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
ryan-lowe committed Oct 6, 2017
1 parent dc2d761 commit ba72459
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 25 deletions.
10 changes: 5 additions & 5 deletions bin/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
scenario = scenarios.load(args.scenario).Scenario()
# create world
world = scenario.make_world()
# create multiagent environment
# create multiagent environment
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, shared_viewer = False)
# render call to create viewer window (necessary only for interactive policies)
env.render()
Expand All @@ -33,7 +33,7 @@
# step environment
obs_n, reward_n, done_n, _ = env.step(act_n)
# render all agent views
env.render()
# display rewards
for agent in env.world.agents:
print(agent.name + " reward: %0.3f" % env._get_reward(agent))
env.render()
# display rewards
#for agent in env.world.agents:
# print(agent.name + " reward: %0.3f" % env._get_reward(agent))
8 changes: 4 additions & 4 deletions multiagent/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim),))
agent.action.c = np.zeros(self.world.dim_c)


# rendering
self.shared_viewer = shared_viewer
if self.shared_viewer:
Expand All @@ -90,7 +89,7 @@ def _step(self, action_n):
self._set_action(action_n[i], agent, self.action_space[i])
# advance world state
self.world.step()
# record observation for each agent # TODO: clean up
# record observation for each agent
for agent in self.agents:
obs_n.append(self._get_obs(agent))
reward_n.append(self._get_reward(agent))
Expand Down Expand Up @@ -147,12 +146,13 @@ def _set_action(self, action, agent, action_space, time=None):
act.append(action[index:(index+s)])
index += s
action = act
else:
action = [action]
#else:
# action = [action] # TODO: why is this necessary??

if agent.movable:
# physical action
if self.discrete_action_input:
print(action)
agent.action.u = np.zeros(self.world.dim_p)
# process discrete action
if action[0] == 1: agent.action.u[0] = -1.0
Expand Down
20 changes: 10 additions & 10 deletions multiagent/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,33 @@ class InteractivePolicy(Policy):
def __init__(self, env, agent_index):
super(InteractivePolicy, self).__init__()
self.env = env
# hard-coded keyborad events
# hard-coded keyboard events
self.move = [False for i in range(4)]
self.comm = [False for i in range(env.world.dim_c)]
# register keyboard events with this envornment's window
# register keyboard events with this environment's window
env.viewers[agent_index].window.on_key_press = self.key_press
env.viewers[agent_index].window.on_key_release = self.key_release

def action(self, obs):
# ignore observation and just act based on keyboard events
if self.env.discrete_action_space:
if self.env.discrete_action_input:
u = 0
if self.move[0]: u = 1
if self.move[1]: u = 2
if self.move[2]: u = 4
if self.move[3]: u = 3
else:
u = np.array([0.0,0.0])
if self.move[0]: u[0] -= 1.0
if self.move[1]: u[0] += 1.0
if self.move[2]: u[1] += 1.0
if self.move[3]: u[1] -= 1.0
u = np.array([0.0,0.0,0.0,0.0])
if self.move[0]: u[0] += 1.0
if self.move[1]: u[1] += 1.0
if self.move[2]: u[2] += 1.0
if self.move[3]: u[3] += 1.0
c = 0
for i in range(len(self.comm)):
if self.comm[i]: c = i+1
return [u, c]

    # keyboard event callbacks
    # keyboard event callbacks
def key_press(self, k, mod):
if k==key.LEFT: self.move[0] = True
if k==key.RIGHT: self.move[1] = True
Expand All @@ -53,4 +53,4 @@ def key_release(self, k, mod):
if k==key.UP: self.move[2] = False
if k==key.DOWN: self.move[3] = False
for i in range(len(self.comm)):
if k==key._1+i: self.comm[i] = False
if k==key._1+i: self.comm[i] = False
9 changes: 4 additions & 5 deletions multiagent/scenarios/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def make_world(self):
for i, agent in enumerate(world.agents):
agent.name = 'agent %d' % i
agent.collide = False
agent.silent = True
agent.silent = True
# add landmarks
world.landmarks = [Landmark() for i in range(1)]
for i, landmark in enumerate(world.landmarks):
Expand All @@ -24,11 +24,11 @@ def make_world(self):
def reset_world(self, world):
# random properties for agents
for i, agent in enumerate(world.agents):
agent.color = np.array([0.25,0.25,0.25])
agent.color = np.array([0.25,0.25,0.25])
# random properties for landmarks
for i, landmark in enumerate(world.landmarks):
landmark.color = np.array([0.75,0.75,0.75])
world.landmarks[0].color = np.array([0.75,0.25,0.25])
world.landmarks[0].color = np.array([0.75,0.25,0.25])
# set random initial states
for agent in world.agents:
agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
Expand All @@ -42,10 +42,9 @@ def reward(self, agent, world):
dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
return -dist2 #np.exp(-dist2)

def observation(self, agent, world):
def observation(self, agent, world):
# get positions of all entities in this agent's reference frame
entity_pos = []
for entity in world.landmarks:
entity_pos.append(entity.state.p_pos - agent.state.p_pos)
return np.concatenate([agent.state.p_vel] + entity_pos)

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(name='multiagent',
version='0.0.1',
description='Multi-Agent Goal-Driven Communication Environment',
url='https://github.com/openai/multiagent',
url='https://github.com/openai/multiagent-public',
author='Igor Mordatch',
author_email='mordatch@openai.com',
packages=find_packages(),
Expand Down

0 comments on commit ba72459

Please sign in to comment.