from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


class MultiAgentEnv(object):
    """An environment that hosts multiple independent agents.

    Agents are identified by (string) agent ids. Note that these "agents" here
    are not to be confused with RLlib agents.

    Examples:
        >>> env = MyMultiAgentEnv()
        >>> obs = env.reset()
        >>> print(obs)
        {
            "car_0": [2.4, 1.6],
            "car_1": [3.4, -3.2],
            "traffic_light_1": [0, 3, 5, 1],
        }
        >>> obs, rewards, dones, infos = env.step(
            action_dict={
                "car_0": 1, "car_1": 0, "traffic_light_1": 2,
            })
        >>> print(rewards)
        {
            "car_0": 3,
            "car_1": -1,
            "traffic_light_1": 0,
        }
        >>> print(dones)
        {
            "car_0": False,
            "car_1": True,
            "__all__": False,
        }
    """

    def reset(self):
        """Resets the env and returns observations from ready agents.

        Returns:
            obs (dict): New observations for each ready agent.
        """
        raise NotImplementedError

    def step(self, action_dict):
        """Returns observations from ready agents.

        The returns are dicts mapping from agent_id strings to values. The
        number of agents in the env can vary over time.

        Returns
        -------
            obs (dict): New observations for each ready agent.
            rewards (dict): Reward values for each ready agent. If the
                episode has just started, the value will be None.
            dones (dict): Done values for each ready agent. The special key
                "__all__" (required) is used to indicate env termination.
            infos (dict): Info values for each ready agent.
        """
        raise NotImplementedError
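

# Example (not part of the original file): a minimal sketch of a subclass,
# illustrating the dict-keyed API described in the docstrings above. The
# environment name, agent ids, and reward logic here are hypothetical; only
# the MultiAgentEnv interface (reset/step returning per-agent dicts plus the
# required "__all__" done key) comes from the class above.
class TwoCarEnv(MultiAgentEnv):
    """Toy env where two agents each count up to a target value."""

    def __init__(self, target=3):
        self.target = target
        self.counts = {}

    def reset(self):
        self.counts = {"car_0": 0, "car_1": 0}
        # One observation per ready agent, keyed by agent id.
        return dict(self.counts)

    def step(self, action_dict):
        obs, rewards, dones, infos = {}, {}, {}, {}
        for agent_id, action in action_dict.items():
            # Action 1 advances the agent's counter; any other action does nothing.
            self.counts[agent_id] += 1 if action == 1 else 0
            obs[agent_id] = self.counts[agent_id]
            rewards[agent_id] = 1 if action == 1 else 0
            dones[agent_id] = self.counts[agent_id] >= self.target
            infos[agent_id] = {}
        # The "__all__" key is required and signals termination of the episode.
        dones["__all__"] = all(dones.values())
        return obs, rewards, dones, infos


if __name__ == "__main__":
    # Usage sketch: step all agents until the episode terminates.
    env = TwoCarEnv()
    obs = env.reset()
    done = False
    while not done:
        actions = {agent_id: 1 for agent_id in obs}
        obs, rewards, dones, infos = env.step(actions)
        done = dones["__all__"]
    print("final observations:", obs)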