Merge pull request #573 from BDonnot/dev_multiagent
Dev multiagent
BDonnot committed Jan 22, 2024
2 parents 6111332 + f8dac63 commit 94ecfb4
Showing 21 changed files with 954 additions and 257 deletions.
11 changes: 9 additions & 2 deletions .readthedocs.yml
@@ -1,7 +1,14 @@
version: 2
version: "2"

build:
  os: "ubuntu-22.04"
  tools:
    python: "3.10"

sphinx:
  configuration: docs/conf.py

python:
  version: 3.8
  install:
    - method: pip
      path: .
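Note: the rendered hunk above does not show +/- markers. Read together with the hunk header (@@ -1,7 +1,14 @@, i.e. 9 additions and 2 deletions), the new .readthedocs.yml can plausibly be reconstructed as follows; it is assumed that the two deleted lines are the old `version: 2` and the `version: 3.8` entry under `python:`, and the placement of blank lines is inferred.

    version: "2"

    build:
      os: "ubuntu-22.04"
      tools:
        python: "3.10"

    sphinx:
      configuration: docs/conf.py

    python:
      install:
        - method: pip
          path: .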
9 changes: 9 additions & 0 deletions CHANGELOG.rst
@@ -34,11 +34,20 @@ Change Log

[1.9.8] - 20xx-yy-zz
----------------------
- [FIXED] the `backend.check_kirchoff` function was not correct when some elements were disconnected
  (the wrong columns of `p_bus` and `q_bus` were set in case of disconnected elements)
- [FIXED] `PandapowerBackend` when no slack was present
- [FIXED] the "BaseBackendTest" class did not correctly detect divergence in most cases (which led
  to weird bugs in failing tests)
- [FIXED] an issue with imageio having deprecated the `fps` kwarg (see https://github.com/rte-france/Grid2Op/issues/569)
- [ADDED] a type of environment that does not perform the "emulation of the protections"
  for some parts of the grid (`MaskedEnvironment`), see https://github.com/rte-france/Grid2Op/issues/571
  (a usage sketch is given after this file's diff)
- [IMPROVED] the CI speed, by not testing every possible numpy version but only the most ancient and the most recent ones
- [IMPROVED] the Runner now tests grid2op versions 1.9.6 and 1.9.7
- [IMPROVED] refactored `gridobj_cls._clear_class_attribute` and `gridobj_cls._clear_grid_dependant_class_attributes`
- [IMPROVED] the behaviour of the generic class `MakeBackend` used for the test suite
- [IMPROVED] re-introduced Python 3.12 testing
- [IMPROVED] error messages in the automatic test suite (`AAATestBackendAPI`)

[1.9.7] - 2023-12-01
----------------------
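The `MaskedEnvironment` mentioned in the 1.9.8 entry above could be used roughly as sketched below. This is only a hedged sketch: the import path and the `lines_of_interest` keyword are assumptions based on the linked issue and should be checked against the grid2op documentation.

    import numpy as np
    import grid2op
    from grid2op.Environment import MaskedEnvironment  # assumed import path

    env = grid2op.make("l2rpn_case14_sandbox")

    # assumed semantics: protections are emulated only for the lines flagged True
    lines_of_interest = np.ones(env.n_line, dtype=bool)
    lines_of_interest[[0, 1]] = False  # no protection emulation on these two lines

    masked_env = MaskedEnvironment(env, lines_of_interest=lines_of_interest)
    obs = masked_env.reset()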
2 changes: 1 addition & 1 deletion docs/action.rst
@@ -85,7 +85,7 @@ you want to perform on the grid. For more information you can consult the help o

To avoid extremely verbose things, as of grid2op 1.5.0, we introduced some convenience functions to allow
easier action construction. You can now do `act.load_set_bus = ...` instead of the previously way
more verbose `act.update({"set_bus": {"loads_id": ...}}`
more verbose `act.update({"set_bus": {"loads_id": ...}})`

.. _action-module-examples:

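As a concrete illustration of the convenience setters described in the docs/action.rst paragraph above, the two forms below should build the same action (the load id and bus number are arbitrary):

    import grid2op

    env = grid2op.make("l2rpn_case14_sandbox")

    # verbose dictionary form
    act1 = env.action_space()
    act1.update({"set_bus": {"loads_id": [(0, 2)]}})  # put load 0 on bus 2

    # property-based form introduced in grid2op 1.5.0
    act2 = env.action_space()
    act2.load_set_bus = [(0, 2)]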
2 changes: 1 addition & 1 deletion docs/environment.rst
@@ -101,7 +101,7 @@ be equivalent to starting into the "middle" of a video game. If that is the case
Finally, you might have noticed that each call to "env.reset" might take a while. This can dramatically
increase the training time, especially at the beginning. This is due to the fact that each time
`env.reset` is called, the whole chronics is read from the hard drive. If you want to lower this
impact then you might consult the `Optimize the data pipeline`_ section.
impact then you might consult the :ref:`environment-module-data-pipeline` page of the doc.

.. _environment-module-chronics-info:

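One way to reduce the `env.reset` overhead mentioned in the docs/environment.rst paragraph above is to cache the chronics in memory, which is what the referenced data-pipeline page covers. A minimal sketch, assuming the documented `MultifolderWithCache` class and its filter API (the filter pattern is arbitrary; check the data-pipeline page for the exact calls):

    import re
    import grid2op
    from grid2op.Chronics import MultifolderWithCache

    env = grid2op.make("l2rpn_case14_sandbox", chronics_class=MultifolderWithCache)

    # keep only some scenarios in the cache, then load the cache once
    env.chronics_handler.real_data.set_filter(lambda path: re.match(".*0$", path) is not None)
    env.chronics_handler.real_data.reset()

    # subsequent resets read the time series from memory instead of the hard drive
    obs = env.reset()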
@@ -17,9 +17,9 @@

# agent_name : controlled substation id
zones = {"agent_0": [0, 1, 2, 3, 4],
"agent_1": [5,6,7,8,9,10,11,12,13]}
"agent_1": [5, 6, 7, 8, 9, 10, 11, 12, 13]}
env = MultiAgentEnv(cent_env, action_domains=zones)

env.seed(0)
dict_obs = env.reset()
# dict with: key=agent_name, value=the SubGridObservation

154 changes: 106 additions & 48 deletions examples/multi_agents/ray_example.py
@@ -9,14 +9,15 @@
"""example with centralized observation and local actions"""
import warnings
import numpy as np
import copy

from gym.spaces import Discrete, Box

from ray.rllib.env.multi_agent_env import MultiAgentEnv as MAEnv
from ray.rllib.policy.policy import PolicySpec, Policy

import grid2op
from grid2op.Action.PlayableAction import PlayableAction
from grid2op.Action import PlayableAction
from grid2op.multi_agent.multiAgentEnv import MultiAgentEnv
from grid2op.gym_compat import GymEnv, BoxGymObsSpace, DiscreteActSpace

@@ -40,10 +40,18 @@
class MAEnvWrapper(MAEnv):
def __init__(self, env_config=None):
super().__init__()
if env_config is None:
env_config = {}

# you can customize stuff by using the "env config" if you want
backend = LightSimBackend()
if "backend_cls" in env_config:
backend = env_config["backend_cls"]
# you can do the same for other attribute to the environment

env = grid2op.make(ENV_NAME,
action_class=PlayableAction,
backend=LightSimBackend())
backend=backend)


self.ma_env = MultiAgentEnv(env, ACTION_DOMAINS)
Expand All @@ -55,41 +64,66 @@ def __init__(self, env_config=None):
# with the grid2op / gym interface.
self._gym_env = GymEnv(env)
self._gym_env.observation_space.close()

obs_attr_to_keep = ["gen_p", "rho"]
if "obs_attr_to_keep" in env_config:
obs_attr_to_keep = copy.deepcopy(env_config["obs_attr_to_keep"])
self._gym_env.observation_space = BoxGymObsSpace(env.observation_space,
attr_to_keep=["gen_p",
"rho"],
attr_to_keep=obs_attr_to_keep,
replace_nan_by_0=True # replace Nan by 0.
)

# we did not experiment yet with the "partially observable" setting
# so for now we suppose all agents see the same observation
# which is the full grid
self.observation_space = Box(shape=self._gym_env.observation_space.shape,
high=self._gym_env.observation_space.high,
low=self._gym_env.observation_space.low,
dtype=np.float32
)

# we represent the action as discrete action for now.
# It should work to encode them differently using the
# gym_compat module for example
self._conv_action_space = {
agent_id : DiscreteActSpace(self.ma_env.action_spaces[agent_id])
self._aux_observation_space = {
agent_id : BoxGymObsSpace(self.ma_env.observation_spaces[agent_id],
attr_to_keep=obs_attr_to_keep,
replace_nan_by_0=True # replace Nan by 0.
)
for agent_id in self.ma_env.agents
}

# to avoid "weird" pickle issues
self.action_space = {
agent_id : Discrete(n=self.ma_env.action_spaces[agent_id].n)
self.observation_space = {
agent_id : Box(low=self._aux_observation_space[agent_id].low,
high=self._aux_observation_space[agent_id].high,
dtype=self._aux_observation_space[agent_id].dtype)
for agent_id in self.ma_env.agents
}

def reset(self):
# we represent the action as discrete action for now.
# It should work to encode them differently using the
# gym_compat module for example
act_type = "discrete"
if "act_type" in env_config:
act_type = env_config["act_type"]

# for discrete actions
if act_type == "discrete":
self._conv_action_space = {
agent_id : DiscreteActSpace(self.ma_env.action_spaces[agent_id])
for agent_id in self.ma_env.agents
}

# to avoid "weird" pickle issues
self.action_space = {
agent_id : Discrete(n=self.ma_env.action_spaces[agent_id].n)
for agent_id in self.ma_env.agents
}
else:
raise NotImplementedError("Make the implementation in this case")

def reset(self, *, seed=None, options=None):
if seed is not None:
self.seed(seed)

# reset the underlying multi agent environment
obs = self.ma_env.reset()

return self._format_obs(obs)
return self._format_obs(obs), {}

def seed(self, seed):
return self.ma_env.seed(seed)

def _format_obs(self, grid2op_obs):
# NB we heavily use here that all agents see the same things
@@ -132,7 +166,9 @@ def step(self, actions):

# ignored for now
info = {}
return gym_obs, r, done, info
truncateds = {k: False for k in self.ma_env.agents}
truncateds['__all__'] = truncateds[first_agent_id]
return gym_obs, r, done, truncateds, info


def policy_mapping_fn(agent_id, episode, worker, **kwargs):
@@ -141,7 +177,8 @@ def policy_mapping_fn(agent_id, episode, worker, **kwargs):

if __name__ == "__main__":
import ray
from ray.rllib.agents.ppo import ppo
# from ray.rllib.agents.ppo import ppo
from ray.rllib.algorithms.ppo import PPO, PPOConfig
import json
import os
import shutil
@@ -164,34 +201,55 @@ def policy_mapping_fn(agent_id, episode, worker, **kwargs):
SELECT_ENV = MAEnvWrapper # Specifies the environment class used by RLlib
N_ITER = 1000 # Number of training runs.

config = ppo.DEFAULT_CONFIG.copy() # PPO's default configuration. See the next code cell.
config["log_level"] = "WARN" # Suppress too many messages, but try "INFO" to see what can be printed.

# Other settings we might adjust:
config["num_workers"] = 1 # Use > 1 for using more CPU cores, including over a cluster
config["num_sgd_iter"] = 10 # Number of SGD (stochastic gradient descent) iterations per training minibatch.
# I.e., for each minibatch of data, do this many passes over it to train.
config["sgd_minibatch_size"] = 64 # The amount of data records per minibatch
config["model"]["fcnet_hiddens"] = [100, 50] #
config["num_cpus_per_worker"] = 0 # This avoids running out of resources in the notebook environment when this cell is re-executed
config["vf_clip_param"] = 100

# multi agent specific config
config["multiagent"] = {
"policies" : {
"agent_0" : PolicySpec(
action_space=ray_ma_env.action_space["agent_0"]
),
"agent_1" : PolicySpec(
action_space=ray_ma_env.action_space["agent_1"]
)
},
"policy_mapping_fn": policy_mapping_fn,
"policies_to_train": ["agent_0", "agent_1"],
}
# config = ppo.DEFAULT_CONFIG.copy() # PPO's default configuration. See the next code cell.
# config["log_level"] = "WARN" # Suppress too many messages, but try "INFO" to see what can be printed.

# # Other settings we might adjust:
# config["num_workers"] = 1 # Use > 1 for using more CPU cores, including over a cluster
# config["num_sgd_iter"] = 10 # Number of SGD (stochastic gradient descent) iterations per training minibatch.
# # I.e., for each minibatch of data, do this many passes over it to train.
# config["sgd_minibatch_size"] = 64 # The amount of data records per minibatch
# config["model"]["fcnet_hiddens"] = [100, 50] #
# config["num_cpus_per_worker"] = 0 # This avoids running out of resources in the notebook environment when this cell is re-executed
# config["vf_clip_param"] = 100

# # multi agent specific config
# config["multiagent"] = {
# "policies" : {
# "agent_0" : PolicySpec(
# action_space=ray_ma_env.action_space["agent_0"]
# ),
# "agent_1" : PolicySpec(
# action_space=ray_ma_env.action_space["agent_1"]
# )
# },
# "policy_mapping_fn": policy_mapping_fn,
# "policies_to_train": ["agent_0", "agent_1"],
# }

# see ray doc for this...
# syntax changes every ray major version apparently...
config = PPOConfig()
config = config.training(gamma=0.9, lr=0.01, kl_coeff=0.3, train_batch_size=128)
config = config.resources(num_gpus=0)
config = config.rollouts(num_rollout_workers=1)

# multi agent parts
config.multi_agent(policies={
"agent_0" : PolicySpec(
action_space=ray_ma_env.action_space["agent_0"],
observation_space=ray_ma_env.observation_space["agent_0"]
),
"agent_1" : PolicySpec(
action_space=ray_ma_env.action_space["agent_1"],
observation_space=ray_ma_env.observation_space["agent_1"],
)
},
policy_mapping_fn = policy_mapping_fn,
policies_to_train= ["agent_0", "agent_1"])

#Trainer
agent = ppo.PPOTrainer(config, env=SELECT_ENV)
agent = PPO(config=config, env=SELECT_ENV)

results = []
episode_data = []
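Based only on the code shown in this diff, MAEnvWrapper reads the env_config keys "backend_cls", "obs_attr_to_keep" and "act_type", its reset takes a keyword-only seed and returns an (obs, infos) pair, and its step returns the 5-tuple (obs, rewards, dones, truncateds, infos). A usage sketch under those assumptions (it also assumes lightsim2grid is installed, since `LightSimBackend` is the default backend here):

    env_config = {
        "obs_attr_to_keep": ["gen_p", "rho"],  # attributes kept in the Box observation
        "act_type": "discrete",                # only the discrete encoding is implemented above
    }
    ray_ma_env = MAEnvWrapper(env_config=env_config)

    obs, infos = ray_ma_env.reset(seed=0)
    actions = {agent_id: space.sample()
               for agent_id, space in ray_ma_env.action_space.items()}
    obs, rewards, dones, truncateds, infos = ray_ma_env.step(actions)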
