/
discrete_gym_actspace.py
375 lines (302 loc) · 16.6 KB
/
discrete_gym_actspace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import copy
import warnings
# from gym.spaces import Discrete
from grid2op.Exceptions import Grid2OpException
from grid2op.Action import ActionSpace
from grid2op.Converter import IdToAct
from grid2op.gym_compat.utils import (ALL_ATTR_FOR_DISCRETE,
ATTR_DISCRETE,
GYM_AVAILABLE,
GYMNASIUM_AVAILABLE)
# TODO test that it works normally
# TODO test the casting in dt_int or dt_float depending on the data
# TODO test the scaling
# TODO doc
# TODO test the function part
class __AuxDiscreteActSpace:
    """
    TODO the documentation of this class is in progress.

    This class allows to convert a grid2op action space into a gym "Discrete". This means that the actions are
    labeled, and instead of describing the action itself, you provide only its ID.

    Let's take an example of line disconnection. In the "standard" gym representation you need to:

    .. code-block:: python

        import grid2op
        import numpy as np
        from grid2op.gym_compat import GymEnv
        env_name = "l2rpn_case14_sandbox"  # or any other name
        env = grid2op.make(env_name)
        gym_env = GymEnv(env)

        # now do an action
        gym_act = {}
        gym_act["set_line_status"] = np.zeros(env.n_line, dtype=int)
        l_id = ...  # the line you want to disconnect
        gym_act["set_line_status"][l_id] = -1
        obs, reward, done, truncated, info = gym_env.step(gym_act)

    This has the advantage to be as close as possible to raw grid2op. But the main drawback is that
    most RL frameworks are not able to do this kind of modification easily. For discrete actions,
    what is often done is:

    1) enumerate all possible actions (say you have n different actions)
    2) assign a unique id to all actions (say from 0 to n-1)
    3) have a "policy" output a vector of size n with each component
       representing an action (eg `vect[42]` represents the score the policy assigns to action `42`)

    Instead of having everyone doing the modifications "on its own" we developed the :class:`DiscreteActSpace`
    that does exactly this, in a single line of code:

    .. code-block:: python

        import grid2op
        import numpy as np
        from grid2op.gym_compat import GymEnv, DiscreteActSpace
        env_name = "l2rpn_case14_sandbox"  # or any other name
        env = grid2op.make(env_name)
        gym_env = GymEnv(env)
        gym_env.action_space = DiscreteActSpace(env.action_space,
                                                attr_to_keep=["set_bus",
                                                              "set_line_status",
                                                              # or anything else
                                                              ]
                                                )

        # do action with ID 42
        gym_act = 42
        obs, reward, done, truncated, info = gym_env.step(gym_act)
        # to know what the action did, you can
        # print(gym_env.action_space.from_gym(gym_act))

    It is related to the :class:`MultiDiscreteActSpace` but compared to this other representation, it
    does not allow to do "multiple actions". Typically, if you use the snippets below:

    .. code-block:: python

        import grid2op
        env_name = "l2rpn_case14_sandbox"  # or any other name
        env = grid2op.make(env_name)

        from grid2op.gym_compat import GymEnv, MultiDiscreteActSpace, DiscreteActSpace
        gym_env1 = GymEnv(env)
        gym_env2 = GymEnv(env)

        gym_env1.action_space = MultiDiscreteActSpace(env.action_space,
                                                      attr_to_keep=['redispatch', "curtail", "one_sub_set"])
        gym_env2.action_space = DiscreteActSpace(env.action_space,
                                                 attr_to_keep=['redispatch', "curtail", "set_bus"])

    Then at each step, `gym_env1` will allow to perform a redispatching action (on any number of generators),
    a curtailment
    action (on any number of generators) __**AND**__ changing the topology at one substation. But at each
    step, the agent should predict lots of "numbers".

    On the other hand, at each step, the agent for `gym_env2` will have to predict a single integer (which is
    usually the case in most RL environments) but this action will affect redispatching on a single generator,
    perform curtailment on a single generator __**OR**__ change the topology at one substation. But at each
    step, the agent should predict only one "number".

    The action set is then largely constrained compared to the :class:`MultiDiscreteActSpace`

    .. note::
        This class is really closely related to the :class:`grid2op.Converter.IdToAct`. It basically "maps"
        this "IdToAct" into a type of gym space, which, in this case, will be a `Discrete` one.

    .. note::
        By default, the "do nothing" action is encoded by the integer '0'.

    .. warning::
        Depending on the presence or absence of gymnasium and gym packages this class might behave differently.

        In grid2op we tried to maintain compatibility both with gymnasium (newest) and gym (legacy,
        no more maintained) RL packages. The behaviour is the following:

        - :class:`DiscreteActSpace` will inherit from gymnasium if it's installed
          (in this case it will be :class:`DiscreteActSpaceGymnasium`), otherwise it will
          inherit from gym (and will be exactly :class:`DiscreteActSpaceLegacyGym`)
        - :class:`DiscreteActSpaceGymnasium` will inherit from gymnasium if it's available and never
          from gym
        - :class:`DiscreteActSpaceLegacyGym` will inherit from gym if it's available and never
          from gymnasium

        See :ref:`gymnasium_gym` for more information

    Examples
    --------
    We recommend to use it like:

    .. code-block:: python

        import grid2op
        env_name = "l2rpn_case14_sandbox"  # or any other name
        env = grid2op.make(env_name)

        from grid2op.gym_compat import GymEnv, DiscreteActSpace
        gym_env = GymEnv(env)

        gym_env.action_space = DiscreteActSpace(env.action_space,
                                                attr_to_keep=['redispatch', "curtail", "set_bus"])

    The possible attribute you can provide in the "attr_to_keep" are:

    - "set_line_status": adds 5 actions per powerline:

      1) disconnect it
      2) connect origin side to busbar 1 and extremity side to busbar 1
      3) connect origin side to busbar 1 and extremity side to busbar 2
      4) connect origin side to busbar 2 and extremity side to busbar 1
      5) connect origin side to busbar 2 and extremity side to busbar 2

      This is "over complex" for most use case where you just want to "connect it"
      or "disconnect it". If you want the simplest version, just use "set_line_status_simple".
    - "set_line_status_simple" (grid2op >= 1.6.6): the simpler alternative to "set_line_status"
      described just above.
    - "change_line_status"
    - "set_bus": corresponds to changing the topology using the "set_bus" (equivalent to the
      "one_sub_set" keyword in the "attr_to_keep" of the :class:`MultiDiscreteActSpace`)
    - "change_bus": corresponds to changing the topology using the "change_bus" (equivalent to the
      "one_sub_change" keyword in the "attr_to_keep" of the :class:`MultiDiscreteActSpace`)
    - "redispatch"
    - "set_storage"
    - "curtail"
    - "curtail_mw" (same effect as "curtail")

    If you do not want (each time) to build all the actions from the action space, but would rather
    save the actions you find the most interesting and then reload them, you can, for example:

    .. code-block:: python

        import grid2op
        from grid2op.gym_compat import GymEnv, DiscreteActSpace
        env_name = "l2rpn_case14_sandbox"  # or any other name
        env = grid2op.make(env_name)

        gym_env = GymEnv(env)
        action_list = ...  # a list of action, that can be processed by
        # IdToAct.init_converter (all_actions): see
        # https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.init_converter
        gym_env.action_space = DiscreteActSpace(env.action_space,
                                                action_list=action_list)

    .. note::
        This last version (providing explicitly the actions you want to keep and their ID)
        is much (much) safer and reproducible. Indeed, the
        actions usable by your agent will be the same (and in the same order)
        regardless of the grid2op version, of the person using it, of pretty
        much everything.

        It might not be consistent (between different grid2op versions)
        if the actions are built from scratch (for example, depending on the
        grid2op version other types of actions can be made, such as curtailment,
        or actions on storage units) like it's the case with the key-words
        (*eg* "set_bus") you pass as argument in the `attr_to_keep`

    """

    def __init__(
        self,
        grid2op_action_space,
        attr_to_keep=ALL_ATTR_FOR_DISCRETE,
        nb_bins=None,
        action_list=None,
    ):
        """
        Build the discrete space, either from attribute names or from an explicit list of actions.

        Parameters
        ----------
        grid2op_action_space: :class:`grid2op.Action.ActionSpace`
            The grid2op action space to convert. A deep copy of it is stored in ``self.action_space``.

        attr_to_keep:
            Names of the action attributes used to enumerate the actions. When left to its default
            value, the attributes not supported by `grid2op_action_space` are silently removed.

        nb_bins: ``dict`` or ``None``
            Number of bins used for the attributes that are discretized, keyed by attribute name.
            ``None`` means ``{"redispatch": 7, "set_storage": 7, "curtail": 7}``.

        action_list:
            If provided, it is forwarded to ``IdToAct.init_converter(all_actions=action_list)`` and
            no action is enumerated from `attr_to_keep`.

        Raises
        ------
        Grid2OpException
            If `grid2op_action_space` is not an ``ActionSpace``, if "raise_alert" or "raise_alarm" is
            requested, or if both a non default `attr_to_keep` and an `action_list` are given.

        """
        if not isinstance(grid2op_action_space, ActionSpace):
            raise Grid2OpException(
                f"Impossible to create a DiscreteActSpace without providing a "
                f"grid2op action_space. You provided {type(grid2op_action_space)} "
                f'as the "grid2op_action_space" attribute.'
            )

        if nb_bins is None:
            nb_bins = {"redispatch": 7, "set_storage": 7, "curtail": 7}

        if "raise_alert" in attr_to_keep or "raise_alarm" in attr_to_keep:
            raise Grid2OpException("This converter cannot be use to raise alarm or raise alert. "
                                   "Please use the MultiDiscreteActSpace space for this purpose.")

        act_sp = grid2op_action_space
        self.action_space = copy.deepcopy(act_sp)

        if attr_to_keep == ALL_ATTR_FOR_DISCRETE:
            # default value: drop every attribute the action type does not support.
            # This filtering is NOT done when the user explicitly chose the attributes:
            # it is then their responsibility to provide supported ones.
            attr_to_keep = {
                el for el in attr_to_keep if grid2op_action_space.supports_type(el)
            }
        elif action_list is not None:
            raise Grid2OpException(
                "Impossible to specify a list of attributes "
                "to keep (argument attr_to_keep) AND a list of "
                "action to use (argument action_list)."
            )

        # the warning is only relevant when the actions are enumerated from the attributes
        for el in attr_to_keep:
            if el not in ATTR_DISCRETE and action_list is None:
                warnings.warn(
                    f'The class "DiscreteActSpace" should mainly be used to consider only discrete '
                    f"actions (eg. set_line_status, set_bus or change_bus). Though it is possible to use "
                    f'"{el}" when building it, be aware that this continuous action will be treated '
                    f"as discrete by splitting it into bins. "
                    f'Consider using the "BoxGymActSpace" for these attributes.'
                )

        # sorted so that the order in which the actions are enumerated does not depend on set ordering
        self._attr_to_keep = sorted(attr_to_keep)
        self._nb_bins = nb_bins

        # attribute name -> function listing the unitary actions of this kind
        self.dict_properties = {
            "set_line_status": act_sp.get_all_unitary_line_set,
            "change_line_status": act_sp.get_all_unitary_line_change,
            "set_bus": act_sp.get_all_unitary_topologies_set,
            "change_bus": act_sp.get_all_unitary_topologies_change,
            "redispatch": act_sp.get_all_unitary_redispatch,
            "set_storage": act_sp.get_all_unitary_storage,
            "curtail": act_sp.get_all_unitary_curtail,
            "curtail_mw": act_sp.get_all_unitary_curtail,
            # "raise_alarm": act_sp.get_all_unitary_alarm,
            # "raise_alert": act_sp.get_all_unitary_alert,
            "set_line_status_simple": act_sp.get_all_unitary_line_set_simple,
        }

        if action_list is None:
            self.converter = None
            n_act = self._get_info()
        else:
            self.converter = IdToAct(self.action_space)
            self.converter.init_converter(all_actions=action_list)
            n_act = self.converter.n

        # initialize the base container (gym or gymnasium "Discrete", set on the concrete subclass)
        type(self)._DiscreteType.__init__(self, n=n_act)

    def _get_info(self):
        """
        Enumerate the actions from ``self._attr_to_keep`` and store the resulting converter.

        The list of actions always starts with ``self.action_space()``, then receives, for each kept
        attribute, the actions returned by the matching entry of ``self.dict_properties``.

        Returns
        -------
        n: ``int``
            ``self.converter.n`` once the converter has been initialized with the enumerated actions.

        Raises
        ------
        RuntimeError
            If an attribute name is not a key of ``self.dict_properties``.

        """
        converter = IdToAct(self.action_space)
        li_act = [self.action_space()]
        for attr_nm in self._attr_to_keep:
            if attr_nm not in self.dict_properties:
                li_keys = "\n\t- ".join(sorted(self.dict_properties))
                raise RuntimeError(
                    f'Unknown action attributes "{attr_nm}". Supported attributes are: '
                    f"\n\t- {li_keys}"
                )

            get_unitary = self.dict_properties[attr_nm]
            if attr_nm not in self._nb_bins:
                # no number of bins specified for this attribute
                li_act += get_unitary(self.action_space)
            elif attr_nm in ("curtail", "curtail_mw"):
                li_act += get_unitary(
                    self.action_space, num_bin=self._nb_bins[attr_nm]
                )
            else:
                # same number of bins is used "down" and "up"
                li_act += get_unitary(
                    self.action_space,
                    num_down=self._nb_bins[attr_nm],
                    num_up=self._nb_bins[attr_nm],
                )

        converter.init_converter(li_act)
        self.converter = converter
        return self.converter.n

    def from_gym(self, gym_act):
        """
        This is the function that is called to transform a gym action (in this case a single integer)
        sent by the agent
        and convert it to a grid2op action that will be sent to the underlying grid2op environment.

        Parameters
        ----------
        gym_act: ``int``
            the gym action (a single integer for this action space)

        Returns
        -------
        grid2op_act: :class:`grid2op.Action.BaseAction`
            The corresponding grid2op action.

        """
        # int(...) so that integer-like values can be used to index the list of actions
        res = self.converter.all_actions[int(gym_act)]
        return res

    def close(self):
        """Does nothing."""
        pass
if GYM_AVAILABLE:
    # Legacy "gym" flavour of the discrete action space, only defined when gym is available.
    from gym.spaces import Discrete as LegGymDiscrete
    from grid2op.gym_compat.box_gym_actspace import BoxLegacyGymActSpace
    from grid2op.gym_compat.continuous_to_discrete import ContinuousToDiscreteConverterLegacyGym

    # The class is created dynamically: the shared implementation is combined with gym's "Discrete",
    # and the documentation of the shared implementation is reused as is.
    DiscreteActSpaceLegacyGym = type(
        "DiscreteActSpaceLegacyGym",
        (__AuxDiscreteActSpace, LegGymDiscrete),
        {
            "_gymnasium": False,
            "_DiscreteType": LegGymDiscrete,
            "_BoxGymActSpaceType": BoxLegacyGymActSpace,
            "_ContinuousToDiscreteConverterType": ContinuousToDiscreteConverterLegacyGym,
            "__module__": __name__,
            "__doc__": __AuxDiscreteActSpace.__doc__,
        },
    )
    # public alias (same class object, hence same documentation)
    DiscreteActSpace = DiscreteActSpaceLegacyGym
if GYMNASIUM_AVAILABLE:
    # "gymnasium" flavour of the discrete action space, only defined when gymnasium is available.
    from gymnasium.spaces import Discrete
    from grid2op.gym_compat.box_gym_actspace import BoxGymnasiumActSpace
    from grid2op.gym_compat.continuous_to_discrete import ContinuousToDiscreteConverterGymnasium

    # The name given to `type` must match the module-level variable the class is bound to:
    # it is what `__name__` / `__qualname__` report and what pickle looks up in the module.
    # (it used to be "MultiDiscreteActSpaceGymnasium", which does not exist in this module)
    DiscreteActSpaceGymnasium = type(
        "DiscreteActSpaceGymnasium",
        (__AuxDiscreteActSpace, Discrete),
        {
            "_gymnasium": True,
            "_DiscreteType": Discrete,
            "_BoxGymActSpaceType": BoxGymnasiumActSpace,
            "_ContinuousToDiscreteConverterType": ContinuousToDiscreteConverterGymnasium,
            "__module__": __name__,
        },
    )
    DiscreteActSpaceGymnasium.__doc__ = __AuxDiscreteActSpace.__doc__
    # public alias (same class object, hence same documentation)
    DiscreteActSpace = DiscreteActSpaceGymnasium