-
Notifications
You must be signed in to change notification settings - Fork 116
/
n1Reward.py
101 lines (84 loc) · 3.76 KB
/
n1Reward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import copy
from grid2op.Reward import BaseReward
from grid2op.Action._backendAction import _BackendAction
class N1Reward(BaseReward):
    """
    This class implements the "n-1" reward, which returns the maximum flow (relative to the
    thermal limits) observed on the grid after the disconnection of a given powerline.

    The powerline to disconnect is selected with the `l_id` argument of the constructor. The
    computation is carried out on a private copy of the environment backend, so the state of
    the environment itself is not modified by this reward.

    Examples
    --------
    This can be used as:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import N1Reward
        L_ID = 0
        env = grid2op.make("l2rpn_case14_sandbox",
                           reward_class=N1Reward(l_id=L_ID)
                           )
        obs = env.reset()
        obs, reward, *_ = env.step(env.action_space())
        print(f"reward: {reward:.3f}")
        print("We can check that it is exactly like 'simulate' on the current step the disconnection of the same powerline")
        obs_n1, *_ = obs.simulate(env.action_space({"set_line_status": [(L_ID, -1)]}), time_step=0)
        print(f"\tmax flow after disconnection of line {L_ID}: {obs_n1.rho.max():.3f}")

    Notes
    -----
    It is also possible to use the `other_rewards` argument to simulate multiple powerline
    disconnections, for example:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import N1Reward
        L_IDS = [0, 1]
        env = grid2op.make("l2rpn_case14_sandbox",
                           other_rewards={f"line_{l_id}": N1Reward(l_id=l_id) for l_id in L_IDS}
                           )
        obs = env.reset()
        obs, reward, done, info = env.step(env.action_space())
        for l_id in L_IDS:
            # the "other rewards" are exposed through the `info` dictionary
            n1_reward = info["rewards"][f"line_{l_id}"]
            print(f"reward for line {l_id}: {n1_reward:.3f}")
            obs_n1, *_ = obs.simulate(env.action_space({"set_line_status": [(l_id, -1)]}), time_step=0)
            print(f"\tmax flow after disconnection of line {l_id}: {obs_n1.rho.max():.3f}")

    """

    def __init__(self, l_id=0, logger=None):
        """
        Parameters
        ----------
        l_id:
            Id of the powerline that is disconnected before the flows are computed.
        logger:
            Optional logger, forwarded to :class:`BaseReward`.
        """
        BaseReward.__init__(self, logger=logger)
        # both are created in `initialize`, once an environment is available
        self._backend = None
        self._backend_action = None
        self.l_id = l_id

    def initialize(self, env):
        """
        Create the private backend copy and the backend action used to compute the reward.

        Parameters
        ----------
        env:
            The environment this reward is attached to. Its backend is copied, so that the
            powerline disconnection is never applied to the backend of the environment.
        """
        self._backend = env.backend.copy()
        bk_act_cls = _BackendAction.init_grid(env.backend)
        self._backend_action = bk_act_cls()

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        """
        Compute the "n-1" reward for the current state of `env`.

        Returns
        -------
        res:
            - ``self.reward_min`` if the episode is over (`is_done`)
            - the maximum, over all powerlines, of "flow / thermal limit" computed on the private
              backend once powerline `l_id` has been disconnected, if the powerflow converges
            - ``-1`` if the powerflow diverges (or raises) after the disconnection
        """
        if is_done:
            return self.reward_min

        # replicate the current state of the environment backend on the private copy
        self._backend_action.reset()
        act = env.backend.get_action_to_set()
        th_lim = env.get_thermal_limit()
        th_lim[th_lim <= 1] = 1  # assign 1 for the thermal limit (avoids dividing by ~0)
        # `act` is deep-copied so that the addition below works on a private object
        self._backend_action += copy.deepcopy(act)
        self._backend.apply_action(self._backend_action)

        # simulate the contingency
        self._backend._disconnect_line(self.l_id)
        div_exc_ = None
        try:
            # TODO there is a bug in lightsimbackend that make it crash instead of diverging
            conv, div_exc_ = self._backend.runpf()
        except Exception as exc_:
            # a crash of the backend is treated exactly like a divergence
            conv = False
            div_exc_ = exc_

        if conv:
            flow = self._backend.get_line_flow()
            res = (flow / th_lim).max()
        else:
            self.logger.info(f"Divergence of the backend at step {env.nb_time_step} for N1Reward with error `{div_exc_}`")
            res = -1
        return res

    def close(self):
        """
        Release the private backend copy.

        This is safe to call several times, or before :func:`N1Reward.initialize` has been
        called: in these cases there is no backend to close and nothing is done.
        """
        if self._backend is None:
            return
        try:
            self._backend.close()
        finally:
            # drop the reference even if the backend fails to close, so that a second call
            # to `close` does not act on a backend in an unknown state
            self._backend = None