-
Notifications
You must be signed in to change notification settings - Fork 113
/
_alarmScore.py
159 lines (128 loc) · 6.73 KB
/
_alarmScore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import numpy as np
import copy
from grid2op.Exceptions import Grid2OpException
from grid2op.Reward import AlarmReward
from grid2op.dtypes import dt_float
class _AlarmScore(AlarmReward):
    """
    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        It **must not** serve as a reward. This score needs to be minimized,
        and a reward needs to be maximized! Also, this "reward" is not scaled or anything. Use it at your
        own risk.

    Implemented as a reward to make it easier to use in the context of the L2RPN competitions, this "reward"
    computes the "grid operation cost". It should not be used to train an agent.

    The "reward" the closest to this score is given by the :class:`AlarmReward` class.

    This reward is based on the "alarm feature" where the agent is asked to send information about potential
    issues on the grid.

    In this case, when the environment is in a "game over" state (eg it's the end) then the reward is computed
    the following way:

    - if the environment has been successfully managed until the end of the chronics, then 1.0 is returned
    - if no alarm has been raised, then -2.0 is returned
    - points for pointing to the right zones are computed based on the lines disconnected either in a short window
      before game over or otherwise at the time of game over

    Examples
    ---------
    You can use this reward in any environment with:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import AlarmReward

        # then you create your environment with it:
        NAME_OF_THE_ENVIRONMENT = "l2rpn_case14_sandbox"
        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=AlarmReward)

        # and do a step with a "do nothing" action
        obs = env.reset()
        obs, reward, done, info = env.step(env.action_space())
        # the reward is computed with the AlarmReward class; this internal
        # _AlarmScore applies the same mechanics for scoring purposes
    """

    def __init__(self, logger=None):
        AlarmReward.__init__(self, logger=logger)
        # required if you want to design a custom reward taking into account the
        # alarm feature
        self.reward_min = dt_float(-2.0)
        # we keep other parameters values from AlarmReward as is
        self.mult_for_right_zone = 1.5
        # length (in steps) of the window before game over in which line
        # disconnections are credited; recomputed per episode in `reset`
        self.window_disconnection = 4
        # history of env._disc_lines, one entry appended per non-game-over step
        self.disc_lines_all_before_cascade = []
        # number of powerlines on the grid; filled in `initialize`
        self.n_line = None
        # True when attached to a simulated env (e.g. obs.simulate); in that
        # case `__call__` short-circuits and returns `reward_no_game_over`
        self._i_am_simulate = True

    def initialize(self, env):
        # The alarm feature ("attention budget") must be enabled on the
        # environment, otherwise this score cannot be computed.
        if not env._has_attention_budget:
            raise Grid2OpException(
                'Impossible to use the "_AlarmScore" with an environment for which this feature '
                'is disabled. Please make sure "env._has_attention_budget" is set to ``True`` or '
                "change the reward class with `grid2op.make(..., reward_class=AnyOtherReward)`"
            )
        self.n_line = env.n_line
        self.reset(env)

    def reset(self, env):
        # Reset per-episode state; called at the beginning of each episode.
        super().reset(env)
        # `best_time` and `window_size` are inherited from AlarmReward;
        # keep a window of at least 4 steps
        self.window_disconnection = max(self.best_time - self.window_size, 4)
        self.disc_lines_all_before_cascade = []
        self._i_am_simulate = self.is_simulated_env(env)

    def _lines_disconnected_first(self, disc_lines_at_cascading_time):
        """
        here we detect the disconnected lines that we will consider to compute the `mult_for_zone` multiplying factor.

        Either the lines that were disconnected in a short period before final failure. Otherwise the first lines
        disconnected at the time of failure.

        :param disc_lines_at_cascading_time: lines that are disconnected first at time of failure
            (entries >= 0 mark disconnected lines, as recorded in ``env._disc_lines``)
        :return: a vector of 0/1 per line where disconnected lines have a **zero**,
            to be coherent with ``env._disc_lines``
        """
        disc_lines_to_consider_for_score = np.zeros(self.n_line, dtype=bool)
        nb_obs = len(self.disc_lines_all_before_cascade)
        # flag every line that was disconnected during the last
        # `window_disconnection` recorded steps before the failure
        for step in range(nb_obs - self.window_disconnection, nb_obs):
            disc_lines_to_consider_for_score[
                self.disc_lines_all_before_cascade[step] >= 0
            ] = True
        if disc_lines_to_consider_for_score.sum() == 0:
            # nothing tripped inside the window: fall back to the lines
            # disconnected at the cascading step itself (value 0 appears to mean
            # "disconnected at this very step" — consistent with env._disc_lines)
            disc_lines_to_consider_for_score = disc_lines_at_cascading_time == 0

        # if we are there, it is because we have identified before that the failure is due to disconnected powerlines
        assert (disc_lines_to_consider_for_score).any()

        # we transform the vector so that disconnected lines have a zero, to be coherent with env._disc_lines
        return 1 - disc_lines_to_consider_for_score

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        # Score for the current step. NB: this must be *minimized* by the agent,
        # despite being implemented through the reward interface.
        if self._i_am_simulate:
            # simulated environments never contribute to the alarm score
            return self.reward_no_game_over
        disc_lines_now = env._disc_lines
        if is_done:
            if not has_error:
                # agent went until the end
                return self.reward_max
            if np.all(env._disc_lines == -1):
                # game over is not caused by the tripping of a powerline
                return self.reward_min
            if len(env._attention_budget._all_successful_alarms) == 0:
                # no alarm have been sent, so it's the minimum
                return self.reward_min

            successfull_alarms = env._attention_budget._all_successful_alarms
            step_game_over = env.nb_time_step

            # lines to credit: tripped in the window before game over, or at game over
            disc_lines_to_consider_for_score = self._lines_disconnected_first(
                disc_lines_now
            )

            # so now i can consider the alarms: keep the best-scoring one
            # (`_points_for_alarm` is inherited from AlarmReward)
            best_score, is_alarm_used = self.reward_min, False
            for alarm in successfull_alarms:
                tmp_sc, tmp_is = self._points_for_alarm(
                    *alarm,
                    step_game_over=step_game_over,
                    disc_lines=disc_lines_to_consider_for_score,
                    env=env
                )
                if tmp_sc > best_score:
                    best_score = tmp_sc
                    is_alarm_used = tmp_is
            self.is_alarm_used = is_alarm_used
            return best_score
        else:
            # make sure to deepcopy, otherwise it gets updated with the last timestep value for every previous timesteps
            # we log the line disconnected over time
            # TODO have a cache there and store only the last few states, most of what is stored here is not used
            self.disc_lines_all_before_cascade.append(copy.deepcopy(disc_lines_now))
            res = self.reward_no_game_over
            return res