# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import sys
import numpy as np
import re
from grid2op._glop_platform_info import _IS_WINDOWS, _IS_LINUX, _IS_MACOS
from grid2op.Exceptions import Grid2OpException
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float


class RedispReward(BaseReward):
"""
This reward can be used for environments where redispatching is available. It assigns a cost to redispatching action
and penalizes with the losses.
This is the closest reward to the score used for the l2RPN competitions.
Examples
---------
You can use this reward in any environment with:
.. code-block:
import grid2op
from grid2op.Reward import RedispReward
# then you create your environment with it:
NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
env = grid2op.make(NAME_OF_THE_ENVIRONMENT,reward_class=RedispReward)
# and do a step with a "do nothing" action
obs = env.reset()
obs, reward, done, info = env.step(env.action_space())
# the reward is computed with the RedispReward class
# NB this is the default reward of many environments in the grid2op framework

    This class depends on some "meta parameters". These meta parameters can be changed when the
    class is created, in the following way:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import RedispReward

        reward_cls = RedispReward.generate_class_custom_params(alpha_redisph=5,
                                                               min_load_ratio=0.1,
                                                               worst_losses_ratio=0.05,
                                                               min_reward=-10.,
                                                               reward_illegal_ambiguous=0.,
                                                               least_losses_ratio=0.015)
        env_name = ...  # e.g. "l2rpn_case14_sandbox"
        env = grid2op.make(env_name, reward_class=reward_cls)

    These meta parameters mean:

    - alpha_redisph: extra cost paid when performing redispatching: for each MW of redispatching
      performed, you pay "alpha_redisph"
    - min_load_ratio: how to compute the minimum load on the grid, as a fraction of the total
      generation capacity (sum of gen_pmax)
    - worst_losses_ratio: worst losses possible on the grid (5% is an upper bound for a normal grid)
    - min_reward: the minimum reward of this class (can be parametrized, and is only used when
      there is a game over)
    - reward_illegal_ambiguous: reward given when the action is illegal or ambiguous
    - least_losses_ratio: the minimum losses you can have (1.5% of the total demand should be a
      lower bound for a real grid)

    Notes
    ------
    On Windows and MacOS, due to a compatibility issue with multi-processing, it is not possible
    to have different "RedispReward" classes with different meta parameters (see the "Examples"
    section): the meta parameters of this very class are modified in place instead.
    """
_alpha_redisp = dt_float(5.0)
_min_load_ratio = dt_float(0.1) # min load = min_load_ratio * max_load
_worst_losses_ratio = dt_float(
0.05
) # worst_losses = worst_losses_ratio * worst_load
_min_reward = dt_float(-10.0) # reward when game over
_reward_illegal_ambiguous = dt_float(
0.0
) # reward when action is illegal or ambiguous
_least_losses_ratio = dt_float(
0.015
) # least_losses = least_losses_ratio * least_loads

    def __init__(self, logger=None):
BaseReward.__init__(self, logger=logger)
self.reward_min = None
self.reward_max = None
self.max_regret = dt_float(0.0)
self.reward_illegal_ambiguous = None

    @classmethod
def generate_class_custom_params(
cls,
alpha_redisph=5.0,
min_load_ratio=0.1, # min load = min_load_ratio * max_load
worst_losses_ratio=0.05, # worst_losses = worst_losses_ratio * worst_load
min_reward=-10.0,
least_losses_ratio=0.015, # least_losses = least_losses_ratio * least_loads
reward_illegal_ambiguous=0.0,
):
        if _IS_LINUX:
            # on linux it's fine: we can create a new class for each set of meta parameters
nm_res = f"RedispReward_{alpha_redisph:.2f}_{min_load_ratio:.2f}_{worst_losses_ratio:.2f}"
nm_res += f"_{min_reward:.2f}_{least_losses_ratio:.2f}_{reward_illegal_ambiguous:.2f}"
            nm_res = re.sub(r"\.", "@", nm_res)
cls_attr_as_dict = {
"_alpha_redisp": dt_float(alpha_redisph),
"_min_load_ratio": dt_float(min_load_ratio),
"_worst_losses_ratio": dt_float(worst_losses_ratio),
"_min_reward": dt_float(min_reward),
"_least_losses_ratio": dt_float(least_losses_ratio),
"_reward_illegal_ambiguous": dt_float(reward_illegal_ambiguous),
}
res_cls = type(nm_res, (cls,), cls_attr_as_dict)
res_cls.__module__ = cls.__module__
setattr(sys.modules[cls.__module__], nm_res, res_cls)
globals()[nm_res] = res_cls
        else:
            # on windows and macos, because of the multi-processing compatibility issue
            # mentioned in the class docstring, we modify the default parameters of the
            # base class in place. Not pretty, but it works...
            # TODO make that prettier and clean the way to make the reward in the env
            # (for example allow to pass objects and not just classes)
cls._alpha_redisp = dt_float(alpha_redisph)
cls._min_load_ratio = dt_float(min_load_ratio)
cls._worst_losses_ratio = dt_float(worst_losses_ratio)
cls._min_reward = dt_float(min_reward)
cls._least_losses_ratio = dt_float(least_losses_ratio)
cls._reward_illegal_ambiguous = dt_float(reward_illegal_ambiguous)
res_cls = cls
return res_cls
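
    # Illustration (added for clarity, not part of the original API): on linux, calling
    # generate_class_custom_params with the default values would create a class named
    # something like "RedispReward_5@00_0@10_0@05_-10@00_0@01_0@00", the "." being
    # replaced by "@" so that one distinct class per set of meta parameters can be
    # registered in this module.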

    def initialize(self, env):
if not env.redispatching_unit_commitment_availble:
            raise Grid2OpException(
                "Impossible to use the RedispReward reward with an environment that does not "
                "provide generator costs. Please make sure "
                "env.redispatching_unit_commitment_availble is True."
            )
cls_ = type(self)
worst_marginal_cost = np.max(env.gen_cost_per_MW)
worst_load = dt_float(np.sum(env.gen_pmax))
# it's not the worst, but definitely an upper bound
worst_losses = dt_float(cls_._worst_losses_ratio) * worst_load
worst_redisp = cls_._alpha_redisp * np.sum(env.gen_pmax) # not realistic, but an upper bound
self.max_regret = (worst_losses + worst_redisp) * worst_marginal_cost * env.delta_time_seconds / 3600.0
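        # order of magnitude (a sketch with made-up numbers: sum(gen_pmax) = 1000 MW,
        # worst marginal cost = 70, 1h time steps):
        # max_regret = (0.05 * 1000 + 5 * 1000) * 70 * 3600 / 3600 = 353500,
        # an upper bound on the cost of a single step, used as the reward baseline below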
self.reward_min = dt_float(cls_._min_reward)
        least_loads = dt_float(
            worst_load * cls_._min_load_ratio
        )  # minimum load expected on the grid (10% of its total capacity by default)
        least_losses = dt_float(
            cls_._least_losses_ratio * least_loads * env.delta_time_seconds / 3600.0
        )  # minimum losses, in MWh (1.5% of the least load by default)
least_redisp = dt_float(0.0) # lower_bound is 0
base_marginal_cost = np.min(env.gen_cost_per_MW[env.gen_cost_per_MW > 0.0])
min_regret = (least_losses + least_redisp) * base_marginal_cost
self.reward_max = dt_float((self.max_regret - min_regret) / least_loads)
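        # the best achievable reward thus corresponds to the cheapest feasible situation
        # (least losses, no redispatching, cheapest producing unit), normalized by the
        # least load, mirroring the "reward / total load" normalization of __call__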
self.reward_illegal_ambiguous = cls_._reward_illegal_ambiguous

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
res = None
if is_done:
            # if the episode is over and it's my fault (i did a blackout) i strongly
            # penalize my agent
if has_error or is_illegal or is_ambiguous:
res = self.reward_min
elif is_illegal or is_ambiguous:
res = self._reward_illegal_ambiguous
if res is None:
# compute the losses
gen_p, *_ = env.backend.generators_info()
load_p, *_ = env.backend.loads_info()
# don't forget to convert MW to MWh !
losses = (np.sum(gen_p) - np.sum(load_p)) * env.delta_time_seconds / 3600.0
# compute the marginal cost
gen_activeprod_t = env._gen_activeprod_t
marginal_cost = np.max(env.gen_cost_per_MW[gen_activeprod_t > 0.0])
# redispatching amount
actual_dispatch = env._actual_dispatch
redisp_cost = (
self._alpha_redisp * np.sum(np.abs(actual_dispatch)) * marginal_cost * env.delta_time_seconds / 3600.0
)
# cost of losses
losses_cost = losses * marginal_cost
# cost of storage
c_storage = np.sum(np.abs(env._storage_power)) * marginal_cost * env.delta_time_seconds / 3600.0
# total "regret"
regret = losses_cost + redisp_cost + c_storage
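            # worked example (hypothetical numbers, for illustration only): with 20 MWh of
            # losses, 10 MW of |dispatch|, no storage, a marginal cost of 40 and 1h steps:
            # regret = 20 * 40 + 5 * 10 * 40 = 2800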
# compute reward
reward = self.max_regret - regret
# divide it by load, to be less sensitive to load variation
res = dt_float(reward / np.sum(load_p))
return res
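

if __name__ == "__main__":
    # Minimal usage sketch (added for illustration, not part of the library): it assumes
    # grid2op is installed and the "l2rpn_case14_sandbox" environment is available locally.
    import grid2op

    env = grid2op.make("l2rpn_case14_sandbox", reward_class=RedispReward)
    obs = env.reset()
    # perform a "do nothing" action and observe the reward computed by RedispReward
    obs, reward, done, info = env.step(env.action_space())
    print(f"RedispReward after one do-nothing step: {reward}")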