# test_issue_187.py
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import unittest
import warnings
import numpy as np
import grid2op
from grid2op.dtypes import dt_float
from grid2op.Reward import RedispReward
from grid2op.Runner import Runner
class Issue187Tester(unittest.TestCase):
    """Regression tests for GitHub issue 187 (reward range of ``RedispReward``).

    This test suite goes beyond the simple error raised in the github issue.
    For every available "test" environment (except ``"blank"``) it checks that:

    - the reward of a "do nothing" step lies inside ``env.reward_range``, both
      for the stock ``RedispReward`` and for a class generated with
      ``RedispReward.generate_class_custom_params``;
    - the custom parameters are reflected in ``env.reward_range``.

    It also checks that an environment using the customized reward class can
    be run through a ``Runner`` using 2 processes.
    """

    # Single source of truth for the customized reward: these values are passed
    # to ``RedispReward.generate_class_custom_params`` and are also used to
    # recompute the reward range the environment is expected to expose.
    _CUSTOM_REWARD_PARAMS = {
        "alpha_redisph": 2,
        "min_load_ratio": 0.15,
        "worst_losses_ratio": 0.05,
        "min_reward": -10.0,
        "reward_illegal_ambiguous": 0.0,
        "least_losses_ratio": 0.015,
    }

    def setUp(self) -> None:
        """Set the absolute tolerance used when comparing reward bounds."""
        self.tol = 1e-5  # otherwise issues with converting to / from float32

    @staticmethod
    def _iter_test_env_names():
        """Yield the name of every available test environment except "blank"."""
        for env_name in grid2op.list_available_test_env():
            if env_name != "blank":
                yield env_name

    @classmethod
    def _make_custom_reward_cls(cls):
        """Return a ``RedispReward`` class built from ``_CUSTOM_REWARD_PARAMS``."""
        return RedispReward.generate_class_custom_params(**cls._CUSTOM_REWARD_PARAMS)

    @classmethod
    def _expected_reward_range(cls, env):
        """Recompute the reward range expected for ``_CUSTOM_REWARD_PARAMS``.

        Only ``env.gen_pmax`` and ``env.gen_cost_per_MW`` are read from ``env``.

        Returns
        -------
        tuple
            ``(reward_min, reward_max)``, both cast to ``dt_float``.
        """
        params = cls._CUSTOM_REWARD_PARAMS
        alpha_redisph = dt_float(params["alpha_redisph"])
        min_load_ratio = dt_float(params["min_load_ratio"])
        worst_losses_ratio = dt_float(params["worst_losses_ratio"])
        least_losses_ratio = dt_float(params["least_losses_ratio"])
        reward_min = dt_float(params["min_reward"])

        # --- upper bound of the regret ---
        worst_marginal_cost = np.max(env.gen_cost_per_MW)
        worst_load = dt_float(np.sum(env.gen_pmax))
        # it's not the worst, but definitely an upper bound
        worst_losses = worst_losses_ratio * worst_load
        # not realistic, but an upper bound
        worst_redisp = alpha_redisph * np.sum(env.gen_pmax)
        # NOTE(review): the division by 12 presumably converts an hourly cost
        # to a 5 minutes time step -- confirm against RedispReward.
        max_regret = (worst_losses + worst_redisp) * worst_marginal_cost / 12.0

        # --- lower bound of the regret ---
        # smallest load considered: `min_load_ratio` of the total capacity
        least_loads = dt_float(worst_load * min_load_ratio)
        # `least_losses_ratio` (1.5%) of that load is lost
        least_losses = dt_float(least_losses_ratio * least_loads)
        least_redisp = dt_float(0.0)  # lower_bound is 0
        # cheapest generator among those with a strictly positive cost
        base_marginal_cost = np.min(env.gen_cost_per_MW[env.gen_cost_per_MW > 0.0])
        min_regret = (least_losses + least_redisp) * base_marginal_cost / 12.0

        reward_max = dt_float((max_regret - min_regret) / least_loads)
        return reward_min, reward_max

    def test_issue_187(self) -> None:
        """test the range of the reward class"""
        for env_name in self._iter_test_env_names():
            # one sub-test per environment: a failure on one environment does
            # not prevent the remaining ones from being checked
            with self.subTest(env_name=env_name):
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore")
                    with grid2op.make(
                        env_name, test=True, reward_class=RedispReward
                    ) as env:
                        env.reset()
                        _, reward, _, _ = env.step(env.action_space())
                        self.assertLessEqual(
                            reward,
                            env.reward_range[1],
                            f"error for reward_max for {env_name}",
                        )
                        self.assertGreaterEqual(
                            reward,
                            env.reward_range[0],
                            f"error for reward_min for {env_name}",
                        )

    def test_custom_reward(self) -> None:
        """test i can generate the reward and use it in the envs"""
        reward_cls = self._make_custom_reward_cls()
        for env_name in self._iter_test_env_names():
            with self.subTest(env_name=env_name):
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore")
                    with grid2op.make(
                        env_name, test=True, reward_class=reward_cls
                    ) as env:
                        env.reset()
                        _, reward, _, _ = env.step(env.action_space())

                        # test that reward is in the correct range
                        self.assertLessEqual(
                            reward,
                            env.reward_range[1],
                            f"error reward > reward_max for {env_name}",
                        )
                        self.assertGreaterEqual(
                            reward,
                            env.reward_range[0],
                            f"error reward < reward_min for {env_name}",
                        )

                        # test the parameters are effectively changed:
                        # compare with what should be computed
                        reward_min, reward_max = self._expected_reward_range(env)
                        self.assertAlmostEqual(
                            env.reward_range[1],
                            reward_max,
                            delta=self.tol,
                            msg=(
                                f"wrong reward max computed for {env_name}: "
                                f"{env.reward_range[1]} vs {reward_max}"
                            ),
                        )
                        self.assertAlmostEqual(
                            env.reward_range[0],
                            reward_min,
                            delta=self.tol,
                            msg=(
                                f"wrong reward min computed for {env_name}: "
                                f"{env.reward_range[0]} vs {reward_min}"
                            ),
                        )

    def test_custom_reward_runner(self) -> None:
        """test the generated reward class can be used by a Runner with 2 processes"""
        reward_cls = self._make_custom_reward_cls()
        env_name = "l2rpn_case14_sandbox"
        nb_episode = 2
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with grid2op.make(env_name, test=True, reward_class=reward_cls) as env:
                env.reset()
                runner = Runner(**env.get_params_for_runner())
                res = runner.run(nb_episode=nb_episode, nb_process=2)
                # the runner is expected to return one result per episode
                self.assertEqual(
                    len(res),
                    nb_episode,
                    f"the runner should return one result per episode for {env_name}",
                )
if __name__ == "__main__":
    # Allow running this file directly as a script: discover and run the
    # tests defined in this module.
    unittest.main()