/
rewardHelper.py
144 lines (114 loc) · 5.35 KB
/
rewardHelper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import copy
from grid2op.Reward.baseReward import BaseReward
from grid2op.Reward.constantReward import ConstantReward
from grid2op.Exceptions import Grid2OpException
class RewardHelper:
    """
    INTERNAL

    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

    It is a class internal to the :class:`grid2op.Environment.Environment` do not use outside
    of its purpose and do not attempt to modify it.

    This class aims at making the creation of rewards class more automatic by the :class:`grid2op.Environment`.

    It is not recommended to derive or modify this class. If a different reward needs to be used, it is
    recommended to build another object of this class, and change the :attr:`RewardHelper.rewardClass` attribute.

    Attributes
    ----------
    rewardClass: ``type``
        Type of reward that will be used by this helper. Note that the type (and not an instance / object of
        that type) must be given here. It defaults to :class:`ConstantReward`

    template_reward: :class:`BaseReward`
        An object of class :attr:`RewardHelper.rewardClass` used to compute the rewards.

    logger:
        Logger forwarded to the reward constructor when the reward is given as a class. May be ``None``.

    """

    def __init__(self, reward_func=ConstantReward, logger=None):
        self.rewardClass = None
        self.template_reward = None
        # the logger must be stored before `change_reward` is called, because
        # `change_reward` forwards it to the reward constructor
        self.logger = logger
        self.change_reward(reward_func)

    def initialize(self, env):
        """
        This function initializes the template_reward with the environment. It is used especially for using
        :func:`RewardHelper.range`.

        Parameters
        ----------
        env: :class:`grid2op.Environment.Environment`
            The current used environment.

        """
        self.template_reward.initialize(env)

    def range(self):
        """
        Provides the range of the rewards.

        Returns
        -------
        res: ``(float, float)``
            The minimum reward per time step (possibly infinity) and the maximum reward per timestep
            (possibly infinity)

        """
        return self.template_reward.get_range()

    def reset(self, env):
        """called each time `env.reset` is invoked"""
        self.template_reward.reset(env)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        """
        Gives the reward that follows the execution of the :class:`grid2op.BaseAction.BaseAction` action in the
        :class:`grid2op.Environment.Environment` env;

        Parameters
        ----------
        action: :class:`grid2op.Action.Action`
            The action performed by the BaseAgent.

        env: :class:`grid2op.Environment.Environment`
            The current environment.

        has_error: ``bool``
            Did the action cause an error, such as a diverging powerflow for example (``True``: the action
            caused an error)

        is_done: ``bool``
            Is the game over (``True`` = the game is over)

        is_illegal: ``bool``
            Is the action legal or not (``True`` = the action was illegal). See
            :class:`grid2op.Exceptions.IllegalAction` for more information.

        is_ambiguous: ``bool``
            Is the action ambiguous or not (``True`` = the action was ambiguous). See
            :class:`grid2op.Exceptions.AmbiguousAction` for more information.

        Returns
        -------
        res: ``float``
            The computed reward

        """
        res = self.template_reward(
            action, env, has_error, is_done, is_illegal, is_ambiguous
        )
        return res

    def change_reward(self, reward_func):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Use `env.change_reward` instead (:func:`grid2op.Environment.BaseEnv.change_reward`)

        Parameters
        ----------
        reward_func: :class:`BaseReward` or ``type``
            Either a reward object (it is deep-copied and the copy is used as the template) or a class
            inheriting from :class:`BaseReward` (it is instantiated by this helper).

        Raises
        ------
        Grid2OpException
            If ``reward_func`` is neither an instance of :class:`BaseReward` nor a class that inherits from it.

        """
        if isinstance(reward_func, BaseReward):
            # reward object given directly
            self.rewardClass = type(reward_func)
            self.template_reward = copy.deepcopy(reward_func)
        elif isinstance(reward_func, type) and issubclass(reward_func, BaseReward):
            # reward is provided as a class
            # (the `isinstance(..., type)` guard is needed because `issubclass` raises a bare
            # TypeError for non-class inputs, which would hide the Grid2OpException below)
            self.rewardClass = reward_func
            try:
                self.template_reward = reward_func(logger=self.logger)
            except TypeError as exc_:
                # the logger is optional (defaults to None): only report when one is available,
                # otherwise the fallback below would never be reached
                if self.logger is not None:
                    self.logger.warning(f"Reward \"{reward_func.__name__}\" does not support the logger feature. Error was : {exc_}")
                # old (<= 1.7.0) behaviour
                self.template_reward = reward_func()
        else:
            raise Grid2OpException(
                f"Impossible to build a reward with input reward_func={reward_func}. "
                f"NB `reward_func` should be either an object of type `BaseReward` (or "
                f"one of its derivative) "
                f"or a class that inherit from `BaseReward`"
            )

    def close(self):
        """close the reward helper (in case there are specific behaviours for certain rewards)"""
        self.template_reward.close()