-
Notifications
You must be signed in to change notification settings - Fork 116
/
CombinedReward.py
103 lines (83 loc) · 3.6 KB
/
CombinedReward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float
class CombinedReward(BaseReward):
    """
    This class allows combining multiple predefined rewards. The reward it computes is
    the weighted sum of all the sub rewards it is made of.

    Each sub reward is identified by a key (its name) and carries a weight.

    It is used a bit differently than the other rewards. See the "Examples" section
    for more information.

    Examples
    --------

    .. code-block:: python

        import grid2op
        from grid2op.Reward import GameplayReward, FlatReward, CombinedReward

        env = grid2op.make(..., reward_class=CombinedReward)
        cr = env.get_reward_instance()
        cr.addReward("Gameplay", GameplayReward(), 1.0)
        cr.addReward("Flat", FlatReward(), 1.0)
        cr.initialize(env)

        obs = env.reset()
        obs, reward, done, info = env.step(env.action_space())
        # reward here is computed by summing the results of what would have
        # given `GameplayReward` and the one from `FlatReward`

    """

    def __init__(self):
        BaseReward.__init__(self)
        # With no sub reward registered the combined reward is always 0.
        self.reward_min = dt_float(0.0)
        self.reward_max = dt_float(0.0)
        # Maps reward name -> {"instance": reward object, "weight": dt_float}
        self.rewards = {}

    def addReward(self, reward_name, reward_instance, reward_weight=1.0):
        """
        Register a sub reward under ``reward_name``.

        If a sub reward with the same name already exists it is replaced.
        ``reward_min`` / ``reward_max`` are not recomputed here; they are only
        refreshed by :func:`CombinedReward.initialize`.

        Parameters
        ----------
        reward_name:
            Key used to identify the sub reward.
        reward_instance:
            The reward object; it is called like a reward in ``__call__`` and
            must expose ``initialize``, ``close``, ``reward_min`` and ``reward_max``.
        reward_weight:
            Multiplicative weight applied to the sub reward (stored as ``dt_float``).

        Returns
        -------
        bool
            Always ``True``.
        """
        self.rewards[reward_name] = {
            "instance": reward_instance,
            "weight": dt_float(reward_weight),
        }
        return True

    def removeReward(self, reward_name):
        """
        Unregister the sub reward stored under ``reward_name``.

        Returns
        -------
        bool
            ``True`` if a sub reward was removed, ``False`` if the name is unknown.
        """
        if reward_name not in self.rewards:
            return False
        del self.rewards[reward_name]
        return True

    def updateRewardWeight(self, reward_name, reward_weight):
        """
        Change the weight of an already registered sub reward.

        ``reward_min`` / ``reward_max`` are not recomputed here; they are only
        refreshed by :func:`CombinedReward.initialize`.

        Returns
        -------
        bool
            ``True`` if the weight was updated, ``False`` if the name is unknown.
        """
        if reward_name not in self.rewards:
            return False
        # Store the weight with the same dtype as `addReward` does, so that every
        # entry of `self.rewards` is homogeneous.
        self.rewards[reward_name]["weight"] = dt_float(reward_weight)
        return True

    def __iter__(self):
        """
        Yield the ``(key, value)`` pairs of the base class, then one
        ``(reward_name, dict)`` pair per sub reward.

        The dict is built from the sub reward instance itself (``dict(instance)``)
        and completed with a ``"weight"`` entry holding the weight as a python float.
        """
        for k, v in super().__iter__():
            yield (k, v)
        for name, entry in self.rewards.items():
            r_dict = dict(entry["instance"])
            r_dict["weight"] = float(entry["weight"])
            yield (name, r_dict)

    def initialize(self, env):
        """
        Initialize every sub reward with ``env`` and recompute the range of the
        combined reward, which is then written to ``env.reward_range``.

        Parameters
        ----------
        env:
            The environment passed to each sub reward's ``initialize``.
        """
        self.reward_min = dt_float(0.0)
        self.reward_max = dt_float(0.0)
        for entry in self.rewards.values():
            weight = entry["weight"]
            instance = entry["instance"]
            instance.initialize(env)
            # A negative weight flips the ordering of the two weighted bounds, so
            # take min / max explicitly instead of assuming "min * w <= max * w".
            bound_from_min = dt_float(instance.reward_min * weight)
            bound_from_max = dt_float(instance.reward_max * weight)
            self.reward_min += min(bound_from_min, bound_from_max)
            self.reward_max += max(bound_from_min, bound_from_max)

        env.reward_range = self.get_range()

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        """
        Compute the combined reward: each registered sub reward is called with the
        same arguments and the results are summed, each multiplied by its weight.

        Returns
        -------
        dt_float
            The weighted sum (``0.0`` when no sub reward is registered).
        """
        total = dt_float(0.0)
        for entry in self.rewards.values():
            sub_reward = entry["instance"](
                action, env, has_error, is_done, is_illegal, is_ambiguous
            )
            total += dt_float(sub_reward) * dt_float(entry["weight"])
        return total

    def close(self):
        """Call ``close`` on every registered sub reward."""
        for entry in self.rewards.values():
            entry["instance"].close()