-
Notifications
You must be signed in to change notification settings - Fork 116
/
OpponentSpace.py
190 lines (157 loc) · 8.22 KB
/
OpponentSpace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
from grid2op.Exceptions import OpponentError
class OpponentSpace(object):
    """
    Is similar to the action space, but for the opponent.

    It wraps an opponent together with its budget, and decides at each step whether the opponent
    is asked for a new attack, whether a previous attack simply continues, or whether nothing happens.

    Attributes
    ----------
    action_space: :class:`grid2op.Action.ActionSpace`
        The action space defining which action the Opponent are allowed to take

    init_budget: ``float``
        The initial budget of the opponent

    budget: ``float``
        The budget currently available to the opponent

    compute_budget: :class:`grid2op.Opponent.ActionBudget`
        The tool used to compute the budget (called with an attack, it returns its cost)

    opponent: :class:`grid2op.Opponent.BaseOpponent`
        The agent that will take malicious actions.

    previous_fails: ``bool``
        Whether the last attack of the opponent failed or not

    budget_per_timestep: ``float``
        The increase of the opponent budget per time step (if any)

    attack_duration: ``int``
        Value given to :attr:`OpponentSpace.current_attack_duration` when a new attack starts

    attack_cooldown: ``int``
        Value added to :attr:`OpponentSpace.current_attack_cooldown` when a new attack starts

    current_attack_duration: ``int``
        Counter decremented at each call to :func:`OpponentSpace.attack`; while it is strictly positive
        the last attack is repeated.

    current_attack_cooldown: ``int``
        Counter decremented at each call to :func:`OpponentSpace.attack`; the opponent is not asked
        for an attack while it is strictly above :attr:`OpponentSpace.attack_cooldown`.

    last_attack:
        The last attack returned by :func:`OpponentSpace.attack` (``None`` if there was no attack)

    """
    def __init__(self, compute_budget, init_budget, opponent, attack_duration, attack_cooldown,
                 budget_per_timestep=0., action_space=None):
        """
        Build the opponent space.

        Parameters
        ----------
        compute_budget:
            Callable returning the cost of an attack. It must expose an ``action_space`` attribute.

        init_budget: ``float``
            Initial budget of the opponent, must not be negative.

        opponent:
            The opponent that will be asked for attacks.

        attack_duration: ``int``
            See the class attributes.

        attack_cooldown: ``int``
            See the class attributes.

        budget_per_timestep: ``float``
            Amount added to the budget at each call to :func:`OpponentSpace.attack`

        action_space:
            Optional action space to use instead of ``compute_budget.action_space``

        Raises
        ------
        OpponentError
            If ``action_space`` does not pass the ``isinstance`` check against
            ``compute_budget.action_space``, or if ``init_budget`` is negative.

        """
        if action_space is not None:
            # NOTE(review): this check requires `compute_budget.action_space` to be a class
            # (isinstance raises TypeError otherwise) -- TODO confirm what ActionBudget exposes.
            if not isinstance(action_space, compute_budget.action_space):
                raise OpponentError("BaseAction space provided to build the agent is not a subclass from the "
                                    "action space to compute the cost of each action.")
            self.action_space = action_space
        else:
            self.action_space = compute_budget.action_space

        self.init_budget = init_budget
        self.budget = init_budget
        self.compute_budget = compute_budget
        self.opponent = opponent

        # calling the action space without argument builds the reference "do nothing" action
        self._do_nothing = self.action_space()
        self.previous_fails = False
        self.budget_per_timestep = budget_per_timestep
        self.attack_duration = attack_duration
        self.attack_cooldown = attack_cooldown
        self.current_attack_duration = 0
        self.current_attack_cooldown = attack_cooldown
        self.last_attack = None

        if init_budget < 0.:
            raise OpponentError("An opponent should at least have a positive (or null) budget. If you "
                                "want to deactivate the opponent set its budget to 0 and use the "
                                "DontAct class as the \"opponent_class\"")

        # TODO do i add it back
        # if not isinstance(opponent_reward_class, BaseReward):
        #     raise OpponentError("Impossible to build an opponent reward with a reward of type {}".format(opponent_reward_class))
        # self.opp_reward_helper = RewardHelper(opponent_reward_class)

    def init_opponent(self, partial_env, **kwargs):
        """
        Generic function used to initialize the opponent. For example, if an opponent reads from a file, the
        path where the file is located should be passed with this method.

        All the arguments are forwarded as is to ``self.opponent.init``.
        """
        self.opponent.init(partial_env=partial_env, **kwargs)

    def reset(self):
        """
        Reset the state of the Opponent to its original state, in particular re assign the proper budget to it.

        The opponent itself is reset too, and receives the restored budget.
        """
        self.budget = self.init_budget
        self.previous_fails = False
        self.current_attack_duration = 0
        self.current_attack_cooldown = self.attack_cooldown
        self.last_attack = None
        self.opponent.reset(self.budget)

    def _get_state(self):
        """
        Used for simulate: return a snapshot ``(state_me, state_opp)`` where ``state_me`` is the tuple
        (budget, previous_fails, current_attack_duration, current_attack_cooldown, last_attack) and
        ``state_opp`` is whatever ``self.opponent.get_state()`` returns.
        """
        state_me = (self.budget,
                    self.previous_fails,
                    self.current_attack_duration,
                    self.current_attack_cooldown,
                    self.last_attack)
        state_opp = self.opponent.get_state()
        return state_me, state_opp

    def _set_state(self, my_state, opp_state):
        """
        Used for simulate: restore a snapshot previously produced by :func:`OpponentSpace._get_state`.
        """
        self.opponent.set_state(opp_state)
        budget, previous_fails, current_attack_duration, current_attack_cooldown, last_attack = my_state
        self.budget = budget
        self.previous_fails = previous_fails
        self.current_attack_duration = current_attack_duration
        self.current_attack_cooldown = current_attack_cooldown
        self.last_attack = last_attack

    def has_failed(self):
        """
        This signal is sent by the environment and indicates the opponent attack could not be implemented on the
        powergrid, most likely due to the attack being ambiguous.
        """
        self.previous_fails = True

    def attack(self, observation, agent_action, env_action):
        """
        This function calls the attack from the opponent.

        It checks whether the budget is consistent with the attack (budget should be more than the cost
        associated with the attack). If the attack costs too much, then it is discarded (``None`` is returned
        as attack). Otherwise, the attack will be implemented by the environment.

        Note that if the attack is "ambiguous" it will fail (the environment will replace it by a
        "do nothing" action), but the budget will still be consumed.

        **NB** it is expected that this function update the :attr:`OpponentSpace.last_attack` attribute
        with ``None`` if the opponent choose not to attack, or with the attack of the opponent otherwise.

        Parameters
        ----------
        observation: :class:`grid2op.Observation.Observation`
            The last observation (at time t)

        agent_action: :class:`grid2op.Action.Action`
            The action that the agent took

        env_action: :class:`grid2op.Action.Action`
            The modification that the environment will take.

        Returns
        -------
        attack: :class:`grid2op.Action.Action` or ``None``
            The attack the opponent performs at this step, or ``None`` if there is no attack (first step,
            cooldown, opponent did not attack or attack too costly)

        attack_duration: ``int``
            The value of :attr:`OpponentSpace.current_attack_duration` after this step, or ``0`` when
            ``attack`` is ``None``

        """
        if observation is None:
            # this is the first time step, which is not a "real" one
            # just here to load the data properly, so opponent do not attack there
            return None, 0

        # Update variables
        self.budget += self.budget_per_timestep
        self.current_attack_duration = max(0, self.current_attack_duration - 1)
        self.current_attack_cooldown = max(0, self.current_attack_cooldown - 1)
        attack_called = False

        # If currently attacking
        if self.current_attack_duration > 0:
            attack = self.last_attack

        # If the opponent has already attacked today
        elif self.current_attack_cooldown > self.attack_cooldown:
            attack = None

        # If the opponent can attack
        else:
            # remember whether the previous attack failed *before* clearing the flag, otherwise the
            # opponent would always be told that its previous attack succeeded
            failed_before = self.previous_fails
            self.previous_fails = False
            attack = self.opponent.attack(observation, agent_action, env_action, self.budget,
                                          failed_before)
            attack_called = True

            # If the cost is too high
            final_budget = self.budget  # TODO add the: + self.budget_per_timestep * (self.attack_duration - 1)
            # i did not do it in case an attack is ok at the beginning, ok at the end, but at some point in the attack
            # process it is not (but i'm not sure this can happen, and don't have time to think about it right now)
            if self.attack_duration * self.compute_budget(attack) > final_budget:
                attack = None
                self.previous_fails = True

            # If we can afford the attack
            elif attack is not None:
                # even if it's "do nothing", it's still an attack. Too bad if the opponent chose to do nothing.
                self.current_attack_duration = self.attack_duration
                self.current_attack_cooldown += self.attack_cooldown

        if not attack_called:
            # the opponent was not asked for an attack at this step, let it know
            self.opponent.tell_attack_continues(observation, agent_action, env_action, self.budget)

        # the cost of the attack is paid at every step the attack is returned
        self.budget -= self.compute_budget(attack)
        self.last_attack = attack

        attack_duration = self.current_attack_duration
        if attack is None:
            attack_duration = 0
        return attack, attack_duration