Merge pull request #244 from BDonnot/master
hotfix for 1.6.2 (simulate bug)
BDonnot committed Aug 18, 2021
2 parents 5620aec + fa93710 commit fb6c934
Showing 5 changed files with 69 additions and 25 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
@@ -23,6 +23,11 @@ Change Log
 - [???] "asynch" multienv
 - [???] properly model interconnecting powerlines
 
+[1.6.2] (hotfix) - 2021-08-18
+-----------------------------
+- [FIXED] an issue when using `obs.simulate` with `_AlarmScore` (major bug)
+- [FIXED] now properly initialized the "complete_action_class" of the backend (minor bug)
+
 [1.6.2] - 2021-07-27
 ---------------------
 - [ADDED] the complete support for pickling grid2op classes. This is a major feature that allows to use grid2op
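For context, the user-facing behavior this hotfix restores can be illustrated with a short script. This is a minimal sketch based on the regression test added in grid2op/tests/test_AlarmFeature.py below; it assumes the bundled l2rpn_icaps_2021 test dataset is available:

import warnings

import grid2op
from grid2op.Reward import _AlarmScore

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    env = grid2op.make("l2rpn_icaps_2021", test=True, reward_class=_AlarmScore)

obs = env.reset()
# before this hotfix, the call below crashed: _AlarmScore tried to read the
# step history of the throwaway environment used internally by obs.simulate
sim_obs, sim_reward, sim_done, sim_info = obs.simulate(env.action_space())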
3 changes: 2 additions & 1 deletion grid2op/Backend/Backend.py
@@ -1452,7 +1452,6 @@ def get_action_to_set(self):
             q_s[bus_s == -1] = np.NaN
             dict_["shunt"]["shunt_p"] = p_s
             dict_["shunt"]["shunt_q"] = q_s
-
         set_me.update(dict_)
         return set_me
 
@@ -1547,6 +1546,8 @@ def assert_grid_correct(self):
         my_cls = type(self)
         my_cls.my_bk_act_class = _BackendAction.init_grid(my_cls)
         my_cls._complete_action_class = CompleteAction.init_grid(my_cls)
+        my_cls._complete_action_class._add_shunt_data()
+        my_cls._complete_action_class._update_value_set()
         my_cls.assert_grid_correct_cls()
 
     def assert_grid_correct_after_powerflow(self):
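The two calls added in assert_grid_correct finish the class-level setup of the dynamically built _complete_action_class. The pattern can be sketched with a toy example (illustrative only: ToyAction and ToyGrid are made up, and only the _add_shunt_data name comes from the diff above):

class ToyAction:
    attrs = ["set_bus", "set_line_status"]

    @classmethod
    def init_grid(cls, gridobj):
        # mimic grid2op's init_grid: build a fresh subclass for this grid
        return type("ToyAction_" + gridobj.name, (cls,), {"attrs": list(cls.attrs)})

    @classmethod
    def _add_shunt_data(cls):
        # class-level state that must be filled in after init_grid; forgetting
        # this step is the "minor bug" fixed here for the backend
        cls.attrs = cls.attrs + ["shunt_p", "shunt_q", "shunt_bus"]


class ToyGrid:
    name = "case14"


toy_cls = ToyAction.init_grid(ToyGrid)
toy_cls._add_shunt_data()  # the step assert_grid_correct now performs
assert "shunt_p" in toy_cls.attrs and "shunt_p" not in ToyAction.attrs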
46 changes: 28 additions & 18 deletions grid2op/Reward/_AlarmScore.py
@@ -7,11 +7,12 @@
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
 import numpy as np
+import copy
 
 from grid2op.Exceptions import Grid2OpException
 from grid2op.Reward import AlarmReward
 from grid2op.dtypes import dt_float
-import copy
 
 
 class _AlarmScore(AlarmReward):
     """
@@ -56,19 +57,25 @@ def __init__(self):
         # required if you want to design a custom reward taking into account the
         # alarm feature
         self.reward_min = dt_float(-2.)
-        #we keep other parameters values from AlarmReward as is
+        # we keep other parameters values from AlarmReward as is
 
         self.mult_for_right_zone = 1.5
         self.window_disconnection = 4
 
         self.disc_lines_all_before_cascade = []
         self.n_line = None
 
+        # This class remembers the past state of the grid, this does not make sense for the "simulate" env
+        # so i deactivate it in this case.
+        from grid2op.Observation._ObsEnv import _ObsEnv  # to avoid circular dependencies
+        self._deactivate_reward_cls = (_ObsEnv, )
+
     def initialize(self, env):
         if not env._has_attention_budget:
-            raise Grid2OpException("Impossible to use the \"AlarmReward\" with an environment for which this feature "
+            raise Grid2OpException("Impossible to use the \"_AlarmScore\" with an environment for which this feature "
                                    "is disabled. Please make sure \"env._has_attention_budget\" is set to ``True`` or "
                                    "change the reward class with `grid2op.make(..., reward_class=AnyOtherReward)`")
-        self.n_line=env.n_line
+        self.n_line = env.n_line
         self.reset(env)
 
     def reset(self, env):
@@ -78,39 +85,40 @@ def reset(self, env):
 
     def _lines_disconnected_first(self, disc_lines_at_cascading_time):
         """
-        here we detect the disconnected lines that we will consider to compute the mult_for_zone multiplifying factor.
+        here we detect the disconnected lines that we will consider to compute the `mult_for_zone` multiplying factor.
         Either the lines that were disconnected in a short period before final failure. Otherwise the first lines
-        disconnedted at the time of failure
+        disconnected at the time of failure
         :param disc_lines_at_cascading_time: lines that are disconnected first at time of failure
         :return:
         """
 
-        disc_lines_to_consider_for_score = np.zeros(self.n_line,dtype=bool)
+        disc_lines_to_consider_for_score = np.zeros(self.n_line, dtype=bool)
 
         nb_obs = len(self.disc_lines_all_before_cascade)
 
-        for t in range(nb_obs - self.window_disconnection, nb_obs):
-            disc_lines_to_consider_for_score[self.disc_lines_all_before_cascade[t] >= 0]=True
+        for step in range(nb_obs - self.window_disconnection, nb_obs):
+            disc_lines_to_consider_for_score[self.disc_lines_all_before_cascade[step] >= 0] = True
 
-        if (np.sum(disc_lines_to_consider_for_score) == 0):
-            disc_lines_to_consider_for_score = disc_lines_at_cascading_time==0
+        if np.sum(disc_lines_to_consider_for_score) == 0:
+            disc_lines_to_consider_for_score = disc_lines_at_cascading_time == 0
 
-        #if we are there, it is because we have identified before that the failure is due to disconnected powerlines
-        assert(np.any(disc_lines_to_consider_for_score))
+        # if we are there, it is because we have identified before that the failure is due to disconnected powerlines
+        assert np.any(disc_lines_to_consider_for_score)
 
-        #we transform the vector so that diconnected lines have a zero, to be coherent with env._disc_lines
-        return 1-disc_lines_to_consider_for_score
+        # we transform the vector so that disconnected lines have a zero, to be coherent with env._disc_lines
+        return 1 - disc_lines_to_consider_for_score
 
     def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
+        if isinstance(env, self._deactivate_reward_cls):
+            return self.reward_no_game_over
+
+        disc_lines_now = env._disc_lines
+
         if is_done:
             if not has_error:
                 # agent went until the end
                 return self.reward_max
 
             if np.all(env._disc_lines == -1):
                 # game over is not caused by the tripping of a powerline
                 return self.reward_min
@@ -139,6 +147,8 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
                 return best_score
         else:
             # make sure to deepcopy, otherwise it gets updated with the last timestep value for every previous timesteps
-            self.disc_lines_all_before_cascade.append(copy.deepcopy(disc_lines_now))  # we log the line disconnected over time
+            # we log the line disconnected over time
+            # TODO have a cache there and store only the last few states, most of what is stored here is not used
+            self.disc_lines_all_before_cascade.append(copy.deepcopy(disc_lines_now))
             res = self.reward_no_game_over
             return res
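The _lines_disconnected_first helper marks every line that tripped within the last window_disconnection recorded observations. A standalone numpy sketch of that loop, on toy data (3 lines, 5 recorded steps, using the env._disc_lines convention that -1 means "not disconnected at this step"):

import numpy as np

history = [np.array([-1, -1, -1])] * 3 + [np.array([-1, 0, -1]), np.array([-1, -1, 2])]
window_disconnection = 4
mask = np.zeros(3, dtype=bool)
for step in range(len(history) - window_disconnection, len(history)):
    mask[history[step] >= 0] = True
print(1 - mask)  # [1 0 0]: zeros flag the lines seen disconnected in the window

The actual bug fix, though, is the isinstance(env, self._deactivate_reward_cls) guard at the top of __call__: _AlarmScore accumulates per-step history, and the short-lived _ObsEnv used by obs.simulate has none. A minimal sketch of the same guard in a custom stateful reward (it assumes grid2op's public BaseReward class; MyStatefulReward and its neutral return value are illustrative, not part of this commit):

import copy

from grid2op.Reward import BaseReward
from grid2op.dtypes import dt_float


class MyStatefulReward(BaseReward):
    """illustrative reward that, like _AlarmScore, keeps per-step history"""

    def __init__(self):
        BaseReward.__init__(self)
        self.reward_min = dt_float(-1.0)
        self.reward_max = dt_float(1.0)
        self._disc_lines_history = []
        # same trick as in the hotfix: the env used by obs.simulate (_ObsEnv)
        # has no past, so the stateful logic must be skipped for it
        from grid2op.Observation._ObsEnv import _ObsEnv  # avoid circular import
        self._deactivate_reward_cls = (_ObsEnv,)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        if isinstance(env, self._deactivate_reward_cls):
            # simulated step: return a neutral value, leave the history alone
            return self.reward_max
        # real step: log disconnected lines (deepcopy, as in _AlarmScore,
        # because env._disc_lines is mutated in place at every step)
        self._disc_lines_history.append(copy.deepcopy(env._disc_lines))
        return self.reward_min if has_error else self.reward_max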
9 changes: 4 additions & 5 deletions grid2op/tests/test_Action.py
@@ -194,11 +194,10 @@ def setUp(self):
         self.gridobj = GridObjects_cls()
         self.n_line = self.gridobj.n_line
 
-        self.ActionSpaceClass = ActionSpace.init_grid(self.gridobj)
-        # self.helper_action = ActionSpace(self.gridobj, legal_action=self.game_rules.legal_action)
+        self.ActionSpaceClass = ActionSpace.init_grid(GridObjects_cls)
         act_cls = self._action_setup()
         act_cls = act_cls.init_grid(self.gridobj)
-        self.helper_action = self.ActionSpaceClass(self.gridobj, legal_action=self.game_rules.legal_action,
+        self.helper_action = self.ActionSpaceClass(GridObjects_cls,
+                                                   legal_action=self.game_rules.legal_action,
                                                    actionClass=act_cls)
         self.helper_action.seed(42)
         # save_to_dict(self.res, self.helper_action, "subtype", lambda x: re.sub("(<class ')|('>)", "", "{}".format(x)))
@@ -800,7 +799,7 @@ def test_from_vect_storage(self):
         """test from vect also work with storage action"""
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore")
-            action1 = self.helper_action({"storage_power": [(0, -7.42)]})
+            action1 = self.helper_action({"set_storage": [(0, -7.42)]})
             action2 = self.helper_action({})
 
             vect_act1 = action1.to_vect()
31 changes: 30 additions & 1 deletion grid2op/tests/test_AlarmFeature.py
@@ -15,7 +15,7 @@
 
 from grid2op.operator_attention import LinearAttentionBudget
 from grid2op import make
-from grid2op.Reward import RedispReward
+from grid2op.Reward import RedispReward, _AlarmScore
 from grid2op.Exceptions import Grid2OpException
 from grid2op.Runner import Runner
 from grid2op.Environment import Environment
@@ -436,6 +436,35 @@ def test_kwargs(self):
             obs, reward, done, info = env2.step(env2.action_space())
             assert obs.attention_budget == 3 + 1. / (12. * 8.)
 
+    def test_simulate(self):
+        """issue reported during icaps 2021"""
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            env = make("l2rpn_icaps_2021", test=True, reward_class=_AlarmScore)
+        env.set_thermal_limit([20, 70, 36.049267, 43.361996, 407.20905,
+                               42.96296, 23.125486, 7.005345, 61.224003, 18.283638,
+                               20.992632, 89.384026, 117.01148, 62.883495, 44.568665,
+                               29.756845, 14.604381, 28.99635, 124.59952, 124.59952,
+                               38.46957, 48.00529, 112.23501, 139.56854, 57.25149,
+                               35.785202, 31.468952, 98.922386, 97.78254, 10.58541,
+                               7.2501163, 34.89438, 66.21333, 89.454895, 40.088715,
+                               59.50673, 54.07072, 47.005745, 49.29639, 60.19898,
+                               98.318146, 110.93459, 178.60854, 48.504723, 9.022086,
+                               197.42432, 174.3434, 295.6653, 149.95523, 149.95523,
+                               50.128273, 31.93147, 74.32939, 54.26264, 41.730865,
+                               238.96637, 197.42432, 113.98372, 413.98587])
+        _ = env.reset()
+        # it crashed
+        obs, *_ = env.step(env.action_space())
+        obs, *_ = env.step(env.action_space())
+        alarm_act = env.action_space()
+        alarm_act.raise_alarm = [0]
+        obs, reward, done, info = env.step(alarm_act)
+        assert not done
+        # next step there is a game over due to
+        sim_obs, sim_r, sim_done, sim_info = obs.simulate(env.action_space())
+        assert sim_done
+
+
 if __name__ == "__main__":
     unittest.main()
