Merge pull request #244 from BDonnot/master
hotfix for 1.6.2 (simulate bug)
BDonnot committed Aug 18, 2021
2 parents 5620aec + fa93710 commit fb6c934
Showing 5 changed files with 69 additions and 25 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
@@ -23,6 +23,11 @@ Change Log
 - [???] "asynch" multienv
 - [???] properly model interconnecting powerlines
 
+[1.6.2] (hotfix) - 2021-08-18
+-----------------------------
+- [FIXED] an issue when using `obs.simulate` with `_AlarmScore` (major bug)
+- [FIXED] now properly initialized the "complete_action_class" of the backend (minor bug)
+
 [1.6.2] - 2021-07-27
 ---------------------
 - [ADDED] the complete support for pickling grid2op classes. This is a major feature that allows to use grid2op
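For context, the user-facing behavior this hotfix restores can be illustrated with a short script. This is a minimal sketch based on the regression test added in grid2op/tests/test_AlarmFeature.py below; it assumes the bundled l2rpn_icaps_2021 test dataset is available:

import warnings

import grid2op
from grid2op.Reward import _AlarmScore

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    env = grid2op.make("l2rpn_icaps_2021", test=True, reward_class=_AlarmScore)

obs = env.reset()
# before this hotfix, the call below crashed: _AlarmScore tried to read the
# step history of the throwaway environment used internally by obs.simulate
sim_obs, sim_reward, sim_done, sim_info = obs.simulate(env.action_space())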
3 changes: 2 additions & 1 deletion grid2op/Backend/Backend.py
@@ -1452,7 +1452,6 @@ def get_action_to_set(self):
             q_s[bus_s == -1] = np.NaN
             dict_["shunt"]["shunt_p"] = p_s
             dict_["shunt"]["shunt_q"] = q_s
-
         set_me.update(dict_)
         return set_me
 
@@ -1547,6 +1546,8 @@ def assert_grid_correct(self):
         my_cls = type(self)
         my_cls.my_bk_act_class = _BackendAction.init_grid(my_cls)
         my_cls._complete_action_class = CompleteAction.init_grid(my_cls)
+        my_cls._complete_action_class._add_shunt_data()
+        my_cls._complete_action_class._update_value_set()
         my_cls.assert_grid_correct_cls()
 
     def assert_grid_correct_after_powerflow(self):
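The two calls added in assert_grid_correct finish the class-level setup of the dynamically built _complete_action_class. The pattern can be sketched with a toy example (illustrative only: ToyAction and ToyGrid are made up, and only the _add_shunt_data name comes from the diff above):

class ToyAction:
    attrs = ["set_bus", "set_line_status"]

    @classmethod
    def init_grid(cls, gridobj):
        # mimic grid2op's init_grid: build a fresh subclass for this grid
        return type("ToyAction_" + gridobj.name, (cls,), {"attrs": list(cls.attrs)})

    @classmethod
    def _add_shunt_data(cls):
        # class-level state that must be filled in after init_grid; forgetting
        # this step is the "minor bug" fixed here for the backend
        cls.attrs = cls.attrs + ["shunt_p", "shunt_q", "shunt_bus"]


class ToyGrid:
    name = "case14"


toy_cls = ToyAction.init_grid(ToyGrid)
toy_cls._add_shunt_data()  # the step assert_grid_correct now performs
assert "shunt_p" in toy_cls.attrs and "shunt_p" not in ToyAction.attrs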
46 changes: 28 additions & 18 deletions grid2op/Reward/_AlarmScore.py
@@ -7,11 +7,12 @@
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
 import numpy as np
+import copy
 
 from grid2op.Exceptions import Grid2OpException
 from grid2op.Reward import AlarmReward
 from grid2op.dtypes import dt_float
-import copy
 
 
 class _AlarmScore(AlarmReward):
     """
@@ -56,19 +57,25 @@ def __init__(self):
         # required if you want to design a custom reward taking into account the
         # alarm feature
         self.reward_min = dt_float(-2.)
-        #we keep other parameters values from AlarmReward as is
+        # we keep other parameters values from AlarmReward as is
 
         self.mult_for_right_zone = 1.5
         self.window_disconnection = 4
 
         self.disc_lines_all_before_cascade = []
         self.n_line = None
 
+        # This class remembers the past state of the grid, this does not make sense for the "simulate" env
+        # so i deactivate it in this case.
+        from grid2op.Observation._ObsEnv import _ObsEnv  # to avoid circular dependencies
+        self._deactivate_reward_cls = (_ObsEnv, )
+
     def initialize(self, env):
         if not env._has_attention_budget:
-            raise Grid2OpException("Impossible to use the \"AlarmReward\" with an environment for which this feature "
+            raise Grid2OpException("Impossible to use the \"_AlarmScore\" with an environment for which this feature "
                                    "is disabled. Please make sure \"env._has_attention_budget\" is set to ``True`` or "
                                    "change the reward class with `grid2op.make(..., reward_class=AnyOtherReward)`")
-        self.n_line=env.n_line
+        self.n_line = env.n_line
         self.reset(env)
 
     def reset(self, env):
@@ -78,39 +85,40 @@ def reset(self, env):
 
     def _lines_disconnected_first(self, disc_lines_at_cascading_time):
         """
-        here we detect the disconnected lines that we will consider to compute the mult_for_zone multiplifying factor.
+        here we detect the disconnected lines that we will consider to compute the `mult_for_zone` multiplying factor.
         Either the lines that were disconnected in a short period before final failure. Otherwise the first lines
-        disconnedted at the time of failure
+        disconnected at the time of failure
         :param disc_lines_at_cascading_time: lines that are disconnected first at time of failure
         :return:
         """
 
-        disc_lines_to_consider_for_score = np.zeros(self.n_line,dtype=bool)
+        disc_lines_to_consider_for_score = np.zeros(self.n_line, dtype=bool)
 
         nb_obs = len(self.disc_lines_all_before_cascade)
 
-        for t in range(nb_obs - self.window_disconnection, nb_obs):
-            disc_lines_to_consider_for_score[self.disc_lines_all_before_cascade[t] >= 0]=True
+        for step in range(nb_obs - self.window_disconnection, nb_obs):
+            disc_lines_to_consider_for_score[self.disc_lines_all_before_cascade[step] >= 0] = True
 
-        if (np.sum(disc_lines_to_consider_for_score) == 0):
-            disc_lines_to_consider_for_score = disc_lines_at_cascading_time==0
+        if np.sum(disc_lines_to_consider_for_score) == 0:
+            disc_lines_to_consider_for_score = disc_lines_at_cascading_time == 0
 
-        #if we are there, it is because we have identified before that the failure is due to disconnected powerlines
-        assert(np.any(disc_lines_to_consider_for_score))
+        # if we are there, it is because we have identified before that the failure is due to disconnected powerlines
+        assert np.any(disc_lines_to_consider_for_score)
 
-        #we transform the vector so that diconnected lines have a zero, to be coherent with env._disc_lines
-        return 1-disc_lines_to_consider_for_score
+        # we transform the vector so that disconnected lines have a zero, to be coherent with env._disc_lines
+        return 1 - disc_lines_to_consider_for_score
 
     def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
+        if isinstance(env, self._deactivate_reward_cls):
+            return self.reward_no_game_over
+
+        disc_lines_now = env._disc_lines
+
         if is_done:
             if not has_error:
                 # agent went until the end
                 return self.reward_max
 
             if np.all(env._disc_lines == -1):
                 # game over is not caused by the tripping of a powerline
                 return self.reward_min
@@ -139,6 +147,8 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
                 return best_score
         else:
             # make sure to deepcopy, otherwise it gets updated with the last timestep value for every previous timesteps
-            self.disc_lines_all_before_cascade.append(copy.deepcopy(disc_lines_now))  # we log the line disconnected over time
+            # we log the line disconnected over time
+            # TODO have a cache there and store only the last few states, most of what is stored here is not used
+            self.disc_lines_all_before_cascade.append(copy.deepcopy(disc_lines_now))
             res = self.reward_no_game_over
             return res
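The _lines_disconnected_first helper marks every line that tripped within the last window_disconnection recorded observations. A standalone numpy sketch of that loop, on toy data (3 lines, 5 recorded steps, using the env._disc_lines convention that -1 means "not disconnected at this step"):

import numpy as np

history = [np.array([-1, -1, -1])] * 3 + [np.array([-1, 0, -1]), np.array([-1, -1, 2])]
window_disconnection = 4
mask = np.zeros(3, dtype=bool)
for step in range(len(history) - window_disconnection, len(history)):
    mask[history[step] >= 0] = True
print(1 - mask)  # [1 0 0]: zeros flag the lines seen disconnected in the window

The actual bug fix, though, is the isinstance(env, self._deactivate_reward_cls) guard at the top of __call__: _AlarmScore accumulates per-step history, and the short-lived _ObsEnv used by obs.simulate has none. A minimal sketch of the same guard in a custom stateful reward (it assumes grid2op's public BaseReward class; MyStatefulReward and its neutral return value are illustrative, not part of this commit):

import copy

from grid2op.Reward import BaseReward
from grid2op.dtypes import dt_float


class MyStatefulReward(BaseReward):
    """illustrative reward that, like _AlarmScore, keeps per-step history"""

    def __init__(self):
        BaseReward.__init__(self)
        self.reward_min = dt_float(-1.0)
        self.reward_max = dt_float(1.0)
        self._disc_lines_history = []
        # same trick as in the hotfix: the env used by obs.simulate (_ObsEnv)
        # has no past, so the stateful logic must be skipped for it
        from grid2op.Observation._ObsEnv import _ObsEnv  # avoid circular import
        self._deactivate_reward_cls = (_ObsEnv,)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        if isinstance(env, self._deactivate_reward_cls):
            # simulated step: return a neutral value, leave the history alone
            return self.reward_max
        # real step: log disconnected lines (deepcopy, as in _AlarmScore,
        # because env._disc_lines is mutated in place at every step)
        self._disc_lines_history.append(copy.deepcopy(env._disc_lines))
        return self.reward_min if has_error else self.reward_max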
9 changes: 4 additions & 5 deletions grid2op/tests/test_Action.py
@@ -194,11 +194,10 @@ def setUp(self):
         self.gridobj = GridObjects_cls()
         self.n_line = self.gridobj.n_line
 
-        self.ActionSpaceClass = ActionSpace.init_grid(self.gridobj)
-        # self.helper_action = ActionSpace(self.gridobj, legal_action=self.game_rules.legal_action)
+        self.ActionSpaceClass = ActionSpace.init_grid(GridObjects_cls)
         act_cls = self._action_setup()
         act_cls = act_cls.init_grid(self.gridobj)
-        self.helper_action = self.ActionSpaceClass(self.gridobj, legal_action=self.game_rules.legal_action,
+        self.helper_action = self.ActionSpaceClass(GridObjects_cls,
+                                                   legal_action=self.game_rules.legal_action,
                                                    actionClass=act_cls)
         self.helper_action.seed(42)
         # save_to_dict(self.res, self.helper_action, "subtype", lambda x: re.sub("(<class ')|('>)", "", "{}".format(x)))
@@ -800,7 +799,7 @@ def test_from_vect_storage(self):
         """test from vect also work with storage action"""
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore")
-            action1 = self.helper_action({"storage_power": [(0, -7.42)]})
+            action1 = self.helper_action({"set_storage": [(0, -7.42)]})
             action2 = self.helper_action({})
 
             vect_act1 = action1.to_vect()
31 changes: 30 additions & 1 deletion grid2op/tests/test_AlarmFeature.py
@@ -15,7 +15,7 @@
 
 from grid2op.operator_attention import LinearAttentionBudget
 from grid2op import make
-from grid2op.Reward import RedispReward
+from grid2op.Reward import RedispReward, _AlarmScore
 from grid2op.Exceptions import Grid2OpException
 from grid2op.Runner import Runner
 from grid2op.Environment import Environment
@@ -436,6 +436,35 @@ def test_kwargs(self):
             obs, reward, done, info = env2.step(env2.action_space())
             assert obs.attention_budget == 3 + 1. / (12. * 8.)
 
+    def test_simulate(self):
+        """issue reported during icaps 2021"""
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            env = make("l2rpn_icaps_2021", test=True, reward_class=_AlarmScore)
+        env.set_thermal_limit([20, 70, 36.049267, 43.361996, 407.20905,
+                               42.96296, 23.125486, 7.005345, 61.224003, 18.283638,
+                               20.992632, 89.384026, 117.01148, 62.883495, 44.568665,
+                               29.756845, 14.604381, 28.99635, 124.59952, 124.59952,
+                               38.46957, 48.00529, 112.23501, 139.56854, 57.25149,
+                               35.785202, 31.468952, 98.922386, 97.78254, 10.58541,
+                               7.2501163, 34.89438, 66.21333, 89.454895, 40.088715,
+                               59.50673, 54.07072, 47.005745, 49.29639, 60.19898,
+                               98.318146, 110.93459, 178.60854, 48.504723, 9.022086,
+                               197.42432, 174.3434, 295.6653, 149.95523, 149.95523,
+                               50.128273, 31.93147, 74.32939, 54.26264, 41.730865,
+                               238.96637, 197.42432, 113.98372, 413.98587])
+        _ = env.reset()
+        # it crashed
+        obs, *_ = env.step(env.action_space())
+        obs, *_ = env.step(env.action_space())
+        alarm_act = env.action_space()
+        alarm_act.raise_alarm = [0]
+        obs, reward, done, info = env.step(alarm_act)
+        assert not done
+        # next step there is a game over due to
+        sim_obs, sim_r, sim_done, sim_info = obs.simulate(env.action_space())
+        assert sim_done
+
+
 if __name__ == "__main__":
     unittest.main()
