Merge pull request #457 from GoubetClem/dev_1.8.2_rulesbyarea

dev_1.8.2 : add default rules to be set by area
rte-france · Jun 6, 2023 · 5ce9f16 · 5ce9f16
2 parents 27c7372 + 686f02e
commit 5ce9f16
Show file tree

Hide file tree

Showing 15 changed files with 482 additions and 73 deletions.
diff --git a/.gitignore b/.gitignore
@@ -67,7 +67,7 @@ var/
 *.egg
 
 # VirtualEnv
-venv_grid2op/
+*venv_grid2op/
 
 # PyInstaller
 #  Usually these files are written by a python script from a template

diff --git a/AUTHORS.txt b/AUTHORS.txt
@@ -15,4 +15,5 @@ Further Contributions by:
     - Mario Jothy
     - Jan-Hendrik Menke
     - Vincent Renault
-    - Florian Schäfer
+    - Florian Schäfer
+    - Clément Goubet
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -96,6 +96,8 @@ Change Log
   `obs.get_env_from_external_forecasts(...)`
 - [ADDED] adding the `TimedOutEnvironment` that takes "do nothing" actions when the agent
   takes too much time to compute. This involves quite some changes in the runner too.
+- [ADDED] Runner is now able to store if an action is legal or ambiguous
+- [IMPROVED] possibility to "chain" the call to simulate when multiple forecast horizons are available
 - [ADDED] experimental support to count the number of "high resolution simulator" (`obs.simulate`, 
   `obs.get_simulator` and `obs.get_forecast_env`) in the environment (see 
   https://github.com/rte-france/Grid2Op/issues/417). It might not work properly in distributed settings

diff --git a/grid2op/Environment/BaseEnv.py b/grid2op/Environment/BaseEnv.py
@@ -3807,7 +3807,8 @@ def forecasts(self):
             self._forecasts = self.chronics_handler.forecasts()
         return self._forecasts
 
-    def _check_rules_correct(self, legalActClass):
+    @staticmethod
+    def _check_rules_correct(legalActClass):
         if isinstance(legalActClass, type):
             # raise Grid2OpException(
             #     'Parameter "legalActClass" used to build the Environment should be a type '

diff --git a/grid2op/Environment/Environment.py b/grid2op/Environment/Environment.py
@@ -279,6 +279,7 @@ def _init_backend(
         self._check_rules_correct(legalActClass)
 
         self._game_rules = RulesChecker(legalActClass=legalActClass)
+        self._game_rules.initialize(self)
         self._legalActClass = legalActClass
 
         # action helper

diff --git a/grid2op/Environment/_ObsEnv.py b/grid2op/Environment/_ObsEnv.py
@@ -183,6 +183,7 @@ def _init_backend(
 
         self._check_rules_correct(legalActClass)
         self._game_rules = RulesChecker(legalActClass=legalActClass)
+        self._game_rules.initialize(self)
         self._legalActClass = legalActClass
         # self._action_space = self._do_nothing
         self.backend.set_thermal_limit(self._thermal_limit_a)

diff --git a/grid2op/Episode/EpisodeData.py b/grid2op/Episode/EpisodeData.py
@@ -9,7 +9,7 @@
 import json
 import os
 import warnings
-
+import copy
 import numpy as np
 
 import grid2op
@@ -139,6 +139,7 @@ class EpisodeData:
     TIMES = "episode_times.json"
     OTHER_REWARDS = "other_rewards.json"
     AG_EXEC_TIMES = "agent_exec_times.npz"
+    LEGAL_AMBIGUOUS = "legal_ambiguous.npz"
     ACTIONS_FILE = "actions.npz"
     ENV_ACTIONS_FILE = "env_modifications.npz"
     OBSERVATIONS_FILE = "observations.npz"
@@ -185,6 +186,9 @@ def __init__(
         get_dataframes=None,
         force_detail=False,
         other_rewards=[],
+        legal=None,
+        ambiguous=None,
+        has_legal_ambiguous=False,
         _init_collections=False,
     ):
         self.parameters = None
@@ -269,6 +273,10 @@ def __init__(
         self.name_sub = action_space.name_sub
         self.force_detail = force_detail
 
+        self.has_legal_ambiguous = has_legal_ambiguous
+        self.legal = copy.deepcopy(legal)
+        self.ambiguous = copy.deepcopy(ambiguous)
+
         if path_save is not None:
             self.agent_path = os.path.abspath(path_save)
             self.episode_path = os.path.join(self.agent_path, name)
@@ -432,7 +440,7 @@ def from_disk(cls, agent_path, name="1"):
         if agent_path is None:
             raise Grid2OpException(
                 'A path to an episode should be provided, please call "from_disk" with '
-                '"agent_path other" than None'
+                '"agent_path" other than None'
             )
         episode_path = os.path.abspath(os.path.join(agent_path, name))
 
@@ -462,6 +470,17 @@ def from_disk(cls, agent_path, name="1"):
             attack = np.load(os.path.join(episode_path, EpisodeData.ATTACK))["data"]
             rewards = np.load(os.path.join(episode_path, EpisodeData.REWARDS))["data"]
 
+            path_legal_ambiguous = os.path.join(episode_path, EpisodeData.LEGAL_AMBIGUOUS)
+            has_legal_ambiguous = False
+            if os.path.exists(path_legal_ambiguous):
+                legal_ambiguous = np.load(path_legal_ambiguous)["data"]
+                legal = copy.deepcopy(legal_ambiguous[:, 0])
+                ambiguous = copy.deepcopy(legal_ambiguous[:, 1])
+                has_legal_ambiguous = True
+            else:
+                legal = None
+                ambiguous = None
+
         except FileNotFoundError as ex:
             raise Grid2OpException(f"EpisodeData file not found \n {str(ex)}")
 
@@ -504,6 +523,9 @@ def from_disk(cls, agent_path, name="1"):
             name=name,
             get_dataframes=True,
             other_rewards=other_rewards,
+            legal=legal,
+            ambiguous=ambiguous,
+            has_legal_ambiguous=has_legal_ambiguous,
             _init_collections=True,
         )
 
@@ -605,58 +627,69 @@ def incr_store(
 
         """
 
-        if self.force_detail or self.serialize:
-            self.actions.update(time_step, act, efficient_storing)
-            self.env_actions.update(time_step, env_act, efficient_storing)
-            # deactive the possibility to do "forecast" in this serialized instance
-            tmp_obs_env = obs._obs_env
-            tmp_inj = obs._forecasted_inj
-            obs._obs_env = None
-            obs._forecasted_inj = []
-            self.observations.update(time_step + 1, obs, efficient_storing)
-            obs._obs_env = tmp_obs_env
-            obs._forecasted_inj = tmp_inj
-
-            if opp_attack is not None:
-                self.attacks.update(time_step, opp_attack, efficient_storing)
+        if not (self.force_detail or self.serialize):
+            return
+
+        self.actions.update(time_step, act, efficient_storing)
+        self.env_actions.update(time_step, env_act, efficient_storing)
+        # deactivate the possibility to do "forecast" in this serialized instance
+        tmp_obs_env = obs._obs_env
+        tmp_inj = obs._forecasted_inj
+        obs._obs_env = None
+        obs._forecasted_inj = []
+        self.observations.update(time_step + 1, obs, efficient_storing)
+        obs._obs_env = tmp_obs_env
+        obs._forecasted_inj = tmp_inj
+
+        if opp_attack is not None:
+            self.attacks.update(time_step, opp_attack, efficient_storing)
+        else:
+            if efficient_storing:
+                self.attacks.collection[time_step - 1, :] = 0.0
             else:
-                if efficient_storing:
-                    self.attacks.collection[time_step - 1, :] = 0.0
+                # might not work !
+                self.attacks = np.concatenate((self.attacks, self.attack_templ))
+
+        if efficient_storing:
+            # efficient way of writing
+            self.times[time_step - 1] = time_step_duration
+            self.rewards[time_step - 1] = reward
+            if "disc_lines" in info:
+                arr = info["disc_lines"]
+                if arr is not None:
+                    self.disc_lines[time_step - 1, :] = arr
                 else:
-                    # might not work !
-                    self.attacks = np.concatenate((self.attacks, self.attack_templ))
+                    self.disc_lines[time_step - 1, :] = self.disc_lines_templ
+        else:
+            # might not work !
+            # completely inefficient way of writing
+            self.times = np.concatenate((self.times, (time_step_duration,)))
+            self.rewards = np.concatenate((self.rewards, (reward,)))
+            if "disc_lines" in info:
+                arr = info["disc_lines"]
+                if arr is not None:
+                    self.disc_lines = np.concatenate(
+                        (self.disc_lines, arr.reshape(1, -1))
+                    )
+                else:
+                    self.disc_lines = np.concatenate(
+                        (self.disc_lines, self.disc_lines_templ)
+                    )
 
+        if "rewards" in info:
+            self.other_rewards.append(
+                {k: self._convert_to_float(v) for k, v in info["rewards"].items()}
+            )
+
+        # TODO add is_illegal and is_ambiguous flags!
+        if self.has_legal_ambiguous:
+            # I need to create everything
             if efficient_storing:
-                # efficient way of writing
-                self.times[time_step - 1] = time_step_duration
-                self.rewards[time_step - 1] = reward
-                if "disc_lines" in info:
-                    arr = info["disc_lines"]
-                    if arr is not None:
-                        self.disc_lines[time_step - 1, :] = arr
-                    else:
-                        self.disc_lines[time_step - 1, :] = self.disc_lines_templ
+                self.legal[time_step - 1] = not info["is_illegal"]
+                self.ambiguous[time_step - 1] = info["is_ambiguous"]
             else:
-                # might not work !
-                # completely inefficient way of writing
-                self.times = np.concatenate((self.times, (time_step_duration,)))
-                self.rewards = np.concatenate((self.rewards, (reward,)))
-                if "disc_lines" in info:
-                    arr = info["disc_lines"]
-                    if arr is not None:
-                        self.disc_lines = np.concatenate(
-                            (self.disc_lines, arr.reshape(1, -1))
-                        )
-                    else:
-                        self.disc_lines = np.concatenate(
-                            (self.disc_lines, self.disc_lines_templ)
-                        )
-
-            if "rewards" in info:
-                self.other_rewards.append(
-                    {k: self._convert_to_float(v) for k, v in info["rewards"].items()}
-                )
-            # TODO add is_illegal and is_ambiguous flags!
+                self.legal = np.concatenate((self.legal, (not info["is_illegal"],)))
+                self.ambiguous = np.concatenate((self.ambiguous, (info["is_ambiguous"],)))
 
     def _convert_to_float(self, el):
         try:

diff --git a/grid2op/Rules/BaseRules.py b/grid2op/Rules/BaseRules.py
@@ -20,6 +20,17 @@ class BaseRules(ABC):
     In :class:`grid2op.Environment`, only action of the users are checked for legality.
 
     """
+
+    def initialize(self, env):
+        """
+        This function is used to inform the class instance about the environment specification. 
+        It can be the place to assert the defined rules are suited for the environment.
+        Parameters
+        ----------
+        env: :class:`grid2op.Environment.Environment`
+            The environment on which the action is performed. The environment instance itself is not fully initialized at this point.
+        """
+        pass
 
     @abstractmethod
     def __call__(self, action, env):

diff --git a/grid2op/Rules/RulesChecker.py b/grid2op/Rules/RulesChecker.py
@@ -50,6 +50,17 @@ def __init__(self, legalActClass=AlwaysLegal):
                 warnings.warn("You passed the legal action as an instance that cannot be deepcopied. It will be "
                               "used 'as is', we do not garantee anything if you modify the original object.")
                 self.legal_action = legalActClass
+
+    def initialize(self, env):
+        """
+        This function is used to inform the class instance about the environment specification. 
+        It can be the place to assert the defined rules are suited for the environment.
+        Parameters
+        ----------
+        env: :class:`grid2op.Environment.Environment`
+            The environment on which the action is performed. 
+        """
+        self.legal_action.initialize(env)
 
     def __call__(self, action, env):
         """

diff --git a/grid2op/Rules/__init__.py b/grid2op/Rules/__init__.py
@@ -6,6 +6,7 @@
     "LookParam",
     "PreventReconnection",
     "PreventDiscoStorageModif",
+    "RulesByArea",
 ]
 
 from grid2op.Rules.RulesChecker import RulesChecker
@@ -15,6 +16,7 @@
 from grid2op.Rules.LookParam import LookParam
 from grid2op.Rules.PreventReconnection import PreventReconnection
 from grid2op.Rules.PreventDiscoStorageModif import PreventDiscoStorageModif
+from grid2op.Rules.rulesByArea import RulesByArea
 import warnings