-
Notifications
You must be signed in to change notification settings - Fork 113
/
gymenv.py
146 lines (120 loc) · 5.65 KB
/
gymenv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import numpy as np
import gym
from grid2op.dtypes import dt_int
from grid2op.Chronics import Multifolder
from grid2op.gym_compat.gym_obs_space import GymObservationSpace
from grid2op.gym_compat.gym_act_space import GymActionSpace
from grid2op.gym_compat.utils import (check_gym_version, sample_seed,
_MAX_GYM_VERSION_RANDINT, GYM_VERSION)
class GymEnv(gym.Env):
    """
    fully implements the openAI gym API by using the :class:`GymActionSpace` and :class:`GymObservationSpace`
    for compliance with openAI gym.

    They can handle action_space_converter or observation_space converter to change the representation of data
    that will be fed to the agent. #TODO

    .. warning::
        The `gym` package has some breaking API change since its version 0.26. Depending on the version installed,
        we attempted, in grid2op, to maintain compatibility both with former version and later one. This makes this
        class behave differently depending on the version of gym you have installed !

        The main changes involve the functions `env.step` and `env.reset`

    Notes
    ------
    The environment passed as input is copied. It is not modified by this "gym environment"

    Examples
    --------
    This can be used like:

    .. code-block:: python

        import grid2op
        from grid2op.gym_compat import GymEnv

        env_name = ...
        env = grid2op.make(env_name)
        gym_env = GymEnv(env)  # is a gym environment properly inheriting from gym.Env !

    """

    def __init__(self, env_init, shuffle_chronics=True):
        # Parameters
        # ----------
        # env_init: the grid2op environment to wrap; it is copied, so the
        #     caller's environment is never modified by this wrapper.
        # shuffle_chronics: bool, when True a new chronic (time series) is
        #     sampled at every reset (only effective when the underlying
        #     chronics handler is a ``Multifolder``, see `_aux_reset`).
        check_gym_version()
        self.init_env = env_init.copy()
        self.action_space = GymActionSpace(self.init_env)
        self.observation_space = GymObservationSpace(self.init_env)
        self.reward_range = self.init_env.reward_range
        self.metadata = self.init_env.metadata
        self._shuffle_chronics = shuffle_chronics

        # Bind the API-version-specific implementations as *instance*
        # attributes (shadowing the bound methods) so that `env.reset` /
        # `env.step` follow whichever gym API is installed.
        if GYM_VERSION <= _MAX_GYM_VERSION_RANDINT:
            # "old" gym API (< 0.26): `env.seed(...)` exists, `reset` may
            # return only the observation and `step` returns a 4-tuple
            self.seed = self._aux_seed
            self.reset = self._aux_reset
            self.step = self._aux_step
        else:
            # "new" gym API (>= 0.26): `reset` returns (obs, info) and
            # `step` returns a 5-tuple with `terminated` / `truncated`;
            # seeding is done through `reset(seed=...)` only
            self.reset = self._aux_reset_new
            self.step = self._aux_step_new

    def _aux_step(self, gym_action):
        # used for gym < 0.26
        # Convert the gym action to a grid2op action, step the underlying
        # environment, and convert the resulting observation back to gym.
        # Returns the old-API 4-tuple (obs, reward, done, info).
        g2op_act = self.action_space.from_gym(gym_action)
        g2op_obs, reward, done, info = self.init_env.step(g2op_act)
        gym_obs = self.observation_space.to_gym(g2op_obs)
        # reward is cast to a plain float for gym compliance
        return gym_obs, float(reward), done, info

    def _aux_step_new(self, gym_action):
        # used for gym >= 0.26
        # TODO refacto with _aux_step
        # Same conversion pipeline as `_aux_step`, but returns the new-API
        # 5-tuple (obs, reward, terminated, truncated, info).
        g2op_act = self.action_space.from_gym(gym_action)
        g2op_obs, reward, terminated, info = self.init_env.step(g2op_act)
        gym_obs = self.observation_space.to_gym(g2op_obs)
        # grid2op does not distinguish truncation from termination, so
        # `truncated` is always False here
        truncated = False  # see https://github.com/openai/gym/pull/2752
        return gym_obs, float(reward), terminated, truncated, info

    def _aux_reset(self, seed=None, return_info=None, options=None):
        # used for gym < 0.26
        # Reset the underlying grid2op environment and return the gym
        # observation; when `return_info` is truthy, also return an info
        # dict containing the chronic id and (if seeded) the seeds used.
        # NOTE(review): `options` is accepted for signature compatibility
        # but is not used here — confirm against grid2op upstream.
        if self._shuffle_chronics and isinstance(
            self.init_env.chronics_handler.real_data, Multifolder
        ):
            # pick a different time series for this episode
            self.init_env.chronics_handler.sample_next_chronics()

        # seeding must happen BEFORE the underlying reset so the new seeds
        # take effect for the episode being started
        if seed is not None:
            seed_, next_seed, underlying_env_seeds = self._aux_seed(seed)

        g2op_obs = self.init_env.reset()
        gym_obs = self.observation_space.to_gym(g2op_obs)

        if return_info:
            chron_id = self.init_env.chronics_handler.get_id()
            info = {"time serie id": chron_id}
            if seed is not None:
                info["seed"] = seed
                info["grid2op_env_seed"] = next_seed
                info["underlying_env_seeds"] = underlying_env_seeds
            return gym_obs, info
        else:
            return gym_obs

    def _aux_reset_new(self, seed=None, options=None):
        # used for gym >= 0.26: always return the (obs, info) pair by
        # delegating to `_aux_reset` with return_info forced to True
        return self._aux_reset(seed, True, options)

    def render(self, mode="human"):
        """for compatibility with open ai gym render function"""
        super(GymEnv, self).render(mode=mode)
        self.init_env.render(mode=mode)

    def close(self):
        # Release the wrapped environment and the converted spaces.
        # `hasattr` guards make this safe to call even if `__init__` did
        # not complete (e.g. when invoked from `__del__`), and the
        # attributes are set to None so a second call is a no-op.
        if hasattr(self, "init_env") and self.init_env is not None:
            self.init_env.close()
            del self.init_env
            self.init_env = None
        if hasattr(self, "action_space") and self.action_space is not None:
            self.action_space.close()
            self.action_space = None
        if hasattr(self, "observation_space") and self.observation_space is not None:
            self.observation_space.close()
            self.observation_space = None

    def _aux_seed(self, seed=None):
        # deprecated in gym >=0.26
        # Seed the gym-level RNG, then derive a seed for the underlying
        # grid2op environment from it.
        # Returns (seed, next_seed, underlying_env_seeds), or a triple of
        # None when no seed was provided.
        if seed is not None:
            # seed the gym env (this populates self._np_random, which is
            # then used to draw the grid2op seed below)
            super().reset(seed=seed)
            # then seed the underlying grid2op env
            max_ = np.iinfo(dt_int).max
            next_seed = sample_seed(max_, self._np_random)
            underlying_env_seeds = self.init_env.seed(next_seed)
            return seed, next_seed, underlying_env_seeds
        return None, None, None

    def __del__(self):
        # delete possible dangling reference
        self.close()