-
Notifications
You must be signed in to change notification settings - Fork 112
/
SingleEnvMultiProcess.py
132 lines (103 loc) · 4.48 KB
/
SingleEnvMultiProcess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import numpy as np
from grid2op.Environment.BaseMultiProcessEnv import BaseMultiProcessEnvironment
class SingleEnvMultiProcess(BaseMultiProcessEnvironment):
    """
    Evaluate a single agent on several parallel copies of the same environment.

    This is a specialization of :class:`BaseMultiProcessEnvironment`; consult the
    documentation of that parent class for more information. It allows you to
    interact at the same time with several copies of one (identical) environment
    running in parallel.

    Attributes
    -----------
    env: `list::grid2op.Environment.Environment`
        A list of environments for which the evaluation will be made in parallel.

    nb_env: ``int``
        Number of parallel underlying environments that will be handled. It is also
        the size of the list of actions that needs to be provided in
        :func:`MultiEnvironment.step` and the return sizes of the lists returned by
        this same function.

    Examples
    --------
    An example on how you can best leverage this class is given in the getting_started
    notebooks. Another simple example is:

    .. code-block:: python

        from grid2op.BaseAgent import DoNothingAgent
        from grid2op.MakeEnv import make
        from grid2op.Environment import SingleEnvMultiProcess

        # create a simple environment
        env = make()
        # number of parallel environments
        nb_env = 2  # change that to adapt to your system
        NB_STEP = 100  # number of steps for each environment
        # create a simple agent
        agent = DoNothingAgent(env.action_space)

        # create the multi environment class
        multi_envs = SingleEnvMultiProcess(env=env, nb_env=nb_env)

        # making it usable
        obs = multi_envs.reset()
        rews = [env.reward_range[0] for i in range(nb_env)]
        dones = [False for i in range(nb_env)]

        # performs the appropriate steps
        for i in range(NB_STEP):
            acts = [None for _ in range(nb_env)]
            for env_act_id in range(nb_env):
                acts[env_act_id] = agent.act(obs[env_act_id], rews[env_act_id], dones[env_act_id])
            obs, rews, dones, infos = multi_envs.step(acts)

            # DO SOMETHING WITH THE AGENT IF YOU WANT

        # close the environments
        multi_envs.close()
        # close the initial environment
        env.close()

    """

    def __init__(self, env, nb_env, obs_as_class=True, return_info=True, logger=None):
        # Replicate references to the same environment nb_env times; the parent
        # class handles spawning one worker per list entry.
        duplicated_envs = [env] * nb_env
        # Derive a child logger only when a logger was actually supplied.
        child_logger = (
            None if logger is None else logger.getChild("SingleEnvMultiProcess")
        )
        super().__init__(
            duplicated_envs,
            obs_as_class=obs_as_class,
            return_info=return_info,
            logger=child_logger,
        )
if __name__ == "__main__":
    # Quick benchmark: compare cumulative reward of a do-nothing agent on
    # nb_env parallel copies of an environment vs. a single sequential run.
    from tqdm import tqdm
    from grid2op import make
    from grid2op.Agent import DoNothingAgent

    nb_env = 8  # change that to adapt to your system
    NB_STEP = 100  # number of steps for each environment

    env = make()
    env.seed(42)
    agent = DoNothingAgent(env.action_space)

    # --- parallel run over nb_env copies of the environment ---
    multi_envs = SingleEnvMultiProcess(env, nb_env)
    obs = multi_envs.reset()
    rews = [env.reward_range[0] for _ in range(nb_env)]
    dones = [False for _ in range(nb_env)]
    total_reward = 0.0
    for i in tqdm(range(NB_STEP)):
        # One action per underlying environment copy.
        acts = [
            agent.act(obs[env_act_id], rews[env_act_id], dones[env_act_id])
            for env_act_id in range(nb_env)
        ]
        obs, rews, dones, infos = multi_envs.step(acts)
        total_reward += np.sum(rews)
    multi_envs.close()

    # --- baseline: same number of steps on the single environment ---
    ob = env.reset()
    rew = env.reward_range[0]
    done = False
    total_reward_single = 0
    for i in tqdm(range(NB_STEP)):
        act = agent.act(ob, rew, done)
        ob, rew, done, info = env.step(act)
        if done:
            # Restart the episode so the step counts stay comparable.
            ob = env.reset()
        total_reward_single += np.sum(rew)
    env.close()

    print("total_reward multi_env: {}".format(total_reward))
    print("total_reward single env: {}".format(total_reward_single))