# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import numpy as np

from grid2op.Environment.BaseMultiProcessEnv import BaseMultiProcessEnvironment


class SingleEnvMultiProcess(BaseMultiProcessEnvironment):
"""
This class allows to evaluate a single agent instance on multiple environments running in parallel.
It is a kind of :class:`BaseMultiProcessEnvironment`. For more information you can consult the
documentation of this parent class. It allows to interact at the same time with different copy of the
(same) environment in parallel
Attributes
-----------
env: `list::grid2op.Environment.Environment`
Al list of environments for which the evaluation will be made in parallel.
nb_env: ``int``
Number of parallel underlying environment that will be handled. It is also the size of the list of actions
that need to be provided in :func:`MultiEnvironment.step` and the return sizes of the list of this
same function.
    Examples
    --------
    An example of how you can best leverage this class is given in the getting_started notebooks. Another
    simple example is:

    .. code-block:: python

        from grid2op import make
        from grid2op.Agent import DoNothingAgent
        from grid2op.Environment import SingleEnvMultiProcess

        # create a simple environment
        env = make()
        # number of parallel environments
        nb_env = 2  # change that to adapt to your system
        NB_STEP = 100  # number of steps for each environment
# create a simple agent
agent = DoNothingAgent(env.action_space)
# create the multi environment class
multi_envs = SingleEnvMultiProcess(env=env, nb_env=nb_env)
        # make it usable
        obs = multi_envs.reset()
        rews = [env.reward_range[0] for i in range(nb_env)]
        dones = [False for i in range(nb_env)]

        # perform the appropriate steps
for i in range(NB_STEP):
acts = [None for _ in range(nb_env)]
for env_act_id in range(nb_env):
acts[env_act_id] = agent.act(obs[env_act_id], rews[env_act_id], dones[env_act_id])
obs, rews, dones, infos = multi_envs.step(acts)
# DO SOMETHING WITH THE AGENT IF YOU WANT
# close the environments
multi_envs.close()
# close the initial environment
env.close()
"""
def __init__(self, env, nb_env, obs_as_class=True, return_info=True):
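        # replicate the (same) environment template nb_env times; the parent class
        # runs each copy in its own subprocess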
envs = [env for _ in range(nb_env)]
super().__init__(envs, obs_as_class=obs_as_class, return_info=return_info)


if __name__ == "__main__":
from tqdm import tqdm
from grid2op import make
from grid2op.Agent import DoNothingAgent
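
    # small demo / benchmark: run a do-nothing agent on nb_env parallel copies of
    # the environment, then on the initial single environment for comparison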
nb_env = 8 # change that to adapt to your system
    NB_STEP = 100  # number of steps for each environment
env = make()
env.seed(42)
agent = DoNothingAgent(env.action_space)
multi_envs = SingleEnvMultiProcess(env, nb_env)
obs = multi_envs.reset()
rews = [env.reward_range[0] for i in range(nb_env)]
dones = [False for i in range(nb_env)]
total_reward = 0.
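
    # at each step, ask the agent for one action per parallel environment, then
    # step all underlying environments at once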
for i in tqdm(range(NB_STEP)):
acts = [None for _ in range(nb_env)]
for env_act_id in range(nb_env):
acts[env_act_id] = agent.act(obs[env_act_id], rews[env_act_id], dones[env_act_id])
obs, rews, dones, infos = multi_envs.step(acts)
total_reward += np.sum(rews)
multi_envs.close()
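
    # now run the same number of steps on the initial (single process) environment
    # to get a point of comparison with the parallel run above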
ob = env.reset()
rew = env.reward_range[0]
done = False
total_reward_single = 0
for i in tqdm(range(NB_STEP)):
act = agent.act(ob, rew, done)
ob, rew, done, info = env.step(act)
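        # reset the single environment whenever an episode ends so that this run
        # also performs NB_STEP steps in total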
if done:
ob = env.reset()
total_reward_single += np.sum(rew)
env.close()
print("total_reward mluti_env: {}".format(total_reward))
print("total_reward single env: {}".format(total_reward_single))