/
BaseAgent.py
65 lines (51 loc) · 2.51 KB
/
BaseAgent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
from abc import ABC, abstractmethod
class BaseAgent(ABC):
    """
    Base class of every agent. All bots / controllers / agents used in the Grid2Op simulator
    should derive from this class.

    To work properly, it is advised to create the agent after the :class:`grid2op.Environment` has been
    created, and to reuse the :attr:`grid2op.Environment.Environment.action_space` to build it.

    Attributes
    ----------
    action_space: :class:`grid2op.Action.ActionSpace`
        The action space, i.e. a tool that can be used to create valid actions. Note that a valid action can
        still be illegal or ambiguous, and so lead to a "game over" or to an error. But at least it will have
        the proper size.

    """

    def __init__(self, action_space):
        # Kept as a public attribute so that subclasses can build their actions from it.
        self.action_space = action_space

    def reset(self, obs):
        """
        This method is called at the beginning of a new episode.

        It is implemented by agents to reset their internal state if needed. The default implementation
        does nothing.

        Parameters
        ----------
        obs: :class:`grid2op.Observation.BaseObservation`
            The first observation corresponding to the initial state of the environment.

        """

    @abstractmethod
    def act(self, observation, reward, done=False):
        """
        This is the main method of an agent. Given the current observation and the current reward (i.e. the
        reward that the environment sent to the agent after the previous action has been implemented), it
        returns the action chosen by the agent.

        This method is abstract: every concrete agent must override it.

        Parameters
        ----------
        observation: :class:`grid2op.Observation.BaseObservation`
            The current observation of the :class:`grid2op.Environment.Environment`

        reward: ``float``
            The current reward. This is the reward obtained by the previous action

        done: ``bool``
            Whether the episode has ended or not. Used to maintain gym compatibility

        Returns
        -------
        res: :class:`grid2op.Action.PlayableAction`
            The action chosen by the bot / controller / agent.

        """