-
Notifications
You must be signed in to change notification settings - Fork 116
/
MultiFolder.py
419 lines (341 loc) · 15.9 KB
/
MultiFolder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import os
import json
import warnings
import numpy as np
from datetime import timedelta, datetime
from grid2op.dtypes import dt_int
from grid2op.Exceptions import *
from grid2op.Chronics.GridValue import GridValue
from grid2op.Chronics.GridStateFromFile import GridStateFromFile
class Multifolder(GridValue):
    """
    Loop over several episodes, each one being stored in its own sub-folder.

    The classes :class:`GridStateFromFile` and :class:`GridStateFromFileWithForecasts` implement the reading of a
    single folder representing a single episode. This class "loops" between different episodes, each one being
    stored in a folder readable by :class:`GridStateFromFile` or one of its derivatives
    (eg. :class:`GridStateFromFileWithForecasts`).

    The sub-folders are sorted in alpha-numeric order when the object is built. This means that, if the folder is
    not modified, the data are always listed in the same order.

    Attributes
    -----------
    gridvalueClass: ``type``, optional
        Type of class used to read the data from the disk. It defaults to :class:`GridStateFromFile`.

    data: :class:`GridStateFromFile`
        Object of type :attr:`Multifolder.gridvalueClass` built by :func:`Multifolder.initialize` for the current
        episode. It is ``None`` until :func:`Multifolder.initialize` is called.

    path: ``str``
        Absolute path where the folders of the episodes are stored.

    sep: ``str``
        Columns separator, forwarded to :attr:`Multifolder.data` when it is built.

    subpaths: ``numpy.ndarray``
        Sorted array of all the directories found in :attr:`Multifolder.path`. Each one should contain data in a
        format that is readable by :attr:`Multifolder.gridvalueClass`.

    chunk_size: ``int`` or ``None``
        Chunk size forwarded to :attr:`Multifolder.data` when it is built.

    _order: ``numpy.ndarray`` or ``None``
        Indexes (in :attr:`Multifolder.subpaths`) of the episodes that can be played, in the order in which they
        are played. It is built by :func:`Multifolder.reset` and can be modified by :func:`Multifolder.shuffle`.

    _prev_cache_id: ``int``
        Position (in :attr:`Multifolder._order`) of the episode used by :func:`Multifolder.initialize` and
        returned by :func:`Multifolder.get_id`.

    """

    def __init__(self, path,
                 time_interval=timedelta(minutes=5),
                 start_datetime=datetime(year=2019, month=1, day=1),
                 gridvalueClass=GridStateFromFile,
                 sep=";", max_iter=-1,
                 chunk_size=None):
        """
        List the episode folders located in ``path``.

        Parameters
        ----------
        path: ``str``
            Folder containing one sub-folder per episode.

        time_interval: ``datetime.timedelta``
            Forwarded to ``GridValue.__init__`` and to the object reading each episode.

        start_datetime: ``datetime.datetime``
            Forwarded to ``GridValue.__init__``.

        gridvalueClass: ``type``
            Class used to read one episode folder.

        sep: ``str``
            Columns separator forwarded to the object reading each episode.

        max_iter: ``int``
            Forwarded to ``GridValue.__init__`` and to the object reading each episode.

        chunk_size: ``int`` or ``None``
            Forwarded to ``GridValue.__init__`` and to the object reading each episode.

        Raises
        ------
        ChronicsError
            If ``path`` does not exist.

        ChronicsNotFoundError
            If ``path`` does not contain any directory.

        """
        GridValue.__init__(self, time_interval=time_interval, max_iter=max_iter, chunk_size=chunk_size,
                           start_datetime=start_datetime)
        self.gridvalueClass = gridvalueClass
        self.data = None
        self.path = os.path.abspath(path)
        self.sep = sep

        # only the listing itself can raise FileNotFoundError, so keep the "try" as small as possible
        try:
            entries = os.listdir(self.path)
        except FileNotFoundError:
            raise ChronicsError("Path \"{}\" doesn't exists.".format(self.path)) from None

        candidates = (os.path.join(self.path, el) for el in entries)
        # sorted so that the episodes are always listed in the same order
        self.subpaths = np.array(sorted(el for el in candidates if os.path.isdir(el)))

        if len(self.subpaths) == 0:
            raise ChronicsNotFoundError("Not chronics are found in \"{}\". Make sure there are at least "
                                        "1 chronics folder there.".format(self.path))
        # TODO clarify whether self.subpaths should be shuffled here

        self.chunk_size = chunk_size

        # for saving
        self._order_backend_loads = None
        self._order_backend_prods = None
        self._order_backend_lines = None
        self._order_backend_subs = None
        self._names_chronics_to_backend = None

        # improving looping strategy
        self._filter = self._default_filter
        self._prev_cache_id = 0
        self._order = None

    def _default_filter(self, x):
        """
        Default filter used at the initialization. It accepts every path: all the episodes are kept.
        """
        return True

    def _ensure_order(self):
        """
        Build :attr:`Multifolder._order` (with :func:`Multifolder.reset`) if it has not been built yet.
        """
        if self._order is None:
            self.reset()

    def set_filter(self, filter_fun):
        """
        Assign a filtering function used to select the episodes the next time :func:`Multifolder.reset` is called.

        **NB** ``filter_fun`` is applied to all the elements of :attr:`Multifolder.subpaths`. If it returns ``True``
        the episode is kept, if it returns ``False`` the episode is discarded.

        **NB** this has no effect until :func:`Multifolder.reset` is called.

        Parameters
        ----------
        filter_fun: ``callable``
            Function taking a path (element of :attr:`Multifolder.subpaths`) and returning a boolean.

        """
        self._filter = filter_fun

    def next_chronics(self):
        """
        Move to the next episode of :attr:`Multifolder._order`, going back to the first one after the last one.
        """
        self._ensure_order()
        # TODO implement the shuffling of self._order once all the episodes have been played
        self._prev_cache_id = (self._prev_cache_id + 1) % len(self._order)

    def sample_next_chronics(self, probabilities=None):
        """
        This function should be called before "next_chronics".

        It can be used to sample non uniformly the next chronics.

        Parameters
        -----------
        probabilities: ``np.ndarray``, optional
            Non negative weights, one per chronics of :attr:`Multifolder._order`. They do not need to sum to 1
            (they are normalized here) and the array given as input is not modified. If ``None``, all the
            chronics have the same probability to be selected.

        Returns
        -------
        id_sel: ``numpy.ndarray``
            The position(s), in :attr:`Multifolder._order`, of the chronics that was selected.

        Raises
        ------
        ValueError
            If the sum of the weights is not strictly positive.

        """
        self._ensure_order()
        if probabilities is None:
            weights = np.ones(len(self._order), dtype=float)
        else:
            # float copy: the division below neither fails on integer input nor modifies the caller's array
            weights = np.array(probabilities, dtype=float)

        total = weights.sum()
        if not total > 0.:
            raise ValueError("Impossible to sample the next chronics: the sum of \"probabilities\" "
                             "should be > 0.")
        # make sure it sums to 1
        weights /= total

        # take one at "random" among these
        selected = self.space_prng.choice(self._order, p=weights)
        id_sel = np.where(self._order == selected)[0]
        # "_prev_cache_id" is a position in "_order" (see "initialize"), not an index of "subpaths".
        # Stored minus one because "next_chronics" (expected to be called next) will increment it.
        self._prev_cache_id = int(id_sel[0]) - 1
        return id_sel

    def reset(self):
        """
        Rebuild the :attr:`Multifolder._order`. This should be called after a call to
        :func:`Multifolder.set_filter` is performed.

        **NB** This "reset" is different from the "env.reset". It should be only called after the function to set
        the filtering function has been called.

        Returns
        -------
        new_order: ``numpy.ndarray``, dtype: str
            The selected chronics paths after a call to this method.

        Raises
        ------
        RuntimeError
            If the filter discards all the episodes. In this case the previous order is left untouched.

        """
        kept = [i for i, path in enumerate(self.subpaths) if self._filter(path)]
        if len(kept) == 0:
            raise RuntimeError("Impossible to initialize the Multifolder. Your \"filter_fun\" filters out all the "
                               "possible scenarios.")
        self._order = np.array(kept)
        self._prev_cache_id = 0
        # TODO shuffle self._order here (with self.space_prng)
        return self.subpaths[self._order]

    def initialize(self, order_backend_loads, order_backend_prods, order_backend_lines, order_backend_subs,
                   names_chronics_to_backend=None):
        """
        Build and initialize :attr:`Multifolder.data` for the current episode.

        The arguments are stored (they are reused by :func:`Multifolder.split_and_save`) and forwarded, unchanged,
        to the ``initialize`` method of :attr:`Multifolder.data`.

        Parameters
        ----------
        order_backend_loads:
            Forwarded to :attr:`Multifolder.data`; its length is stored in ``n_load``.

        order_backend_prods:
            Forwarded to :attr:`Multifolder.data`; its length is stored in ``n_gen``.

        order_backend_lines:
            Forwarded to :attr:`Multifolder.data`; its length is stored in ``n_line``.

        order_backend_subs:
            Forwarded to :attr:`Multifolder.data`.

        names_chronics_to_backend:
            Forwarded to :attr:`Multifolder.data`.

        """
        self._order_backend_loads = order_backend_loads
        self._order_backend_prods = order_backend_prods
        self._order_backend_lines = order_backend_lines
        self._order_backend_subs = order_backend_subs
        self._names_chronics_to_backend = names_chronics_to_backend

        self.n_gen = len(order_backend_prods)
        self.n_load = len(order_backend_loads)
        self.n_line = len(order_backend_lines)

        # initialize the order the first time this is called
        self._ensure_order()

        id_scenario = self._order[self._prev_cache_id]
        this_path = self.subpaths[id_scenario]
        self.data = self.gridvalueClass(time_interval=self.time_interval,
                                        sep=self.sep,
                                        path=this_path,
                                        max_iter=self.max_iter,
                                        chunk_size=self.chunk_size)
        # NOTE(review): "seed" is called as a method on self.data just below; if it is also a method of this
        # class then "self.seed is not None" is always True. To be confirmed against GridValue.
        if self.seed is not None:
            max_int = np.iinfo(dt_int).max
            seed_chronics = self.space_prng.randint(max_int)
            self.data.seed(seed_chronics)

        self.data.initialize(order_backend_loads, order_backend_prods, order_backend_lines, order_backend_subs,
                             names_chronics_to_backend=names_chronics_to_backend)

    def done(self):
        """
        Tells the :class:`grid2op.Environment` if the episode is over.

        Returns
        -------
        res: ``bool``
            Whether or not the episode, represented by :attr:`Multifolder.data` is over.

        """
        return self.data.done()

    def load_next(self):
        """
        Load the next data from the current episode. It loads the next time step for the current episode.

        Returns
        -------
        See the return type of :class:`GridStateFromFile.load_next` (or of :attr:`Multifolder.gridvalueClass` if it
        has been changed) for more information.

        """
        return self.data.load_next()

    def check_validity(self, backend):
        """
        This method checks that the data loaded can be properly read and understood by the
        :class:`grid2op.Backend`.

        Parameters
        ----------
        backend: :class:`grid2op.Backend`
            The backend used for the experiment.

        Returns
        -------
        See the return type of :class:`GridStateFromFile.check_validity` (or of :attr:`Multifolder.gridvalueClass`
        if it has been changed) for more information.

        """
        return self.data.check_validity(backend)

    def forecasts(self):
        """
        The representation of the forecasted grid state(s), if any.

        Returns
        -------
        See the return type of :class:`GridStateFromFile.forecasts` (or of :attr:`Multifolder.gridvalueClass` if it
        has been changed) for more information.

        """
        return self.data.forecasts()

    def tell_id(self, id_num):
        """
        Set the position, in :attr:`Multifolder._order`, of the current chronics.

        If ``id_num`` is greater than the number of episodes, it wraps around (modulo the number of episodes):
        episodes are played indefinitely in the same order.

        Parameters
        ----------
        id_num: ``int``
            Id of the chronics to load.

        """
        self._ensure_order()
        self._prev_cache_id = id_num % len(self._order)

    def get_id(self) -> str:
        """
        Full absolute path of the current folder used for the current episode.

        Returns
        -------
        res: ``str``
            Path from which the data are generated for the current episode.

        """
        self._ensure_order()
        return self.subpaths[self._order[self._prev_cache_id]]

    def max_timestep(self):
        """
        Maximum number of time steps of the current episode, as given by :attr:`Multifolder.data`.
        """
        return self.data.max_timestep()

    def shuffle(self, shuffler):
        """
        This method is used to have a better control on the order in which the subfolders containing the episodes
        are processed.

        It can focus the evaluation on one specific folder, shuffle the folders, use only a subset of them etc. See
        the examples for more information.

        Parameters
        ----------
        shuffler: ``object``
            Shuffler should be a function that is called on :attr:`Multifolder._order` (the array of the indexes,
            in :attr:`Multifolder.subpaths`, of the episodes currently selected) and that returns the new array of
            indexes. It can also be used to remove some paths if needed (see example).

        Returns
        --------
        new_order: ``numpy.ndarray``, dtype: str
            The order in which the chronics will be looped through

        Raises
        ------
        RuntimeError
            If the shuffler removes all the episodes. In this case the previous order is left untouched.

        Examples
        ---------
        If you want to simply shuffle the data you can do:

        .. code-block:: python

            import numpy as np
            data = Multifolder(path=".")
            data.shuffle(shuffler=lambda x: x[np.random.choice(len(x), size=len(x), replace=False)])

        If you want to use only a subset of the path, say for example the path with index 1, 5, and 6

        .. code-block:: python

            data = Multifolder(path=".")
            data.shuffle(shuffler=lambda x: x[[1, 5, 6]])

        """
        # the order does not exist before the first call to "initialize" or "reset" (as in the examples above)
        self._ensure_order()
        new_order = np.atleast_1d(np.asarray(shuffler(self._order)))
        if new_order.size == 0:
            raise RuntimeError("Impossible to shuffle the Multifolder. Your \"shuffler\" removes all the "
                               "possible scenarios.")
        self._order = new_order
        # the new order can be shorter than the previous one: keep the current position valid
        self._prev_cache_id %= len(self._order)
        return self.subpaths[self._order]

    def set_chunk_size(self, new_chunk_size):
        """
        Change the chunk size forwarded to the objects reading the episodes built after this call.
        """
        self.chunk_size = new_chunk_size

    def _per_chronics(self, value):
        """
        Return ``value`` if it is a dictionary, otherwise a dictionary mapping the name of each episode folder
        to ``value``.
        """
        if isinstance(value, dict):
            return value
        return {os.path.basename(subpath): value for subpath in self.subpaths}

    def split_and_save(self, datetime_beg, datetime_end, path_out):
        """
        This function allows you to split the data (keeping only the data between datetime_beg and datetime_end)
        and to save it on your local machine. This is especially handy if you want to extract only a piece of the
        dataset we provide for example.

        A file "split_and_save_meta_params.json", storing the parameters used and the seeds, is also written in
        ``path_out``. If this file cannot be written a warning is issued.

        # TODO add an example somewhere

        Parameters
        ----------
        datetime_beg:
            Either a single value, used for all the episodes, or a dictionary whose keys are the names of the
            episode folders. In the latter case, the episodes that are not in the dictionary are skipped.
            See example (coming soon)

        datetime_end:
            Either a single value, used for all the episodes, or a dictionary whose keys are the names of the
            episode folders. See example (coming soon)

        path_out: ``str``
            The path where the data will be stored.

        """
        datetime_beg = self._per_chronics(datetime_beg)
        datetime_end = self._per_chronics(datetime_end)

        seed_chronics_all = {}
        for subpath in self.subpaths:
            id_this_chron = os.path.basename(subpath)
            if id_this_chron not in datetime_beg:
                continue
            tmp = self.gridvalueClass(time_interval=self.time_interval,
                                      sep=self.sep,
                                      path=subpath,
                                      max_iter=self.max_iter,
                                      chunk_size=self.chunk_size)
            seed_chronics = None
            # NOTE(review): same remark as in "initialize" about "self.seed" possibly being a method
            if self.seed is not None:
                max_int = np.iinfo(dt_int).max
                seed_chronics = self.space_prng.randint(max_int)
                tmp.seed(seed_chronics)
            seed_chronics_all[subpath] = seed_chronics
            tmp.initialize(self._order_backend_loads,
                           self._order_backend_prods,
                           self._order_backend_lines,
                           self._order_backend_subs,
                           self._names_chronics_to_backend)
            path_out_chron = os.path.join(path_out, id_this_chron)
            tmp.split_and_save(datetime_beg[id_this_chron], datetime_end[id_this_chron], path_out_chron)

        meta_params = {
            "datetime_beg": datetime_beg,
            "datetime_end": datetime_end,
            "path_out": path_out,
            "all_seeds": seed_chronics_all,
        }
        # saving the metadata is "best effort": a failure here should not invalidate the data already written
        try:
            with open(os.path.join(path_out, "split_and_save_meta_params.json"), "w", encoding="utf-8") as f:
                json.dump(obj=meta_params, fp=f,
                          sort_keys=True,
                          indent=4
                          )
        except Exception as exc_:
            warnings.warn("Impossible to save the \"metadata\" for the chronics with error:\n\"{}\""
                          "".format(exc_))

    def fast_forward(self, nb_timestep):
        """
        This method allows you to skip some time steps at the beginning of the chronics.

        This is useful at the beginning of the training, if you want your agent to learn on more diverse scenarios.
        Indeed, the data provided in the chronics usually starts always at the same date time (for example Jan 1st
        at 00:00). This can lead to suboptimal exploration, as during this phase, only a few time steps are managed
        by the agent, so in general these few time steps will correspond to grid state around Jan 1st at 00:00.

        Parameters
        ----------
        nb_timestep: ``int``
            Number of time steps to "fast forward"

        """
        for _ in range(nb_timestep):
            self.data.load_next()