/
clear_sky.py
371 lines (310 loc) · 12.7 KB
/
clear_sky.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
"""Clear sky transformer for solar time-series."""
__author__ = ["ciaran-g"]
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from sktime.transformations.base import BaseTransformer
# todo: update function?
# todo: clock changes, time-zone aware index, milliseconds?
class ClearSky(BaseTransformer):
"""Clear sky transformer for solar data.
This is a transformation which converts a time series from it's original
domain into a percentage domain. The numerator at each time step in the
transformation is the input values, the denominator is a weighted
quantile of the time series for that particular time step. In the example
of solar power transformations, the denominator is an approximation of the
clear sky power, and the output of the transformation is the clearness index.
The clear sky power, i.e. the denominator, is calculated on a grid containing
each unique combination of time-of-day and day-of-year. The spacing of the
grid depends on the frequency of the input data.
The weights are defined using von-mises kernels with bandwidths chosen by the
user.
This transformation can be inaccurate at low values, in the solar example during
early morning and late evening. Therefore, clear sky values below a threshold can
be fixed to zero in the transformed domain. Denominator values of zero are set
to zero in the transformed domain by default.
This transformer is based on the work detailed in [1]_.
Parameters
----------
quantile_prob : float, default=0.95
The probability level used to calculate the weighted quantile
bw_diurnal : float, default=100
The bandwidth of the diurnal kernel. This is the kappa value of the
von mises kernel for time of day.
bw_annual : float, default=10
The bandwidth of the annual kernel. This is the kappa value of the
von mises kernel for day of year.
min_thresh : float, default=0
The threshold of the clear sky power below which values are
set to zero in the transformed domain.
n_jobs : int or None, default=None
Number of jobs to run in parallel.
None means 1 unless in a joblib.parallel_backend context.
-1 means using all processors.
backend : str, default="loky"
Specify the parallelisation backend implementation in joblib, where
"loky" is used by default.
References
----------
.. [1] https://doi.org/10.1016/j.solener.2009.05.016
Examples
--------
>>> from sktime.transformations.series.clear_sky import ClearSky # doctest: +SKIP
>>> from sktime.datasets import load_solar # doctest: +SKIP
>>> y = load_solar() # doctest: +SKIP
>>> transformer = ClearSky() # doctest: +SKIP
>>> # takes ~1min
>>> y_trafo = transformer.fit_transform(y) # doctest: +SKIP
"""
_tags = {
# packaging info
# --------------
"authors": ["ciaran-g"],
"maintainers": ["ciaran-g"],
"python_dependencies": ["statsmodels", "scipy"],
# estimator type
# --------------
"scitype:transform-input": "Series",
"scitype:transform-output": "Series",
"scitype:transform-labels": "None",
"scitype:instancewise": True, # is this an instance-wise transform?
"capability:inverse_transform": True, # can the transformer inverse transform?
"univariate-only": True, # can the transformer handle multivariate X?
"X_inner_mtype": [
"pd.Series",
], # which mtypes do _fit/_predict support for X?
"y_inner_mtype": "None", # which mtypes do _fit/_predict support for y?
"requires_y": False, # does y need to be passed in fit?
"enforce_index_type": [
pd.DatetimeIndex,
pd.PeriodIndex,
], # index type that needs to be enforced in X/y
"fit_is_empty": False, # is fit empty and can be skipped? Yes = True
"X-y-must-have-same-index": False, # can estimator handle different X/y index?
"transform-returns-same-time-index": True,
"skip-inverse-transform": False, # is inverse-transform skipped when called?
"capability:unequal_length": False,
"capability:unequal_length:removes": True, # ?
"handles-missing-data": False,
"capability:missing_values:removes": True,
}
def __init__(
self,
quantile_prob=0.95,
bw_diurnal=100,
bw_annual=10,
min_thresh=0,
n_jobs=None,
backend="loky",
):
self.quantile_prob = quantile_prob
self.bw_diurnal = bw_diurnal
self.bw_annual = bw_annual
self.min_thresh = min_thresh
self.n_jobs = n_jobs
self.backend = backend
super().__init__()
def _fit(self, X, y=None):
"""Fit transformer to X and y.
private _fit containing the core logic, called from fit
Parameters
----------
X : Series of pd.DataFrame
Data used to estimate the clear sky power.
y : Ignored argument for interface compatibility.
Returns
-------
self: reference to self
"""
# check that the data is formatted correctly etc
self.freq = _check_index(X)
# now get grid of model
df = pd.DataFrame(index=X.index)
df["yday"] = df.index.dayofyear
df["tod"] = df.index.hour + df.index.minute / 60 + df.index.second / 60
# set up smoothing grid
tod = pd.timedelta_range(start="0T", end="1D", freq=self.freq)[:-1]
tod = [(x.total_seconds() / (60 * 60)) for x in tod.to_pytimedelta()]
yday = pd.RangeIndex(start=1, stop=367)
indx = pd.MultiIndex.from_product([yday, tod], names=["yday", "tod"])
# set up parallel function and backend
parallel = Parallel(n_jobs=self.n_jobs, backend=self.backend)
def par_csp(x):
res = _clearskypower(
y=X,
q=self.quantile_prob,
tod_i=x[1],
doy_i=x[0],
tod_vec=df["tod"],
doy_vec=df["yday"],
bw_tod=self.bw_diurnal,
bw_doy=self.bw_annual,
)
return res
# calculate the csp
csp = parallel(delayed(par_csp)(name) for name in indx)
csp = pd.Series(csp, index=indx, dtype="float64")
self.clearskypower = csp.sort_index()
return self
def _transform(self, X, y=None):
"""Transform X and return a transformed version.
private _transform containing core logic, called from transform
Parameters
----------
X : Series of pd.DataFrame
Data used to be transformed.
y : Ignored argument for interface compatibility.
Returns
-------
X_trafo : transformed version of X
"""
_freq_ind = _check_index(X)
if self.freq != _freq_ind:
raise ValueError(
"""
Change in frequency detected from original input. Make sure
X is the same frequency as used in .fit().
"""
)
# get required seasonal index
yday = X.index.dayofyear
tod = X.index.hour + X.index.minute / 60 + X.index.second / 60
indx_seasonal = pd.MultiIndex.from_arrays([yday, tod], names=["yday", "tod"])
# look up values and overwrite index
csp = self.clearskypower[indx_seasonal].copy()
csp.index = X.index
X_trafo = X / csp
# threshold for small morning/evening values
X_trafo[(csp <= self.min_thresh) & (X.notnull())] = 0
return X_trafo
def _inverse_transform(self, X, y=None):
"""Inverse transform, inverse operation to transform.
private _inverse_transform containing core logic, called from
inverse_transform
Parameters
----------
X : Series of pd.DataFrame
Data used to be inversed transformed.
y : Ignored argument for interface compatibility.
Returns
-------
X_trafo : inverse transformed version of X
"""
_freq_ind = _check_index(X)
if self.freq != _freq_ind:
raise ValueError(
"""
Change in frequency detected from original input. Make sure
X is the same frequency as used in .fit().
"""
)
yday = X.index.dayofyear
tod = X.index.hour + X.index.minute / 60 + X.index.second / 60
indx_seasonal = pd.MultiIndex.from_arrays([yday, tod], names=["yday", "tod"])
# look up values and overwrite index
csp = self.clearskypower[indx_seasonal].copy()
csp.index = X.index
X_trafo = X * csp
return X_trafo
def _get_fitted_params(self):
"""Get fitted parameters.
Returns
-------
fitted_params : dict
"""
params = {"clearskypower": self.clearskypower}
return params
@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Parameters
----------
parameter_set : str, default="default"
Name of the set of test parameters to return, for use in tests. If no
special parameters are defined for a value, will return ``"default"`` set.
There are currently no reserved values for transformers.
Returns
-------
params : dict or list of dict, default = {}
Parameters to create testing instances of the class
Each dict are parameters to construct an "interesting" test instance, i.e.,
``MyClass(**params)`` or ``MyClass(**params[i])`` creates a valid test
instance.
``create_test_instance`` uses the first (or only) dictionary in ``params``
"""
params = {
"quantile_prob": 0.95,
"bw_diurnal": 100,
"bw_annual": 10,
"min_thresh": None,
}
return params
def _clearskypower(y, q, tod_i, doy_i, tod_vec, doy_vec, bw_tod, bw_doy):
"""Estimate the clear sky power for a given day-of-year and hour-of-day.
Parameters
----------
y : Series of measurements
q : Probability level used for the quantile
tod_i : time-of-day of interest in hours
doy_i : day-of-year of interest in days
tod_vec : Series of time-of-day corresponding to the index of y
doy_vec: Series of day-of-year corresponding to the index of y
bw_tod : Kappa value used for defining weights for time-of-day
bw_doy : Kappa value used for defining weights for day-of-year
Returns
-------
csp : float
The clear sky power at tod_i and doy_i
"""
from scipy.stats import vonmises
from statsmodels.stats.weightstats import DescrStatsW
wts_tod = vonmises.pdf(
x=tod_i * 2 * np.pi / 24, kappa=bw_tod, loc=tod_vec * 2 * np.pi / 24
)
wts_doy = vonmises.pdf(
x=doy_i * 2 * np.pi / 365.25, kappa=bw_doy, loc=doy_vec * 2 * np.pi / 365.25
)
wts = wts_doy * wts_tod
wts = wts / wts.sum()
csp = DescrStatsW(y, weights=wts).quantile(probs=q).values[0]
return csp
def _check_index(X):
"""Check input value frequency is set and we have the correct index.
Parameters
----------
X : Series or pd.DataFrame
Data used to be inversed transformed.
Raises
------
ValueError : Input index must be class pd.DatetimeIndex or pd.PeriodIndex.
ValueError : Input index frequency cannot be inferred and is not set.
ValueError : Frequency of data not suitable for transformer as is.
Returns
-------
freq_ind : str or None
Frequency of data in string format
"""
if not (isinstance(X.index, pd.DatetimeIndex)) | (
isinstance(X.index, pd.PeriodIndex)
):
raise ValueError(
"Input index must be class pd.DatetimeIndex or pd.PeriodIndex."
)
# check that it has a frequency, if not infer
freq_ind = X.index.freq
if freq_ind is None:
freq_ind = pd.infer_freq(X.index)
if freq_ind is None:
raise ValueError("Input index frequency cannot be inferred and is not set.")
tod = pd.timedelta_range(start="0T", end="1D", freq=freq_ind)
# check frequency of tod
if (tod.freq > pd.offsets.Day(1)) | (tod.freq < pd.offsets.Second(1)):
raise ValueError(
"""
Transformer intended to be used with input frequency of greater than
or equal to one day and with a frequency of less or equal to than
1 second. Contributions welcome on adapting for these use cases.
"""
)
return freq_ind