/
bcsd.py
202 lines (155 loc) · 5.89 KB
/
bcsd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import collections
import numpy as np
import pandas as pd
from sklearn.base import RegressorMixin
from sklearn.linear_model.base import LinearModel
from sklearn.utils.validation import check_is_fitted
from .utils import QuantileMapper, ensure_samples_features
def MONTH_GROUPER(x):
return x.month
class BcsdBase(LinearModel, RegressorMixin):
""" Base class for BCSD model.
"""
_fit_attributes = ['y_climo_', 'quantile_mappers_']
def __init__(self, time_grouper=MONTH_GROUPER, **qm_kwargs):
if isinstance(time_grouper, str):
self.time_grouper = pd.Grouper(freq=time_grouper)
else:
self.time_grouper = time_grouper
self.qm_kwargs = qm_kwargs
def _qm_fit_by_group(self, groups):
""" helper function to fit quantile mappers by group
Note that we store these mappers for later
"""
self.quantile_mappers_ = {}
for key, group in groups:
data = ensure_samples_features(group)
self.quantile_mappers_[key] = QuantileMapper(**self.qm_kwargs).fit(data)
def _qm_transform_by_group(self, groups):
""" helper function to apply quantile mapping by group
Note that we recombine the dataframes using pd.concat, there may be a better way to do this
"""
dfs = []
for key, group in groups:
data = ensure_samples_features(group)
qmapped = self.quantile_mappers_[key].transform(data)
dfs.append(pd.DataFrame(qmapped, index=group.index, columns=data.columns))
return pd.concat(dfs).sort_index()
class BcsdPrecipitation(BcsdBase):
""" Classic BCSD model for Precipitation
Parameters
----------
time_grouper : str or pd.Grouper, optional
Pandas time frequency str or Grouper object. Specifies how to group
time periods. Default is 'M' (e.g. Monthly).
**qm_kwargs
Keyword arguments to pass to QuantileMapper.
Attributes
----------
time_grouper : pd.Grouper
Linear Regression object.
quantile_mappers_ : dict
QuantileMapper objects (one for each time group).
"""
def fit(self, X, y):
""" Fit BcsdPrecipitation model
Parameters
----------
X : pd.Series or pd.DataFrame, shape (n_samples, 1)
Training data
y : pd.Series or pd.DataFrame, shape (n_samples, 1)
Target values.
Returns
-------
self : returns an instance of self.
"""
y_groups = y.groupby(self.time_grouper)
# calculate the climatologies
self.y_climo_ = y_groups.mean()
if self.y_climo_.values.min() <= 0:
raise ValueError('Invalid value in target climatology')
# fit the quantile mappers
self._qm_fit_by_group(y_groups)
return self
def predict(self, X):
"""Predict using the BcsdPrecipitation model
Parameters
----------
X : pd.Series or pd.DataFrame, shape (n_samples, 1)
Samples.
Returns
-------
C : pd.DataFrame, shape (n_samples, 1)
Returns predicted values.
"""
check_is_fitted(self, self._fit_attributes)
X = ensure_samples_features(X)
# Bias correction
# apply quantile mapping by month
Xqm = self._qm_transform_by_group(X.groupby(self.time_grouper))
# calculate the anomalies as a ratio of the training data
return self._calc_ratio_anoms(Xqm, self.y_climo_)
def _calc_ratio_anoms(self, obj, climatology):
dfs = []
for key, group in obj.groupby(self.time_grouper):
dfs.append(group / climatology.loc[key].values)
out = pd.concat(dfs).sort_index()
assert obj.shape == out.shape
return out
class BcsdTemperature(BcsdBase):
def fit(self, X, y):
""" Fit BcsdTemperature model
Parameters
----------
X : pd.Series or pd.DataFrame, shape (n_samples, 1)
Training data
y : pd.Series or pd.DataFrame, shape (n_samples, 1)
Target values.
Returns
-------
self : returns an instance of self.
"""
# calculate the climatologies
self._x_climo = X.groupby(self.time_grouper).mean()
y_groups = y.groupby(self.time_grouper)
self.y_climo_ = y_groups.mean()
# fit the quantile mappers
self._qm_fit_by_group(y_groups)
return self
def predict(self, X):
""" Predict using the BcsdTemperature model
Parameters
----------
X : DataFrame, shape (n_samples, 1)
Samples.
Returns
-------
C : pd.DataFrame, shape (n_samples, 1)
Returns predicted values.
"""
check_is_fitted(self, self._fit_attributes)
X = ensure_samples_features(X)
# Calculate the 9-year running mean for each month
def rolling_func(x):
return x.rolling(9, center=True, min_periods=1).mean()
X_rolling_mean = X.groupby(self.time_grouper).apply(rolling_func)
# calc shift
# why isn't this working??
# X_shift = X_rolling_mean.groupby(self.time_grouper) - self._x_climo
X_shift = self._remove_climatology(X_rolling_mean, self._x_climo)
# remove shift
X_no_shift = X - X_shift
# Bias correction
# apply quantile mapping by month
Xqm = self._qm_transform_by_group(X_no_shift.groupby(self.time_grouper))
# restore the shift
X_qm_with_shift = X_shift + Xqm
# calculate the anomalies
return self._remove_climatology(X_qm_with_shift, self.y_climo_)
def _remove_climatology(self, obj, climatology):
dfs = []
for key, group in obj.groupby(self.time_grouper):
dfs.append(group - climatology.loc[key].values)
out = pd.concat(dfs).sort_index()
assert obj.shape == out.shape
return out