/
boxcox.py
164 lines (127 loc) · 5.43 KB
/
boxcox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# -*- coding: utf-8 -*-
from scipy import stats
from sklearn.utils.validation import check_is_fitted
import numpy as np
import warnings
from .base import BaseEndogTransformer
__all__ = ['BoxCoxEndogTransformer']
class BoxCoxEndogTransformer(BaseEndogTransformer):
    r"""Apply the Box-Cox transformation to an endogenous array

    The Box-Cox transformation is applied to non-normal data to coerce it more
    towards a normal distribution. It's specified as::

        (((y + lam2) ** lam1) - 1) / lam1, if lam1 != 0, else
        log(y + lam2)

    where ``lam1`` is ``lmbda`` (given, or estimated in ``fit``) and ``lam2``
    is ``lmbda2``.

    Parameters
    ----------
    lmbda : float or None, optional (default=None)
        The lambda value for the Box-Cox transformation, if known. If not
        specified, it will be estimated via MLE.

    lmbda2 : float, optional (default=0.)
        A non-negative value to add to ``y`` to make it positive. If, after
        adding ``lmbda2``, there are still values <= 0, the behavior in
        ``transform`` is controlled by ``neg_action``.

    neg_action : str, optional (default="raise")
        How to respond if any values in ``y <= 0`` after adding ``lmbda2``.
        One of ('raise', 'warn', 'ignore'). If anything other than 'raise',
        values <= 0 will be truncated to the value of ``floor``.

    floor : float, optional (default=1e-16)
        A positive value to truncate values to if there are values in ``y``
        that are zero or negative and ``neg_action`` is not 'raise'. Note that
        if values are truncated, invertibility will not be preserved, and the
        transformed array may not be perfectly inverse-transformed.

    Attributes
    ----------
    lam1_ : float
        The lambda used by the transformation: ``lmbda`` if it was given,
        otherwise the value estimated in ``fit``.

    lam2_ : float
        The shift added to ``y`` (the value of ``lmbda2`` at fit time).
    """

    def __init__(self, lmbda=None, lmbda2=0, neg_action="raise", floor=1e-16):
        self.lmbda = lmbda
        self.lmbda2 = lmbda2
        self.neg_action = neg_action
        self.floor = floor

    def fit(self, y, exogenous=None):
        """Fit the transformer

        Learns the value of ``lmbda``, if not specified in the constructor.
        If defined in the constructor, is not re-learned.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        exogenous : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        self : BoxCoxEndogTransformer
            The fitted transformer, with ``lam1_`` and ``lam2_`` set.

        Raises
        ------
        ValueError
            If ``lmbda2`` is negative.
        """
        lam1 = self.lmbda
        lam2 = self.lmbda2

        if lam2 < 0:
            raise ValueError("lmbda2 must be a non-negative scalar value")

        if lam1 is None:
            y, _ = self._check_y_exog(y, exogenous)
            # Estimate lambda on the shifted series: ``transform`` applies
            # the power transform to ``y + lam2``, so the estimate has to be
            # made on that same data (and the shift is what makes the input
            # positive, which ``stats.boxcox`` requires).
            _, lam1 = stats.boxcox(y + lam2, lmbda=None, alpha=None)

        self.lam1_ = lam1
        self.lam2_ = lam2
        return self

    def transform(self, y, exogenous=None, **_):
        """Transform the new array

        Apply the Box-Cox transformation to the array after learning the
        lambda parameter.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        exogenous : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        y_transform : array-like or None
            The Box-Cox transformed y array

        exogenous : array-like or None
            The exog array

        Raises
        ------
        ValueError
            If any value of ``y + lmbda2`` is <= 0 and ``neg_action`` is
            'raise'.
        """
        check_is_fitted(self, "lam1_")
        lam1 = self.lam1_
        lam2 = self.lam2_

        y, exog = self._check_y_exog(y, exogenous)
        # Build a new array rather than shifting in place, so the caller's
        # data is never modified even if the validation helper did not copy.
        y = y + lam2

        neg_mask = y <= 0.
        if neg_mask.any():
            action = self.neg_action
            msg = "Negative or zero values present in y"
            if action == "raise":
                raise ValueError(msg)
            elif action == "warn":
                warnings.warn(msg, UserWarning)
            # Any action other than 'raise' truncates to the floor value
            y[neg_mask] = self.floor

        if lam1 == 0:
            return np.log(y), exog
        return (y ** lam1 - 1) / lam1, exog

    def inverse_transform(self, y, exogenous=None):
        """Inverse transform a transformed array

        Inverse the Box-Cox transformation on the transformed array. Note that
        if truncation happened in the ``transform`` method, invertibility will
        not be preserved, and the transformed array may not be perfectly
        inverse-transformed.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The transformed endogenous (time-series) array.

        exogenous : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        y : array-like or None
            The inverse-transformed y array

        exogenous : array-like or None
            The exog array (passed through; no inverse is applied to it here)
        """
        check_is_fitted(self, "lam1_")
        lam1 = self.lam1_
        lam2 = self.lam2_

        y, exog = self._check_y_exog(y, exogenous)
        if lam1 == 0:
            # Inverse of log(y + lam2)
            return np.exp(y) - lam2, exog

        numer = y * lam1  # remove denominator
        numer += 1.  # add 1 back to it
        de_exp = numer ** (1. / lam1)  # de-exponentiate
        return de_exp - lam2, exog