Skip to content

Commit

Permalink
Add more tests, coverage, fix some bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
tgsmith61591 committed Jun 1, 2017
1 parent 09ee745 commit a63f7b7
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 140 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -2,6 +2,9 @@
.DS_Store
.idea/

# custom extension
*.pmdpkl

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
5 changes: 3 additions & 2 deletions pyramid/arima/approx.py
Expand Up @@ -98,15 +98,16 @@ def approx(x, y, xout, method='linear', rule=1, n=50, f=0, yleft=None,
xout = c(xout).astype(np.float64) # ensure double

# check method
method = get_callable(method, VALID_APPROX) # not a callable, actually...
method_key = method
method = get_callable(method_key, VALID_APPROX) # not a callable, actually...

# copy/regularize vectors
x, y = _regularize(x, y, ties)
nx = x.shape[0]

# if len 1?
if nx <= 1:
if method == 'linear':
if method_key == 'linear':
raise ValueError('need at least two points to linearly interpolate')
if nx == 0:
raise ValueError('empty array')
Expand Down
152 changes: 64 additions & 88 deletions pyramid/arima/arima.py
Expand Up @@ -11,8 +11,11 @@
from sklearn.utils.validation import check_array, check_is_fitted
from sklearn.utils.metaestimators import if_delegate_has_method
from statsmodels.tsa.arima_model import ARIMA as _ARIMA
from statsmodels.tsa.base.tsa_model import TimeSeriesModelResults
from statsmodels import api as sm
import datetime
import warnings
import os

# The DTYPE we'll use for everything here. Since there are
# lots of spots where we define the DTYPE in a numpy array,
Expand Down Expand Up @@ -204,19 +207,17 @@ def _fit_wrapper():
disp=self.disp, callback=self.callback,
**fit_args)


# sometimes too many warnings...
if self.suppress_warnings:
with warnings.catch_warnings(record=False):
warnings.simplefilter('ignore')
self.arima_, self.arima_res_ = _fit_wrapper()
_, self.arima_res_ = _fit_wrapper()
else:
self.arima_, self.arima_res_ = _fit_wrapper()
_, self.arima_res_ = _fit_wrapper()

return self

def predict(self, n_periods=10, exogenous=None, alpha=0.05,
include_std_err=False, include_conf_int=False):
def predict(self, n_periods=10, exogenous=None):
"""Generate predictions (forecasts) ``n_periods`` in the future. Note that unless
``include_std_err`` or ``include_conf_int`` are True, only the forecast
array will be returned (otherwise, a tuple with the corresponding elements
Expand Down Expand Up @@ -246,32 +247,21 @@ def predict(self, n_periods=10, exogenous=None, alpha=0.05,
-------
forecasts : array-like, shape=(n_periods,)
The array of fore-casted values.
stderr : array-like, shape=(n_periods,)
The array of standard errors (NOTE this is only returned
if ``include_std_err`` is True).
conf_int : array-like, shape=(n_periods, 2)
The array of confidence intervals (NOTE this is only returned
if ``include_conf_int`` is True).
"""
check_is_fitted(self, 'arima_res_')

# use the results wrapper to predict so it injects its own params
# (also if I was 0, ARMA will not have a forecast method natively)
f, s, c = self.arima_res_.forecast(steps=n_periods, exog=exogenous, alpha=alpha)
# ARIMA predicts differently...
if self.seasonal_order is None:
# use the results wrapper to predict so it injects its own params
# (also if I was 0, ARMA will not have a forecast method natively)
f, _, _ = self.arima_res_.forecast(steps=n_periods, exog=exogenous)
else:
f = self.arima_res_.forecast(steps=n_periods, exog=exogenous)

# different variants of the stats someone might want returned...
if include_std_err and include_conf_int:
return f, s, c
elif include_conf_int:
return f, c
elif include_std_err:
return f, s
return f

def fit_predict(self, y, exogenous=None, n_periods=10, alpha=0.05,
include_std_err=False, include_conf_int=False, **fit_args):
def fit_predict(self, y, exogenous=None, n_periods=10, **fit_args):
"""Fit an ARIMA to a vector, ``y``, of observations, and then
generate predictions.
Expand Down Expand Up @@ -300,8 +290,56 @@ def fit_predict(self, y, exogenous=None, n_periods=10, alpha=0.05,
Any keyword args to pass to the fit method.
"""
self.fit(y, exogenous, **fit_args)
return self.predict(n_periods=n_periods, exogenous=exogenous, alpha=alpha,
include_std_err=include_std_err, include_conf_int=include_conf_int)
return self.predict(n_periods=n_periods, exogenous=exogenous)

def _get_pickle_hash_file(self):
# Mmmm, pickle hash...
return '.%s-%i.pmdpkl' % (str(datetime.datetime.now()).replace(' ', '_'), hash(self))

def __getstate__(self):
"""I am being pickled..."""
loc = self.__dict__.get('tmp_pkl_', None)

# if this already contains a pointer to a "saved state" model,
# delete that model and replace it with a new one
if loc is not None:
os.unlink(loc)

# get the new location for where to save the results
new_loc = self._get_pickle_hash_file()
cwd = os.path.abspath(os.getcwd())
new_loc = os.path.join(cwd, new_loc)

# save the results - but only if it's fit...
if hasattr(self, 'arima_res_'):
# statsmodels result views work by caching metrics. If they
# are not cached prior to pickling, we might hit issues. This is
# a bug documented here: https://github.com/statsmodels/statsmodels/issues/3290
_ = self.arima_res_.summary()
self.arima_res_.save(fname=new_loc, remove_data=False)

# point to the location of the saved MLE model
self.tmp_pkl_ = new_loc

return self.__dict__

def __setstate__(self, state):
# I am being unpickled...
self.__dict__ = state

# re-set the results class
loc = state.get('tmp_pkl_', None)
if loc is not None:
self.arima_res_ = TimeSeriesModelResults.load(loc)

return self

def _clear_cached_state(self):
# when fit in an auto-arima, a lot of cached .pmdpkl files
# are generated if fit in parallel... this removes the tmp file
loc = self.__dict__.get('tmp_pkl_', None)
if loc is not None:
os.unlink(loc)

@if_delegate_has_method('arima_res_')
def aic(self):
Expand Down Expand Up @@ -376,19 +414,6 @@ def bse(self):
"""
return self.arima_res_.bse

@if_delegate_has_method('arima_res_')
def df_model(self):
"""Get the model degrees of freedom:
k_exog + k_trend + k_ar + k_ma
Returns
-------
df_model : array-like
The degrees of freedom.
"""
return self.arima_res_.df_model

@if_delegate_has_method('arima_res_')
def df_resid(self):
"""Get the residual degrees of freedom:
Expand Down Expand Up @@ -418,40 +443,6 @@ def hqic(self):
"""
return self.arima_res_.hqic

@if_delegate_has_method('arima_res_')
def k_ar(self):
"""Get the number of AR coefficients in the model.
Returns
-------
k_ar : int
The number of AR coefficients.
"""
return self.arima_res_.k_ar

@if_delegate_has_method('arima_res_')
def k_exog(self):
"""Get the number of exogenous variables included in the model. Does not
include the constant.
Returns
-------
k_exog : int
The number of features in the exogenous variables.
"""
return self.arima_res_.k_exog

@if_delegate_has_method('arima_res_')
def k_ma(self):
"""Get the number of MA coefficients in the model.
Returns
-------
k_ma : int
The number of MA coefficients.
"""
return self.arima_res_.k_ma

@if_delegate_has_method('arima_res_')
def maparams(self):
"""Get the value of the moving average coefficients.
Expand Down Expand Up @@ -518,18 +509,3 @@ def resid(self):
The model residuals.
"""
return self.arima_res_.resid

@if_delegate_has_method('arima_res_')
def sigma2(self):
"""Get the variance of the residuals. If the model is fit by 'css',
sigma2 = ssr/nobs, where ssr is the sum of squared residuals. If
the model is fit by 'mle', then sigma2 = 1/nobs * sum(v^2 / F)
where v is the one-step forecast error and F is the forecast error
variance.
Returns
-------
sigma2 : float
The variance of the residuals
"""
return self.arima_res_.sigma2
25 changes: 19 additions & 6 deletions pyramid/arima/auto.py
Expand Up @@ -37,7 +37,7 @@
def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2, max_q=5,
start_P=1, D=None, start_Q=1, max_P=2, max_D=1, max_Q=2, max_order=None, m=1,
seasonal=True, stationary=False, information_criterion='aic', alpha=0.05, test='kpss',
seasonal_test='ch', n_jobs=1, start_params=None, trend='c', method="css-mle", transparams=True,
seasonal_test='ch', n_jobs=1, start_params=None, trend='c', method=None, transparams=True,
solver='lbfgs', maxiter=50, disp=0, callback=None, offset_test_args=None, seasonal_test_args=None,
suppress_warnings=False, error_action='warn', **fit_args):
"""The ``AutoARIMA`` function seeks to identify the most optimal parameters for an ``ARIMA`` model,
Expand Down Expand Up @@ -380,12 +380,13 @@ def generator():
)

# get results in parallel
gen = generator() # the combos we need to fit
all_res = Parallel(n_jobs=n_jobs)(
delayed(_fit_arima)(y, xreg=exogenous, order=order, seasonal_order=seasonal_order,
start_params=start_params, trend=trend, method=method, transparams=transparams,
solver=solver, maxiter=maxiter, disp=disp, callback=callback,
fit_params=fit_args, suppress_warnings=suppress_warnings, error_action=error_action)
for order, seasonal_order in generator())
for order, seasonal_order in gen)

# filter the non-successful ones
filtered = [m for m in all_res if m is not None]
Expand All @@ -395,7 +396,13 @@ def generator():

# sort by the criteria
sorted_res = sorted(filtered, key=(lambda model: getattr(model, information_criterion)()), reverse=True)
return sorted_res[0]
best = sorted_res[0]

# remove all the cached .pmdpkl files...
for model in sorted_res:
model._clear_cached_state()

return best


def _fit_arima(x, xreg, order, seasonal_order, start_params, trend, method, transparams,
Expand All @@ -411,10 +418,16 @@ def _fit_arima(x, xreg, order, seasonal_order, start_params, trend, method, tran
# for non-stationarity errors, return None
except ValueError as v:
if error_action == 'warn':
warnings.warn('Unable to fit ARIMA for order=(%i, %i, %i); data is likely non-stationary. '
'(if you do not want to see these warnings, run with error_action="ignore")'
% (order[0], order[1], order[2]))
warnings.warn(_fmt_warning_str(order, seasonal_order))
elif error_action == 'raise':
raise v
# otherwise it's 'ignore'
return None


def _fmt_warning_str(order, seasonal_order):
"""This is just so we can test that the string will format even if we don't want the warnings in the tests"""
return ('Unable to fit ARIMA for order=(%i, %i, %i)%s; data is likely non-stationary. '
'(if you do not want to see these warnings, run with error_action="ignore")'
% (order[0], order[1], order[2], '' if seasonal_order is None else ' seasonal_order=(%i, %i, %i, %i)'
% (seasonal_order[0], seasonal_order[1], seasonal_order[2], seasonal_order[3])))
15 changes: 15 additions & 0 deletions pyramid/arima/tests/test_approx.py
Expand Up @@ -4,6 +4,7 @@
from pyramid.arima.approx import approx, _regularize
from pyramid.utils.array import c
from numpy.testing import assert_array_almost_equal
from nose.tools import assert_raises
import numpy as np

table = c(0.216, 0.176, 0.146, 0.119)
Expand Down Expand Up @@ -31,3 +32,17 @@ def test_approx_rule2():
x, y = approx(table, tablep, stat, rule=2)
assert_array_almost_equal(x, c(1.01))
assert_array_almost_equal(y, c(0.01))


def test_corners():
# fails for length differences
assert_raises(ValueError, approx, x=[1, 2, 3], y=[1, 2], xout=1.0)

# fails for bad string
assert_raises(ValueError, approx, x=table, y=table, xout=1.0, method='bad-string')

# fails for bad length
assert_raises(ValueError, approx, x=[], y=[], xout=[], ties='mean')

# fails for linear when < 2 samples
assert_raises(ValueError, approx, x=[1], y=[1], xout=[], method='linear', ties='ordered')

0 comments on commit a63f7b7

Please sign in to comment.