Skip to content

Commit

Permalink
Amend doc, add is_iterable func, compute machine eps dynamically
Browse files Browse the repository at this point in the history
  • Loading branch information
tgsmith61591 committed Jan 6, 2018
1 parent 4029c77 commit 55fc876
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 16 deletions.
57 changes: 57 additions & 0 deletions benchmarks/benchmark_funcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
#
# Benchmark various approaches to functions to speed things up.
# ... hopefully.

from __future__ import absolute_import

import numpy as np

import time


def _do_time(func, n_iter=10, *args, **kwargs):
times = []
for _ in range(n_iter):
start = time.time()
func(*args, **kwargs)
times.append(time.time() - start)

times = np.asarray(times)
print("Completed %i iterations (avg=%.6f, min=%.6f, max=%.6f)"
% (n_iter, times.mean(), times.min(), times.max()))


def benchmark_is_constant():
    """Benchmark two candidate implementations of "is_constant".

    The function under study is "is_constant" from
    ``pyramid.arima.utils`` (added in 0.6.2). Both candidates are timed
    with ``_do_time`` for 25 calls each, on the same 1,000,000-element
    array of values sampled with replacement from ``np.arange(10)``.
    The timing summaries are printed by ``_do_time``.
    """
    def _equals_first(arr):
        """Check constancy by comparing every element to the first.

        This is the version in Pyramid 0.6.2, and the one marked as the
        winner of this benchmark.

        Parameters
        ----------
        arr : np.ndarray
            The array to test.
        """
        return np.all(arr == arr[0])

    def _single_unique(arr):
        """Check constancy by counting the distinct values.

        This was tried as a potentially cheaper alternative to the
        element-wise comparison above. Note that it goes through
        ``np.unique``, which returns the *sorted* unique elements.

        Parameters
        ----------
        arr : np.ndarray
            The array to test.
        """
        return len(np.unique(arr)) == 1

    digits = np.arange(10)
    sample = np.random.choice(digits, size=1000000, replace=True)

    # time both candidates against the very same sample
    for candidate in (_equals_first, _single_unique):
        _do_time(candidate, 25, sample)


if __name__ == '__main__':
benchmark_is_constant()
12 changes: 8 additions & 4 deletions pyramid/arima/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@
# Automatically find optimal parameters for an ARIMA

from __future__ import absolute_import

from sklearn.utils.validation import check_array, column_or_1d
from sklearn.utils import check_random_state
from sklearn.externals.joblib import Parallel, delayed
from sklearn.linear_model import LinearRegression

from numpy.linalg import LinAlgError
import numpy as np

import warnings
import time

from .utils import ndiffs, is_constant, nsdiffs
from ..utils import diff
from ..utils import diff, is_iterable
from .arima import ARIMA

# for python 3 compat
Expand Down Expand Up @@ -874,8 +877,9 @@ def _post_ppc_arima(a):
The list of ARIMAs, or an ARIMA
"""
# if it's a result of making it to the end, it will
# be a list of ARIMA models.
if hasattr(a, '__iter__'):
# be a list of ARIMA models. Filter out the Nones
# (the failed models)...
if is_iterable(a):
a = [m for m in a if m is not None]

# if the list is empty, or if it was an ARIMA and it's None
Expand Down Expand Up @@ -906,7 +910,7 @@ def _return_wrapper(fits, return_all, start, trace):
Whether to return all.
"""
# make sure it's an iterable
if not hasattr(fits, '__iter__'):
if not is_iterable(fits):
fits = [fits]

# whether to print the final runtime
Expand Down
16 changes: 13 additions & 3 deletions pyramid/arima/seasonality.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,22 @@ def _sd_test(wts, s):

# UPDATE 01/04/2018 - we can get away without computing u, v
# (this is also MUCH MUCH faster!!!)
sv = svd(tmp, compute_uv=False)
if sv.min() < 2.220446e-16: # machine min eps
sv = svd(tmp, compute_uv=False) # type: np.ndarray

# From R:
# double.eps: the smallest positive floating-point number ‘x’ such that
# ‘1 + x != 1’. It equals ‘double.base ^ ulp.digits’ if either
# ‘double.base’ is 2 or ‘double.rounding’ is 0; otherwise, it
# is ‘(double.base ^ double.ulp.digits) / 2’. Normally
# ‘2.220446e-16’.
# Numpy's float64 has an eps of 2.2204460492503131e-16
if sv.min() < np.finfo(sv.dtype).eps: # machine min eps
return 0

# solve against the identity matrix, then produce
# a nasty mess of dot products...
# a nasty mess of dot products... this is the (horrendous) R code:
# (1/N^2) * sum(diag(solve(tmp) %*% t(A) %*% t(Fhat) %*% Fhat %*% A))
# https://github.com/robjhyndman/forecast/blob/master/R/arima.R#L321
solved = solve(tmp, np.identity(tmp.shape[0]))
return (1.0 / n ** 2) * solved.dot(A.T).dot(
Fhat.T).dot(Fhat).dot(A).diagonal().sum()
Expand Down
1 change: 1 addition & 0 deletions pyramid/arima/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Common ARIMA functions

from __future__ import absolute_import

from sklearn.utils.validation import check_array, column_or_1d
import numpy as np

Expand Down
51 changes: 44 additions & 7 deletions pyramid/utils/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
# Array utilities

from __future__ import absolute_import, division

from sklearn.utils.validation import check_array
from sklearn.externals import six

import numpy as np

__all__ = [
'c',
'diff'
'diff',
'is_iterable'
]


Expand All @@ -20,16 +24,27 @@ def c(*args):
that wraps ``numpy.concatenate``? Similar to R, this works with scalars,
iterables, and any mix therein.
Note that using the ``c`` function on multi-nested lists or iterables
will fail!
Examples
--------
>>> from pyramid.utils import c
Using ``c`` with *args will yield a single array:
>>> c(1, 2, 3, 4)
array([1, 2, 3, 4])
>>> from pyramid.utils import c
Using ``c`` with nested lists and scalars will also yield a single array:
>>> c([1, 2], 4, c(5, 4))
array([1, 2, 4, 5, 4])
However, using ``c`` with multi-level lists will fail!
>>> c([1, 2, 3], [[1, 2]])
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "pyramid/utils/array.py", line 64, in c
return np.concatenate([a if is_iterable(a) else [a] for a in args])
ValueError: all the input arrays must have same number of dimensions
References
----------
.. [1] https://stat.ethz.ch/R-manual/R-devel/library/base/html/c.html
Expand All @@ -43,14 +58,14 @@ def c(*args):
element = args[0]

# if it's iterable, make it an array
if hasattr(element, '__iter__'):
if is_iterable(element):
return np.asarray(element)

# otherwise it's not iterable, put it in an array
return np.asarray([element])

# concat all
return np.concatenate([a if hasattr(a, '__iter__') else [a] for a in args])
return np.concatenate([a if is_iterable(a) else [a] for a in args])


def _diff_vector(x, lag):
Expand Down Expand Up @@ -107,7 +122,7 @@ def diff(x, lag=1, differences=1):
>>> diff(x, 6, 1)
array([], dtype=float32)
>>> from pyramid.utils import c, diff
>>> from pyramid.utils import diff
>>> import numpy as np
>>>
>>> x = np.arange(1, 10).reshape((3, 3)).T
Expand Down Expand Up @@ -138,7 +153,7 @@ def diff(x, lag=1, differences=1):
if any(v < 1 for v in (lag, differences)):
raise ValueError('lag and differences must be positive (> 0) integers')

x = check_array(x, ensure_2d=False, dtype=np.float32)
x = check_array(x, ensure_2d=False, dtype=np.float32) # type: np.ndarray
fun = _diff_vector if len(x.shape) == 1 else _diff_matrix
res = x

Expand All @@ -150,3 +165,25 @@ def diff(x, lag=1, differences=1):
return res

return res


def is_iterable(x):
    """Tell whether ``x`` should be treated as an iterable.

    Under Python 2 it was enough to look for the ``__iter__`` attribute.
    Python 3 strings carry that attribute as well, so they are ruled out
    explicitly here: any instance of ``six.string_types`` is reported as
    non-iterable, and everything else is judged by the presence of
    ``__iter__``.

    Parameters
    ----------
    x : str, iterable or object
        The object in question.

    Returns
    -------
    isiter : bool
        False for strings; otherwise True if ``x`` has an ``__iter__``
        attribute, else False.
    """
    looks_like_string = isinstance(x, six.string_types)
    return not looks_like_string and hasattr(x, '__iter__')
3 changes: 2 additions & 1 deletion pyramid/utils/metaestimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#
# Author: Taylor Smith <taylor.smith@alkaline-ml.com>
#
# Metaestimators for the ARIMA class
# Metaestimators for the ARIMA class. These classes are derived from the
# sklearn metaestimators, but adapted for more specific use with pyramid.

from __future__ import absolute_import
from operator import attrgetter
Expand Down
11 changes: 10 additions & 1 deletion pyramid/utils/tests/test_array.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@

from __future__ import absolute_import
from pyramid.utils.array import diff, c

from pyramid.utils.array import diff, c, is_iterable
from pyramid.utils import get_callable

from numpy.testing import assert_array_equal
from nose.tools import assert_raises
import numpy as np
Expand Down Expand Up @@ -42,3 +44,10 @@ def test_corner_in_callable():
def test_corner():
# fails because lag < 1
assert_raises(ValueError, diff, x=x, lag=0)


def test_is_iterable():
    # a string and None must both be reported as non-iterable...
    for rejected in ("this string", None):
        assert not is_iterable(rejected)

    # ...whereas a list and a numpy array must be reported as iterable
    for accepted in (["this", "list"], np.array([1, 2])):
        assert is_iterable(accepted)

0 comments on commit 55fc876

Please sign in to comment.