-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support for np.cov #3345
Support for np.cov #3345
Changes from 53 commits
8020e5f
4146ae1
9e28566
6214f6a
ea5d7c7
87bfcec
7c184e4
07748c5
0de4791
3fffaf4
e24666b
794a8ad
eb52932
7312260
8fad984
14303ae
27e31f0
03ea0dc
39f1297
a276817
9fb51fa
9d17159
1735006
96a710f
6952b87
a6c3f1c
e3fe081
3d286c5
b3c1994
838b9b3
30f3d25
7470953
152aedd
2936be1
7b16f0f
54bd3cb
9847bc7
3bee5f3
279687a
b996a19
c124171
6d2caa1
f36ad97
9b5ae97
63043dc
8a3e7db
8f36a79
5a01f64
e69cd90
e4f9e06
fc154e0
e0783eb
611b62a
b824f7a
739c89d
ce38b07
9c7d3ad
ab87470
8f273fd
24b9db9
b3292d9
50e2396
e9111ee
8921894
fc5c55f
5903fbd
e33f296
1451718
e825651
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1294,6 +1294,183 @@ def np_vander_seq_impl(x, N=None, increasing=False): | |
elif isinstance(x, (types.Tuple, types.Sequence)): | ||
return np_vander_seq_impl | ||
|
||
#---------------------------------------------------------------------------- | ||
# Statistics | ||
|
||
@register_jitable | ||
def row_wise_average(a): | ||
assert a.ndim == 2 | ||
|
||
m, n = a.shape | ||
out = np.empty((m, 1), dtype=a.dtype) | ||
|
||
for i in range(m): | ||
out[i, 0] = np.sum(a[i, :]) / n | ||
|
||
return out | ||
|
||
@register_jitable | ||
def np_cov_impl_inner(X, bias, ddof): | ||
|
||
# determine degrees of freedom | ||
if ddof is None: | ||
if bias: | ||
ddof = 0 | ||
else: | ||
ddof = 1 | ||
|
||
# determine the normalization factor | ||
fact = X.shape[1] - ddof | ||
|
||
# numpy warns if less than 0 and floors at 0 | ||
fact = max(fact, 0.0) | ||
|
||
# de-mean | ||
X -= row_wise_average(X) | ||
|
||
# calculate result - requires blas | ||
c = np.dot(X, np.conj(X.T)) | ||
c *= np.true_divide(1, fact) | ||
return c | ||
|
||
def _prepare_cov_input(): | ||
pass | ||
|
||
@overload(_prepare_cov_input) | ||
def _prepare_cov_input_impl(m, y, rowvar, dtype): | ||
if y in (None, types.none): | ||
def _prepare_cov_input_inner(m, y, rowvar, dtype): | ||
m_arr = np.atleast_2d(_asarray(m)) | ||
|
||
# transpose if asked to and not a (1, n) vector | ||
if not rowvar and m_arr.shape[0] != 1: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does the check for (1, n) need to happen, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, now that the "2D but single row" data shape is not allowed, this check is redundant - have removed it. |
||
m_arr = m_arr.T | ||
|
||
return m_arr | ||
else: | ||
def _prepare_cov_input_inner(m, y, rowvar, dtype): | ||
m_arr = np.atleast_2d(_asarray(m)) | ||
y_arr = np.atleast_2d(_asarray(y)) | ||
|
||
# transpose if asked to and not a (1, n) vector - this looks | ||
# wrong as you might end up transposing one and not the other, | ||
# but it's what numpy does | ||
if not rowvar: | ||
if m_arr.shape[0] != 1: | ||
m_arr = m_arr.T | ||
if y_arr.shape[0] != 1: | ||
y_arr = y_arr.T | ||
|
||
m_rows, m_cols = m_arr.shape | ||
y_rows, y_cols = y_arr.shape | ||
|
||
if m_cols != y_cols: | ||
raise ValueError('m and y must have the same number of variables') | ||
# 'variables' as the constraint on rows or columns depends on | ||
# whether rowvar is True or False... | ||
|
||
# allocate and fill output array | ||
out = np.empty((m_rows + y_rows, m_cols), dtype=dtype) | ||
out[:m_rows, :] = m_arr | ||
out[-y_rows:, :] = y_arr | ||
|
||
return out | ||
|
||
return _prepare_cov_input_inner | ||
|
||
@register_jitable | ||
def _handle_m_dim_change(m): | ||
if m.ndim == 2 and m.shape[0] == 1: | ||
msg = ("2D array containing a single row is unsupported due to " | ||
"ambiguity in type inference. To use numpy.cov in this case " | ||
"simply pass the row as a 1D array, i.e. m[0].") | ||
raise RuntimeError(msg) | ||
|
||
_handle_m_dim_nop = register_jitable(lambda x:x) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. space after the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
def determine_dtype(array_like): | ||
array_like_dt = np.float64 | ||
if isinstance(array_like, types.Array): | ||
array_like_dt = as_dtype(array_like.dtype) | ||
elif isinstance(array_like, (types.UniTuple, types.Tuple)): | ||
coltypes = set() | ||
for val in array_like: | ||
if hasattr(val, 'count'): | ||
[coltypes.add(v) for v in val] | ||
else: | ||
coltypes.add(val) | ||
if len(coltypes) > 1: | ||
array_like_dt = np.promote_types(*[as_dtype(ty) for ty in coltypes]) | ||
elif len(coltypes) == 1: | ||
array_like_dt = as_dtype(coltypes.pop()) | ||
|
||
return array_like_dt | ||
|
||
def check_dimensions(array_like, name): | ||
if isinstance(array_like, types.Array): | ||
if array_like.ndim > 2: | ||
raise TypeError("{0} has more than 2 dimensions".format(name)) | ||
elif isinstance(array_like, types.Sequence): | ||
if isinstance(array_like.key[0], types.Sequence): | ||
if isinstance(array_like.key[0].key[0], types.Sequence): | ||
raise TypeError("{0} has more than 2 dimensions".format(name)) | ||
|
||
if numpy_version >= (1, 10): # replicate behaviour post numpy 1.10 bugfix release | ||
@overload(np.cov) | ||
def np_cov(m, y=None, rowvar=True, bias=False, ddof=None): | ||
|
||
# reject problem if m and / or y are more than 2D | ||
check_dimensions(m, 'm') | ||
check_dimensions(y, 'y') | ||
|
||
# special case for 2D array input with 1 row of data - select | ||
# handler function which we'll call later when we have access | ||
# to the shape of the input array | ||
_M_DIM_HANDLER = _handle_m_dim_nop | ||
if isinstance(m, types.Array): | ||
_M_DIM_HANDLER = _handle_m_dim_change | ||
|
||
# infer result dtype | ||
m_dt = determine_dtype(m) | ||
y_dt = determine_dtype(y) | ||
dtype = np.result_type(m_dt, y_dt, np.float64) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added something (possibly a bit crude) which should allow int, bool and float if integral value - with some explicit tests. Let me know what you think. I can't think of a case where anything other than 0 and 1 make sense, in the absence of weights (which I haven't implemented anyway). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great, thanks. |
||
def np_cov_impl(m, y=None, rowvar=True, bias=False, ddof=None): | ||
_M_DIM_HANDLER(m) | ||
X = _prepare_cov_input(m, y, rowvar, dtype).astype(dtype) | ||
|
||
if np.any(np.array(X.shape) == 0): | ||
return np.full((X.shape[0], X.shape[0]), fill_value=np.nan, dtype=dtype) | ||
else: | ||
return np_cov_impl_inner(X, bias, ddof) | ||
|
||
def np_cov_impl_single_variable(m, y=None, rowvar=True, bias=False, ddof=None): | ||
_M_DIM_HANDLER(m) | ||
X = _prepare_cov_input(m, y, rowvar, dtype).astype(dtype) | ||
|
||
if np.any(np.array(X.shape) == 0): | ||
variance = np.nan | ||
else: | ||
variance = np_cov_impl_inner(X, bias, ddof).flat[0] | ||
|
||
return np.array(variance) | ||
|
||
# identify up front if output is 0D | ||
if isinstance(m, types.Array) and m.ndim == 1 or isinstance(m, types.Tuple): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a bit puzzled by this, a
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, Numba has a load of
(ignore the iter and class entries). I think this probably needs to check if if isinstance(m, types.BaseTuple):
if all(isinstance(x, (types.Number, types.Boolean)) for x in m.types):
if y in (None, types.none):
return np_cov_impl_single_variable might help? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this was a bug on my part. The tests all miraculously passed due to the way the if gates were set up and the various test cases resolving to UniTuple vs Tuple type etc. Should be sorted now, or at least nearer to being sorted. I added your example explicitly in tests. |
||
if y in (None, types.none): | ||
return np_cov_impl_single_variable | ||
|
||
if isinstance(m, (types.Integer, types.Float, types.Complex, types.Boolean)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think checking inheritance from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
if y in (None, types.none): | ||
return np_cov_impl_single_variable | ||
|
||
if isinstance(m, types.Sequence): | ||
if not isinstance(m.key[0], types.Sequence) and y in (None, types.none): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may cause issues at some point, but we can defer concern until sequence type detection is altered. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, there may be a tax to pay - but there are quite a few tests so it should at least be apparent when it's affected and needs a rethink |
||
return np_cov_impl_single_variable | ||
|
||
# otherwise assume it's 2D and we're good to go | ||
return np_cov_impl | ||
|
||
#---------------------------------------------------------------------------- | ||
# Element-wise computations | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
from numba.numpy_support import version as np_version | ||
from numba.errors import TypingError | ||
from .support import TestCase, CompilationCache, MemoryLeakMixin | ||
from .matmul_usecase import needs_blas | ||
|
||
no_pyobj_flags = Flags() | ||
no_pyobj_flags.set("nrt") | ||
|
@@ -88,6 +89,9 @@ def vander(x, N=None, increasing=False): | |
def partition(a, kth): | ||
return np.partition(a, kth) | ||
|
||
def cov(m, y=None, rowvar=True, bias=False, ddof=None): | ||
return np.cov(m, y, rowvar, bias, ddof) | ||
|
||
def ediff1d(ary, to_end=None, to_begin=None): | ||
return np.ediff1d(ary, to_end, to_begin) | ||
|
||
|
@@ -562,10 +566,10 @@ def test_convolve_exceptions(self): | |
else: | ||
self.assertIn("'v' cannot be empty", str(raises.exception)) | ||
|
||
def _check_output(self, pyfunc, cfunc, params): | ||
def _check_output(self, pyfunc, cfunc, params, abs_tol=None): | ||
expected = pyfunc(**params) | ||
got = cfunc(**params) | ||
self.assertPreciseEqual(expected, got) | ||
self.assertPreciseEqual(expected, got, abs_tol=abs_tol) | ||
|
||
def test_vander_basic(self): | ||
pyfunc = vander | ||
|
@@ -1088,6 +1092,170 @@ def test_partition_boolean_inputs(self): | |
for kth in True, False, -1, 0, 1: | ||
self.partition_sanity_check(pyfunc, cfunc, d, kth) | ||
|
||
@unittest.skipUnless(np_version >= (1, 10), "cov needs Numpy 1.10+") | ||
@needs_blas | ||
def test_cov_basic(self): | ||
pyfunc = cov | ||
cfunc = jit(nopython=True)(pyfunc) | ||
_check = partial(self._check_output, pyfunc, cfunc, abs_tol=1e-14) | ||
|
||
def m_variations(): | ||
# array inputs | ||
yield np.array([[0, 2], [1, 1], [2, 0]]).T | ||
yield self.rnd.randn(100).reshape(5, 20) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps add:
for a fortran order and a slice. Note the fortran order specified a few lines below is also C contig by virtue of it being a single "column" of data. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done and good spot with the Fortan order blunder. |
||
yield np.array([0.3942, 0.5969, 0.7730, 0.9918, 0.7964]) | ||
yield np.full((4, 5), fill_value=True) | ||
yield np.array([np.nan, 0.5969, -np.inf, 0.9918, 0.7964]) | ||
yield np.linspace(-3, 3, 33).reshape(33, 1, order='F') | ||
|
||
# non-array inputs | ||
yield ((0.1, 0.2), (0.11, 0.19), (0.09, 0.21)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps add a non-homogeneous tuple in here? Like:
? It should trip the issue described above WRT |
||
yield (-2.1, -1, 4.3) | ||
yield (1, 2, 3) | ||
yield [4, 5, 6] | ||
yield ((0.1, 0.2, 0.3), (0.1, 0.2, 0.3)) | ||
yield [(1, 2, 3), (1, 3, 2)] | ||
yield 3.142 | ||
|
||
# empty data structures | ||
yield np.array([]) | ||
yield np.array([]).reshape(0, 2) | ||
yield np.array([]).reshape(2, 0) | ||
yield () | ||
|
||
# all inputs other than the first are defaulted | ||
for m in m_variations(): | ||
_check({'m': m}) | ||
|
||
@unittest.skipUnless(np_version >= (1, 10), "cov needs Numpy 1.10+") | ||
@needs_blas | ||
def test_cov_explicit_arguments(self): | ||
pyfunc = cov | ||
cfunc = jit(nopython=True)(pyfunc) | ||
_check = partial(self._check_output, pyfunc, cfunc, abs_tol=1e-14) | ||
|
||
m = self.rnd.randn(1050).reshape(150, 7) | ||
y_choices = None, m[::-1] | ||
rowvar_choices = False, True | ||
bias_choices = False, True | ||
ddof_choice = None, -1, 0, 1, 3 | ||
|
||
for y, rowvar, bias, ddof in itertools.product(y_choices, rowvar_choices, bias_choices, ddof_choice): | ||
params = {'m': m, 'y': y, 'ddof': ddof, 'bias': bias, 'rowvar': rowvar} | ||
_check(params) | ||
|
||
@unittest.skipUnless(np_version >= (1, 10), "cov needs Numpy 1.10+") | ||
@needs_blas | ||
def test_cov_egde_cases(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. s/egde/edge/ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
pyfunc = cov | ||
cfunc = jit(nopython=True)(pyfunc) | ||
_check = partial(self._check_output, pyfunc, cfunc, abs_tol=1e-14) | ||
|
||
# examples borrowed from numpy doc string / unit tests | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Explicit reference to the source location in NumPy please. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
m = np.array([-2.1, -1, 4.3]) | ||
y = np.array([3, 1.1, 0.12]) | ||
params = {'m': m, 'y': y} | ||
_check(params) | ||
|
||
m = np.array([[0, 2], [1, 1], [2, 0]]).T | ||
params = {'m': m, 'ddof': 5} | ||
_check(params) | ||
|
||
m = np.array([1, 2, 3]) # test case modified such that m is 1D | ||
y = np.array([[1j, 2j, 3j]]) | ||
params = {'m': m, 'y': y} | ||
_check(params) | ||
|
||
m = np.array([1, 2, 3]) | ||
y = (1j, 2j, 3j) | ||
params = {'m': m, 'y': y} | ||
_check(params) | ||
params = {'m': y, 'y': m} # flip real and complex inputs | ||
_check(params) | ||
|
||
m = np.array([1, 2, 3]) | ||
y = (1j, 2j, 3) # note last item is not complex | ||
params = {'m': m, 'y': y} | ||
_check(params) | ||
params = {'m': y, 'y': m} # flip real and complex inputs | ||
_check(params) | ||
|
||
m = np.array([]) | ||
y = np.array([]) | ||
params = {'m': m, 'y': y} | ||
_check(params) | ||
|
||
m = 1.1 | ||
y = 2.2 | ||
params = {'m': m, 'y': y} | ||
_check(params) | ||
|
||
m = self.rnd.randn(10, 3) | ||
y = np.array([-2.1, -1, 4.3]).reshape(1, 3) / 10 | ||
params = {'m': m, 'y': y} | ||
_check(params) | ||
|
||
# The following tests pass with numpy version >= 1.10, but fail with 1.9 | ||
m = np.array([-2.1, -1, 4.3]) | ||
y = np.array([[3, 1.1, 0.12], [3, 1.1, 0.12]]) | ||
params = {'m': m, 'y': y} | ||
_check(params) | ||
|
||
for rowvar in False, True: | ||
m = np.array([-2.1, -1, 4.3]) | ||
y = np.array([[3, 1.1, 0.12], [3, 1.1, 0.12], [4, 1.1, 0.12]]) | ||
params = {'m': m, 'y': y, 'rowvar': rowvar} | ||
_check(params) | ||
|
||
@unittest.skipUnless(np_version >= (1, 10), "cov needs Numpy 1.10+") | ||
@needs_blas | ||
def test_cov_exceptions(self): | ||
pyfunc = cov | ||
cfunc = jit(nopython=True)(pyfunc) | ||
|
||
# Exceptions leak references | ||
self.disable_leak_check() | ||
|
||
def _check_m(m): | ||
with self.assertTypingError() as raises: | ||
cfunc(m) | ||
self.assertIn('m has more than 2 dimensions', str(raises.exception)) | ||
|
||
m = np.ones((5, 6, 7)) | ||
_check_m(m) | ||
|
||
m = ((((1, 2, 3), (2, 2, 2)),),) | ||
_check_m(m) | ||
|
||
m = [[[5, 6, 7]]] | ||
_check_m(m) | ||
|
||
def _check_y(m, y): | ||
with self.assertTypingError() as raises: | ||
cfunc(m, y=y) | ||
self.assertIn('y has more than 2 dimensions', str(raises.exception)) | ||
|
||
m = np.ones((5, 6)) | ||
y = np.ones((5, 6, 7)) | ||
_check_y(m, y) | ||
|
||
m = np.array((1.1, 2.2, 1.1)) | ||
y = (((1.2, 2.2, 2.3),),) | ||
_check_y(m, y) | ||
|
||
m = np.arange(3) | ||
y = np.arange(4) | ||
with self.assertRaises(ValueError) as raises: | ||
cfunc(m, y=y) | ||
self.assertIn('m and y must have the same number of variables', str(raises.exception)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps this should refer to the dimension size mismatch? Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I changed the wording a bit - I meant 'variables' in the statistics sense, but I agree the wording's poor. Hopefully it's now less poor :) |
||
# Numpy raises ValueError: all the input array dimensions except for the | ||
# concatenation axis must match exactly. | ||
|
||
m = np.array([-2.1, -1, 4.3]).reshape(1, 3) | ||
with self.assertRaises(RuntimeError) as raises: | ||
cfunc(m) | ||
self.assertIn('2D array containing a single row is unsupported', str(raises.exception)) | ||
|
||
@unittest.skipUnless(np_version >= (1, 12), "ediff1d needs Numpy 1.12+") | ||
def test_ediff1d_basic(self): | ||
pyfunc = ediff1d | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should note requirement of SciPy 0.16+.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done