Skip to content

Commit

Permalink
Merge pull request #3345 from rjenc29/cov
Browse files Browse the repository at this point in the history
Support for np.cov
  • Loading branch information
seibert committed Oct 31, 2018
2 parents 5996c58 + e825651 commit b44b5e6
Show file tree
Hide file tree
Showing 3 changed files with 414 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/source/reference/numpysupported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ The following top-level functions are supported:
* :func:`numpy.convolve` (only the 2 first arguments)
* :func:`numpy.copy` (only the first argument)
* :func:`numpy.correlate` (only the 2 first arguments)
* :func:`numpy.cov` (only the 5 first arguments, requires NumPy >= 1.10 and SciPy >= 0.16)
* :func:`numpy.diag`
* :func:`numpy.digitize`
* :func:`numpy.dstack`
Expand Down
208 changes: 208 additions & 0 deletions numba/targets/arraymath.py
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,214 @@ def np_vander_seq_impl(x, N=None, increasing=False):
elif isinstance(x, (types.Tuple, types.Sequence)):
return np_vander_seq_impl

#----------------------------------------------------------------------------
# Statistics

@register_jitable
def row_wise_average(a):
assert a.ndim == 2

m, n = a.shape
out = np.empty((m, 1), dtype=a.dtype)

for i in range(m):
out[i, 0] = np.sum(a[i, :]) / n

return out

@register_jitable
def np_cov_impl_inner(X, bias, ddof):

# determine degrees of freedom
if ddof is None:
if bias:
ddof = 0
else:
ddof = 1

# determine the normalization factor
fact = X.shape[1] - ddof

# numpy warns if less than 0 and floors at 0
fact = max(fact, 0.0)

# de-mean
X -= row_wise_average(X)

# calculate result - requires blas
c = np.dot(X, np.conj(X.T))
c *= np.true_divide(1, fact)
return c

def _prepare_cov_input_inner():
pass

@overload(_prepare_cov_input_inner)
def _prepare_cov_input_impl(m, y, rowvar, dtype):
if y in (None, types.none):
def _prepare_cov_input_inner(m, y, rowvar, dtype):
m_arr = np.atleast_2d(_asarray(m))

if not rowvar:
m_arr = m_arr.T

return m_arr
else:
def _prepare_cov_input_inner(m, y, rowvar, dtype):
m_arr = np.atleast_2d(_asarray(m))
y_arr = np.atleast_2d(_asarray(y))

# transpose if asked to and not a (1, n) vector - this looks
# wrong as you might end up transposing one and not the other,
# but it's what numpy does
if not rowvar:
if m_arr.shape[0] != 1:
m_arr = m_arr.T
if y_arr.shape[0] != 1:
y_arr = y_arr.T

m_rows, m_cols = m_arr.shape
y_rows, y_cols = y_arr.shape

if m_cols != y_cols:
raise ValueError("m and y have incompatible dimensions")

# allocate and fill output array
out = np.empty((m_rows + y_rows, m_cols), dtype=dtype)
out[:m_rows, :] = m_arr
out[-y_rows:, :] = y_arr

return out

return _prepare_cov_input_inner

@register_jitable
def _handle_m_dim_change(m):
if m.ndim == 2 and m.shape[0] == 1:
msg = ("2D array containing a single row is unsupported due to "
"ambiguity in type inference. To use numpy.cov in this case "
"simply pass the row as a 1D array, i.e. m[0].")
raise RuntimeError(msg)

_handle_m_dim_nop = register_jitable(lambda x: x)

def determine_dtype(array_like):
array_like_dt = np.float64
if isinstance(array_like, types.Array):
array_like_dt = as_dtype(array_like.dtype)
elif isinstance(array_like, (types.UniTuple, types.Tuple)):
coltypes = set()
for val in array_like:
if hasattr(val, 'count'):
[coltypes.add(v) for v in val]
else:
coltypes.add(val)
if len(coltypes) > 1:
array_like_dt = np.promote_types(*[as_dtype(ty) for ty in coltypes])
elif len(coltypes) == 1:
array_like_dt = as_dtype(coltypes.pop())

return array_like_dt

def check_dimensions(array_like, name):
if isinstance(array_like, types.Array):
if array_like.ndim > 2:
raise TypeError("{0} has more than 2 dimensions".format(name))
elif isinstance(array_like, types.Sequence):
if isinstance(array_like.key[0], types.Sequence):
if isinstance(array_like.key[0].key[0], types.Sequence):
raise TypeError("{0} has more than 2 dimensions".format(name))

@register_jitable
def _handle_ddof(ddof):
if not np.isfinite(ddof):
raise ValueError('Cannot convert non-finite ddof to integer')
if ddof - int(ddof) != 0:
raise ValueError('ddof must be integral value')

_handle_ddof_nop = register_jitable(lambda x: x)

@register_jitable
def _prepare_cov_input(m, y, rowvar, dtype, ddof, _DDOF_HANDLER, _M_DIM_HANDLER):
_M_DIM_HANDLER(m)
_DDOF_HANDLER(ddof)
return _prepare_cov_input_inner(m, y, rowvar, dtype)

if numpy_version >= (1, 10): # replicate behaviour post numpy 1.10 bugfix release
@overload(np.cov)
def np_cov(m, y=None, rowvar=True, bias=False, ddof=None):

# reject problem if m and / or y are more than 2D
check_dimensions(m, 'm')
check_dimensions(y, 'y')

# reject problem if ddof invalid (either upfront if type is
# obviously invalid, or later if value found to be non-integral)
if ddof in (None, types.none):
_DDOF_HANDLER = _handle_ddof_nop
else:
if isinstance(ddof, (types.Integer, types.Boolean)):
_DDOF_HANDLER = _handle_ddof_nop
elif isinstance(ddof, types.Float):
_DDOF_HANDLER = _handle_ddof
else:
raise TypingError('ddof must be a real numerical scalar type')

# special case for 2D array input with 1 row of data - select
# handler function which we'll call later when we have access
# to the shape of the input array
_M_DIM_HANDLER = _handle_m_dim_nop
if isinstance(m, types.Array):
_M_DIM_HANDLER = _handle_m_dim_change

# infer result dtype
m_dt = determine_dtype(m)
y_dt = determine_dtype(y)
dtype = np.result_type(m_dt, y_dt, np.float64)

def np_cov_impl(m, y=None, rowvar=True, bias=False, ddof=None):
X = _prepare_cov_input(m, y, rowvar, dtype, ddof, _DDOF_HANDLER, _M_DIM_HANDLER).astype(dtype)

if np.any(np.array(X.shape) == 0):
return np.full((X.shape[0], X.shape[0]), fill_value=np.nan, dtype=dtype)
else:
return np_cov_impl_inner(X, bias, ddof)

def np_cov_impl_single_variable(m, y=None, rowvar=True, bias=False, ddof=None):
X = _prepare_cov_input(m, y, rowvar, ddof, dtype, _DDOF_HANDLER, _M_DIM_HANDLER).astype(dtype)

if np.any(np.array(X.shape) == 0):
variance = np.nan
else:
variance = np_cov_impl_inner(X, bias, ddof).flat[0]

return np.array(variance)

# identify up front if output is 0D
if isinstance(m, types.Array) and m.ndim == 1:
if y in (None, types.none):
return np_cov_impl_single_variable

if isinstance(m, types.BaseTuple):
if all(isinstance(x, (types.Number, types.Boolean)) for x in m.types):
if y in (None, types.none):
return np_cov_impl_single_variable
else:
if len(m.types) == 1 and isinstance(m.types[0], types.BaseTuple):
if y in (None, types.none):
return np_cov_impl_single_variable

if isinstance(m, (types.Number, types.Boolean)):
if y in (None, types.none):
return np_cov_impl_single_variable

if isinstance(m, types.Sequence):
if not isinstance(m.key[0], types.Sequence) and y in (None, types.none):
return np_cov_impl_single_variable

# otherwise assume it's 2D and we're good to go
return np_cov_impl

#----------------------------------------------------------------------------
# Element-wise computations

Expand Down

0 comments on commit b44b5e6

Please sign in to comment.