Merge pull request #3345 from rjenc29/cov

Support for np.cov
numba · Oct 31, 2018 · b44b5e6 · b44b5e6
2 parents 5996c58 + e825651
commit b44b5e6
Show file tree

Hide file tree

Showing 3 changed files with 414 additions and 2 deletions.
diff --git a/docs/source/reference/numpysupported.rst b/docs/source/reference/numpysupported.rst
@@ -263,6 +263,7 @@ The following top-level functions are supported:
 * :func:`numpy.convolve` (only the 2 first arguments)
 * :func:`numpy.copy` (only the first argument)
 * :func:`numpy.correlate` (only the 2 first arguments)
+* :func:`numpy.cov` (only the 5 first arguments, requires NumPy >= 1.10 and SciPy >= 0.16)
 * :func:`numpy.diag`
 * :func:`numpy.digitize`
 * :func:`numpy.dstack`

diff --git a/numba/targets/arraymath.py b/numba/targets/arraymath.py
@@ -1294,6 +1294,214 @@ def np_vander_seq_impl(x, N=None, increasing=False):
     elif isinstance(x, (types.Tuple, types.Sequence)):
         return np_vander_seq_impl
 
+#----------------------------------------------------------------------------
+# Statistics
+
+@register_jitable
+def row_wise_average(a):
+    assert a.ndim == 2
+
+    m, n = a.shape
+    out = np.empty((m, 1), dtype=a.dtype)
+
+    for i in range(m):
+        out[i, 0] = np.sum(a[i, :]) / n
+
+    return out
+
+@register_jitable
+def np_cov_impl_inner(X, bias, ddof):
+
+    # determine degrees of freedom
+    if ddof is None:
+        if bias:
+            ddof = 0
+        else:
+            ddof = 1
+
+    # determine the normalization factor
+    fact = X.shape[1] - ddof
+
+    # numpy warns if less than 0 and floors at 0
+    fact = max(fact, 0.0)
+
+    # de-mean
+    X -= row_wise_average(X)
+
+    # calculate result - requires blas
+    c = np.dot(X, np.conj(X.T))
+    c *= np.true_divide(1, fact)
+    return c
+
+def _prepare_cov_input_inner():
+    pass
+
+@overload(_prepare_cov_input_inner)
+def _prepare_cov_input_impl(m, y, rowvar, dtype):
+    if y in (None, types.none):
+        def _prepare_cov_input_inner(m, y, rowvar, dtype):
+            m_arr = np.atleast_2d(_asarray(m))
+
+            if not rowvar:
+                m_arr = m_arr.T
+
+            return m_arr
+    else:
+        def _prepare_cov_input_inner(m, y, rowvar, dtype):
+            m_arr = np.atleast_2d(_asarray(m))
+            y_arr = np.atleast_2d(_asarray(y))
+
+            # transpose if asked to and not a (1, n) vector - this looks
+            # wrong as you might end up transposing one and not the other,
+            # but it's what numpy does
+            if not rowvar:
+                if m_arr.shape[0] != 1:
+                    m_arr = m_arr.T
+                if y_arr.shape[0] != 1:
+                    y_arr = y_arr.T
+
+            m_rows, m_cols = m_arr.shape
+            y_rows, y_cols = y_arr.shape
+
+            if m_cols != y_cols:
+                raise ValueError("m and y have incompatible dimensions")
+
+            # allocate and fill output array
+            out = np.empty((m_rows + y_rows, m_cols), dtype=dtype)
+            out[:m_rows, :] = m_arr
+            out[-y_rows:, :] = y_arr
+
+            return out
+
+    return _prepare_cov_input_inner
+
+@register_jitable
+def _handle_m_dim_change(m):
+    if m.ndim == 2 and m.shape[0] == 1:
+        msg = ("2D array containing a single row is unsupported due to "
+               "ambiguity in type inference. To use numpy.cov in this case "
+               "simply pass the row as a 1D array, i.e. m[0].")
+        raise RuntimeError(msg)
+
+_handle_m_dim_nop = register_jitable(lambda x: x)
+
+def determine_dtype(array_like):
+    array_like_dt = np.float64
+    if isinstance(array_like, types.Array):
+        array_like_dt = as_dtype(array_like.dtype)
+    elif isinstance(array_like, (types.UniTuple, types.Tuple)):
+        coltypes = set()
+        for val in array_like:
+            if hasattr(val, 'count'):
+                [coltypes.add(v) for v in val]
+            else:
+                coltypes.add(val)
+        if len(coltypes) > 1:
+            array_like_dt = np.promote_types(*[as_dtype(ty) for ty in coltypes])
+        elif len(coltypes) == 1:
+            array_like_dt = as_dtype(coltypes.pop())
+
+    return array_like_dt
+
+def check_dimensions(array_like, name):
+    if isinstance(array_like, types.Array):
+        if array_like.ndim > 2:
+            raise TypeError("{0} has more than 2 dimensions".format(name))
+    elif isinstance(array_like, types.Sequence):
+        if isinstance(array_like.key[0], types.Sequence):
+            if isinstance(array_like.key[0].key[0], types.Sequence):
+                raise TypeError("{0} has more than 2 dimensions".format(name))
+
+@register_jitable
+def _handle_ddof(ddof):
+    if not np.isfinite(ddof):
+        raise ValueError('Cannot convert non-finite ddof to integer')
+    if ddof - int(ddof) != 0:
+        raise ValueError('ddof must be integral value')
+
+_handle_ddof_nop = register_jitable(lambda x: x)
+
+@register_jitable
+def _prepare_cov_input(m, y, rowvar, dtype, ddof, _DDOF_HANDLER, _M_DIM_HANDLER):
+    _M_DIM_HANDLER(m)
+    _DDOF_HANDLER(ddof)
+    return _prepare_cov_input_inner(m, y, rowvar, dtype)
+
+if numpy_version >= (1, 10):  # replicate behaviour post numpy 1.10 bugfix release
+    @overload(np.cov)
+    def np_cov(m, y=None, rowvar=True, bias=False, ddof=None):
+
+        # reject problem if m and / or y are more than 2D
+        check_dimensions(m, 'm')
+        check_dimensions(y, 'y')
+
+        # reject problem if ddof invalid (either upfront if type is
+        # obviously invalid, or later if value found to be non-integral)
+        if ddof in (None, types.none):
+            _DDOF_HANDLER = _handle_ddof_nop
+        else:
+            if isinstance(ddof, (types.Integer, types.Boolean)):
+                _DDOF_HANDLER = _handle_ddof_nop
+            elif isinstance(ddof, types.Float):
+                _DDOF_HANDLER = _handle_ddof
+            else:
+                raise TypingError('ddof must be a real numerical scalar type')
+
+        # special case for 2D array input with 1 row of data - select
+        # handler function which we'll call later when we have access
+        # to the shape of the input array
+        _M_DIM_HANDLER = _handle_m_dim_nop
+        if isinstance(m, types.Array):
+            _M_DIM_HANDLER = _handle_m_dim_change
+
+        # infer result dtype
+        m_dt = determine_dtype(m)
+        y_dt = determine_dtype(y)
+        dtype = np.result_type(m_dt, y_dt, np.float64)
+
+        def np_cov_impl(m, y=None, rowvar=True, bias=False, ddof=None):
+            X = _prepare_cov_input(m, y, rowvar, dtype, ddof, _DDOF_HANDLER, _M_DIM_HANDLER).astype(dtype)
+
+            if np.any(np.array(X.shape) == 0):
+                return np.full((X.shape[0], X.shape[0]), fill_value=np.nan, dtype=dtype)
+            else:
+                return np_cov_impl_inner(X, bias, ddof)
+
+        def np_cov_impl_single_variable(m, y=None, rowvar=True, bias=False, ddof=None):
+            X = _prepare_cov_input(m, y, rowvar, ddof, dtype, _DDOF_HANDLER, _M_DIM_HANDLER).astype(dtype)
+
+            if np.any(np.array(X.shape) == 0):
+                variance = np.nan
+            else:
+                variance = np_cov_impl_inner(X, bias, ddof).flat[0]
+
+            return np.array(variance)
+
+        # identify up front if output is 0D
+        if isinstance(m, types.Array) and m.ndim == 1:
+            if y in (None, types.none):
+                return np_cov_impl_single_variable
+
+        if isinstance(m, types.BaseTuple):
+            if all(isinstance(x, (types.Number, types.Boolean)) for x in m.types):
+                if y in (None, types.none):
+                    return np_cov_impl_single_variable
+            else:
+                if len(m.types) == 1 and isinstance(m.types[0], types.BaseTuple):
+                    if y in (None, types.none):
+                        return np_cov_impl_single_variable
+
+        if isinstance(m, (types.Number, types.Boolean)):
+            if y in (None, types.none):
+                return np_cov_impl_single_variable
+
+        if isinstance(m, types.Sequence):
+            if not isinstance(m.key[0], types.Sequence) and y in (None, types.none):
+                return np_cov_impl_single_variable
+
+        # otherwise assume it's 2D and we're good to go
+        return np_cov_impl
+
 #----------------------------------------------------------------------------
 # Element-wise computations